/*
 * Copyright (C) 2008 Red Hat. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */

#include <linux/pagemap.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/math64.h>
#include <linux/ratelimit.h>
#include "ctree.h"
#include "free-space-cache.h"
#include "transaction.h"
#include "disk-io.h"
#include "extent_io.h"
#include "inode-map.h"

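/*
 * Each bitmap entry covers one page worth of bits, and
 * MAX_CACHE_BYTES_PER_GIG caps how much memory the in-memory free space
 * cache may use per gigabyte of block group space (enforced by
 * recalculate_thresholds() below).
 */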
#define BITS_PER_BITMAP		(PAGE_CACHE_SIZE * 8)
#define MAX_CACHE_BYTES_PER_GIG	(32 * 1024)

static int link_free_space(struct btrfs_free_space_ctl *ctl,
			   struct btrfs_free_space *info);

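/*
 * Find the free space header item keyed by 'offset' in the given root and
 * return the in-memory inode for the cache file it points at (or an ERR_PTR
 * on failure).
 */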
0414efae
LZ
37static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
38 struct btrfs_path *path,
39 u64 offset)
0af3d00b
JB
40{
41 struct btrfs_key key;
42 struct btrfs_key location;
43 struct btrfs_disk_key disk_key;
44 struct btrfs_free_space_header *header;
45 struct extent_buffer *leaf;
46 struct inode *inode = NULL;
47 int ret;
48
0af3d00b 49 key.objectid = BTRFS_FREE_SPACE_OBJECTID;
0414efae 50 key.offset = offset;
0af3d00b
JB
51 key.type = 0;
52
53 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
54 if (ret < 0)
55 return ERR_PTR(ret);
56 if (ret > 0) {
b3b4aa74 57 btrfs_release_path(path);
0af3d00b
JB
58 return ERR_PTR(-ENOENT);
59 }
60
61 leaf = path->nodes[0];
62 header = btrfs_item_ptr(leaf, path->slots[0],
63 struct btrfs_free_space_header);
64 btrfs_free_space_key(leaf, header, &disk_key);
65 btrfs_disk_key_to_cpu(&location, &disk_key);
b3b4aa74 66 btrfs_release_path(path);
0af3d00b
JB
67
68 inode = btrfs_iget(root->fs_info->sb, &location, root, NULL);
69 if (!inode)
70 return ERR_PTR(-ENOENT);
71 if (IS_ERR(inode))
72 return inode;
73 if (is_bad_inode(inode)) {
74 iput(inode);
75 return ERR_PTR(-ENOENT);
76 }
77
adae52b9
MX
78 inode->i_mapping->flags &= ~__GFP_FS;
79
0414efae
LZ
80 return inode;
81}
82
83struct inode *lookup_free_space_inode(struct btrfs_root *root,
84 struct btrfs_block_group_cache
85 *block_group, struct btrfs_path *path)
86{
87 struct inode *inode = NULL;
5b0e95bf 88 u32 flags = BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW;
0414efae
LZ
89
90 spin_lock(&block_group->lock);
91 if (block_group->inode)
92 inode = igrab(block_group->inode);
93 spin_unlock(&block_group->lock);
94 if (inode)
95 return inode;
96
97 inode = __lookup_free_space_inode(root, path,
98 block_group->key.objectid);
99 if (IS_ERR(inode))
100 return inode;
101
0af3d00b 102 spin_lock(&block_group->lock);
5b0e95bf 103 if (!((BTRFS_I(inode)->flags & flags) == flags)) {
2f356126 104 printk(KERN_INFO "Old style space inode found, converting.\n");
5b0e95bf
JB
105 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM |
106 BTRFS_INODE_NODATACOW;
2f356126
JB
107 block_group->disk_cache_state = BTRFS_DC_CLEAR;
108 }
109
300e4f8a 110 if (!block_group->iref) {
0af3d00b
JB
111 block_group->inode = igrab(inode);
112 block_group->iref = 1;
113 }
114 spin_unlock(&block_group->lock);
115
116 return inode;
117}
118
0414efae
LZ
119int __create_free_space_inode(struct btrfs_root *root,
120 struct btrfs_trans_handle *trans,
121 struct btrfs_path *path, u64 ino, u64 offset)
0af3d00b
JB
122{
123 struct btrfs_key key;
124 struct btrfs_disk_key disk_key;
125 struct btrfs_free_space_header *header;
126 struct btrfs_inode_item *inode_item;
127 struct extent_buffer *leaf;
5b0e95bf 128 u64 flags = BTRFS_INODE_NOCOMPRESS | BTRFS_INODE_PREALLOC;
0af3d00b
JB
129 int ret;
130
0414efae 131 ret = btrfs_insert_empty_inode(trans, root, path, ino);
0af3d00b
JB
132 if (ret)
133 return ret;
134
5b0e95bf
JB
135 /* We inline crc's for the free disk space cache */
136 if (ino != BTRFS_FREE_INO_OBJECTID)
137 flags |= BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW;
138
0af3d00b
JB
139 leaf = path->nodes[0];
140 inode_item = btrfs_item_ptr(leaf, path->slots[0],
141 struct btrfs_inode_item);
142 btrfs_item_key(leaf, &disk_key, path->slots[0]);
143 memset_extent_buffer(leaf, 0, (unsigned long)inode_item,
144 sizeof(*inode_item));
145 btrfs_set_inode_generation(leaf, inode_item, trans->transid);
146 btrfs_set_inode_size(leaf, inode_item, 0);
147 btrfs_set_inode_nbytes(leaf, inode_item, 0);
148 btrfs_set_inode_uid(leaf, inode_item, 0);
149 btrfs_set_inode_gid(leaf, inode_item, 0);
150 btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600);
5b0e95bf 151 btrfs_set_inode_flags(leaf, inode_item, flags);
0af3d00b
JB
152 btrfs_set_inode_nlink(leaf, inode_item, 1);
153 btrfs_set_inode_transid(leaf, inode_item, trans->transid);
0414efae 154 btrfs_set_inode_block_group(leaf, inode_item, offset);
0af3d00b 155 btrfs_mark_buffer_dirty(leaf);
b3b4aa74 156 btrfs_release_path(path);
0af3d00b
JB
157
158 key.objectid = BTRFS_FREE_SPACE_OBJECTID;
0414efae 159 key.offset = offset;
0af3d00b
JB
160 key.type = 0;
161
162 ret = btrfs_insert_empty_item(trans, root, path, &key,
163 sizeof(struct btrfs_free_space_header));
164 if (ret < 0) {
b3b4aa74 165 btrfs_release_path(path);
0af3d00b
JB
166 return ret;
167 }
168 leaf = path->nodes[0];
169 header = btrfs_item_ptr(leaf, path->slots[0],
170 struct btrfs_free_space_header);
171 memset_extent_buffer(leaf, 0, (unsigned long)header, sizeof(*header));
172 btrfs_set_free_space_key(leaf, header, &disk_key);
173 btrfs_mark_buffer_dirty(leaf);
b3b4aa74 174 btrfs_release_path(path);
0af3d00b
JB
175
176 return 0;
177}
178
0414efae
LZ
179int create_free_space_inode(struct btrfs_root *root,
180 struct btrfs_trans_handle *trans,
181 struct btrfs_block_group_cache *block_group,
182 struct btrfs_path *path)
183{
184 int ret;
185 u64 ino;
186
187 ret = btrfs_find_free_objectid(root, &ino);
188 if (ret < 0)
189 return ret;
190
191 return __create_free_space_inode(root, trans, path, ino,
192 block_group->key.objectid);
193}
194
0af3d00b
JB
195int btrfs_truncate_free_space_cache(struct btrfs_root *root,
196 struct btrfs_trans_handle *trans,
197 struct btrfs_path *path,
198 struct inode *inode)
199{
65450aa6 200 struct btrfs_block_rsv *rsv;
c8174313 201 u64 needed_bytes;
0af3d00b
JB
202 loff_t oldsize;
203 int ret = 0;
204
65450aa6 205 rsv = trans->block_rsv;
c8174313
JB
206 trans->block_rsv = &root->fs_info->global_block_rsv;
207
208 /* 1 for slack space, 1 for updating the inode */
209 needed_bytes = btrfs_calc_trunc_metadata_size(root, 1) +
210 btrfs_calc_trans_metadata_size(root, 1);
211
212 spin_lock(&trans->block_rsv->lock);
213 if (trans->block_rsv->reserved < needed_bytes) {
214 spin_unlock(&trans->block_rsv->lock);
215 trans->block_rsv = rsv;
216 return -ENOSPC;
217 }
218 spin_unlock(&trans->block_rsv->lock);
0af3d00b
JB
219
220 oldsize = i_size_read(inode);
221 btrfs_i_size_write(inode, 0);
222 truncate_pagecache(inode, oldsize, 0);
223
224 /*
225 * We don't need an orphan item because truncating the free space cache
226 * will never be split across transactions.
227 */
228 ret = btrfs_truncate_inode_items(trans, root, inode,
229 0, BTRFS_EXTENT_DATA_KEY);
65450aa6 230
0af3d00b 231 if (ret) {
c8174313 232 trans->block_rsv = rsv;
79787eaa 233 btrfs_abort_transaction(trans, root, ret);
0af3d00b
JB
234 return ret;
235 }
236
82d5902d 237 ret = btrfs_update_inode(trans, root, inode);
79787eaa
JM
238 if (ret)
239 btrfs_abort_transaction(trans, root, ret);
c8174313
JB
240 trans->block_rsv = rsv;
241
82d5902d 242 return ret;
0af3d00b
JB
243}
244
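/*
 * Start readahead across the whole cache file so the page reads issued while
 * loading the cache are likely to hit the page cache instead of waiting on
 * disk I/O.
 */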
9d66e233
JB
245static int readahead_cache(struct inode *inode)
246{
247 struct file_ra_state *ra;
248 unsigned long last_index;
249
250 ra = kzalloc(sizeof(*ra), GFP_NOFS);
251 if (!ra)
252 return -ENOMEM;
253
254 file_ra_state_init(ra, inode->i_mapping);
255 last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
256
257 page_cache_sync_readahead(inode->i_mapping, ra, NULL, 0, last_index);
258
259 kfree(ra);
260
261 return 0;
262}
263
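/*
 * struct io_ctl tracks the locked pages used to read or write the on-disk
 * free space cache, the currently kmap()'d position within them, and whether
 * per-page crcs (stored at the front of the first page) are being used.
 */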
a67509c3
JB
264struct io_ctl {
265 void *cur, *orig;
266 struct page *page;
267 struct page **pages;
268 struct btrfs_root *root;
269 unsigned long size;
270 int index;
271 int num_pages;
5b0e95bf 272 unsigned check_crcs:1;
a67509c3
JB
273};
274
275static int io_ctl_init(struct io_ctl *io_ctl, struct inode *inode,
276 struct btrfs_root *root)
277{
278 memset(io_ctl, 0, sizeof(struct io_ctl));
279 io_ctl->num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
280 PAGE_CACHE_SHIFT;
281 io_ctl->pages = kzalloc(sizeof(struct page *) * io_ctl->num_pages,
282 GFP_NOFS);
283 if (!io_ctl->pages)
284 return -ENOMEM;
285 io_ctl->root = root;
5b0e95bf
JB
286 if (btrfs_ino(inode) != BTRFS_FREE_INO_OBJECTID)
287 io_ctl->check_crcs = 1;
a67509c3
JB
288 return 0;
289}
290
291static void io_ctl_free(struct io_ctl *io_ctl)
292{
293 kfree(io_ctl->pages);
294}
295
296static void io_ctl_unmap_page(struct io_ctl *io_ctl)
297{
298 if (io_ctl->cur) {
299 kunmap(io_ctl->page);
300 io_ctl->cur = NULL;
301 io_ctl->orig = NULL;
302 }
303}
304
305static void io_ctl_map_page(struct io_ctl *io_ctl, int clear)
306{
307 WARN_ON(io_ctl->cur);
308 BUG_ON(io_ctl->index >= io_ctl->num_pages);
309 io_ctl->page = io_ctl->pages[io_ctl->index++];
310 io_ctl->cur = kmap(io_ctl->page);
311 io_ctl->orig = io_ctl->cur;
312 io_ctl->size = PAGE_CACHE_SIZE;
313 if (clear)
314 memset(io_ctl->cur, 0, PAGE_CACHE_SIZE);
315}
316
317static void io_ctl_drop_pages(struct io_ctl *io_ctl)
318{
319 int i;
320
321 io_ctl_unmap_page(io_ctl);
322
323 for (i = 0; i < io_ctl->num_pages; i++) {
a1ee5a45
LZ
324 if (io_ctl->pages[i]) {
325 ClearPageChecked(io_ctl->pages[i]);
326 unlock_page(io_ctl->pages[i]);
327 page_cache_release(io_ctl->pages[i]);
328 }
a67509c3
JB
329 }
330}
331
332static int io_ctl_prepare_pages(struct io_ctl *io_ctl, struct inode *inode,
333 int uptodate)
334{
335 struct page *page;
336 gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
337 int i;
338
339 for (i = 0; i < io_ctl->num_pages; i++) {
340 page = find_or_create_page(inode->i_mapping, i, mask);
341 if (!page) {
342 io_ctl_drop_pages(io_ctl);
343 return -ENOMEM;
344 }
345 io_ctl->pages[i] = page;
346 if (uptodate && !PageUptodate(page)) {
347 btrfs_readpage(NULL, page);
348 lock_page(page);
349 if (!PageUptodate(page)) {
350 printk(KERN_ERR "btrfs: error reading free "
351 "space cache\n");
352 io_ctl_drop_pages(io_ctl);
353 return -EIO;
354 }
355 }
356 }
357
f7d61dcd
JB
358 for (i = 0; i < io_ctl->num_pages; i++) {
359 clear_page_dirty_for_io(io_ctl->pages[i]);
360 set_page_extent_mapped(io_ctl->pages[i]);
361 }
362
a67509c3
JB
363 return 0;
364}
365
366static void io_ctl_set_generation(struct io_ctl *io_ctl, u64 generation)
367{
368 u64 *val;
369
370 io_ctl_map_page(io_ctl, 1);
371
372 /*
5b0e95bf
JB
373 * Skip the csum areas. If we don't check crcs then we just have a
374 * 64bit chunk at the front of the first page.
a67509c3 375 */
5b0e95bf
JB
376 if (io_ctl->check_crcs) {
377 io_ctl->cur += (sizeof(u32) * io_ctl->num_pages);
378 io_ctl->size -= sizeof(u64) + (sizeof(u32) * io_ctl->num_pages);
379 } else {
380 io_ctl->cur += sizeof(u64);
381 io_ctl->size -= sizeof(u64) * 2;
382 }
a67509c3
JB
383
384 val = io_ctl->cur;
385 *val = cpu_to_le64(generation);
386 io_ctl->cur += sizeof(u64);
a67509c3
JB
387}
388
389static int io_ctl_check_generation(struct io_ctl *io_ctl, u64 generation)
390{
391 u64 *gen;
392
5b0e95bf
JB
393 /*
394 * Skip the crc area. If we don't check crcs then we just have a 64bit
395 * chunk at the front of the first page.
396 */
397 if (io_ctl->check_crcs) {
398 io_ctl->cur += sizeof(u32) * io_ctl->num_pages;
399 io_ctl->size -= sizeof(u64) +
400 (sizeof(u32) * io_ctl->num_pages);
401 } else {
402 io_ctl->cur += sizeof(u64);
403 io_ctl->size -= sizeof(u64) * 2;
404 }
a67509c3 405
a67509c3
JB
406 gen = io_ctl->cur;
407 if (le64_to_cpu(*gen) != generation) {
408 printk_ratelimited(KERN_ERR "btrfs: space cache generation "
409 "(%Lu) does not match inode (%Lu)\n", *gen,
410 generation);
411 io_ctl_unmap_page(io_ctl);
412 return -EIO;
413 }
414 io_ctl->cur += sizeof(u64);
5b0e95bf
JB
415 return 0;
416}
417
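/*
 * Checksum the page at 'index' and store the crc in the array reserved at
 * the start of the first page; io_ctl_check_crc() verifies it on the read
 * side.
 */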
418static void io_ctl_set_crc(struct io_ctl *io_ctl, int index)
419{
420 u32 *tmp;
421 u32 crc = ~(u32)0;
422 unsigned offset = 0;
423
424 if (!io_ctl->check_crcs) {
425 io_ctl_unmap_page(io_ctl);
426 return;
427 }
428
429 if (index == 0)
cb54f257 430 offset = sizeof(u32) * io_ctl->num_pages;
5b0e95bf
JB
431
432 crc = btrfs_csum_data(io_ctl->root, io_ctl->orig + offset, crc,
433 PAGE_CACHE_SIZE - offset);
434 btrfs_csum_final(crc, (char *)&crc);
435 io_ctl_unmap_page(io_ctl);
436 tmp = kmap(io_ctl->pages[0]);
437 tmp += index;
438 *tmp = crc;
439 kunmap(io_ctl->pages[0]);
440}
441
442static int io_ctl_check_crc(struct io_ctl *io_ctl, int index)
443{
444 u32 *tmp, val;
445 u32 crc = ~(u32)0;
446 unsigned offset = 0;
447
448 if (!io_ctl->check_crcs) {
449 io_ctl_map_page(io_ctl, 0);
450 return 0;
451 }
452
453 if (index == 0)
454 offset = sizeof(u32) * io_ctl->num_pages;
455
456 tmp = kmap(io_ctl->pages[0]);
457 tmp += index;
458 val = *tmp;
459 kunmap(io_ctl->pages[0]);
460
461 io_ctl_map_page(io_ctl, 0);
462 crc = btrfs_csum_data(io_ctl->root, io_ctl->orig + offset, crc,
463 PAGE_CACHE_SIZE - offset);
464 btrfs_csum_final(crc, (char *)&crc);
465 if (val != crc) {
466 printk_ratelimited(KERN_ERR "btrfs: csum mismatch on free "
467 "space cache\n");
468 io_ctl_unmap_page(io_ctl);
469 return -EIO;
470 }
471
a67509c3
JB
472 return 0;
473}
474
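/*
 * Append a single free space entry to the current page, checksumming the
 * page and mapping the next one once the current page fills up.
 */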
475static int io_ctl_add_entry(struct io_ctl *io_ctl, u64 offset, u64 bytes,
476 void *bitmap)
477{
478 struct btrfs_free_space_entry *entry;
479
480 if (!io_ctl->cur)
481 return -ENOSPC;
482
483 entry = io_ctl->cur;
484 entry->offset = cpu_to_le64(offset);
485 entry->bytes = cpu_to_le64(bytes);
486 entry->type = (bitmap) ? BTRFS_FREE_SPACE_BITMAP :
487 BTRFS_FREE_SPACE_EXTENT;
488 io_ctl->cur += sizeof(struct btrfs_free_space_entry);
489 io_ctl->size -= sizeof(struct btrfs_free_space_entry);
490
491 if (io_ctl->size >= sizeof(struct btrfs_free_space_entry))
492 return 0;
493
5b0e95bf 494 io_ctl_set_crc(io_ctl, io_ctl->index - 1);
a67509c3
JB
495
496 /* No more pages to map */
497 if (io_ctl->index >= io_ctl->num_pages)
498 return 0;
499
500 /* map the next page */
501 io_ctl_map_page(io_ctl, 1);
502 return 0;
503}
504
505static int io_ctl_add_bitmap(struct io_ctl *io_ctl, void *bitmap)
506{
507 if (!io_ctl->cur)
508 return -ENOSPC;
509
510 /*
511 * If we aren't at the start of the current page, unmap this one and
512 * map the next one if there is any left.
513 */
514 if (io_ctl->cur != io_ctl->orig) {
5b0e95bf 515 io_ctl_set_crc(io_ctl, io_ctl->index - 1);
a67509c3
JB
516 if (io_ctl->index >= io_ctl->num_pages)
517 return -ENOSPC;
518 io_ctl_map_page(io_ctl, 0);
519 }
520
521 memcpy(io_ctl->cur, bitmap, PAGE_CACHE_SIZE);
5b0e95bf 522 io_ctl_set_crc(io_ctl, io_ctl->index - 1);
a67509c3
JB
523 if (io_ctl->index < io_ctl->num_pages)
524 io_ctl_map_page(io_ctl, 0);
525 return 0;
526}
527
528static void io_ctl_zero_remaining_pages(struct io_ctl *io_ctl)
529{
5b0e95bf
JB
530 /*
531 * If we're not on the boundary we know we've modified the page and we
532 * need to crc the page.
533 */
534 if (io_ctl->cur != io_ctl->orig)
535 io_ctl_set_crc(io_ctl, io_ctl->index - 1);
536 else
537 io_ctl_unmap_page(io_ctl);
a67509c3
JB
538
539 while (io_ctl->index < io_ctl->num_pages) {
540 io_ctl_map_page(io_ctl, 1);
5b0e95bf 541 io_ctl_set_crc(io_ctl, io_ctl->index - 1);
a67509c3
JB
542 }
543}
544
5b0e95bf
JB
545static int io_ctl_read_entry(struct io_ctl *io_ctl,
546 struct btrfs_free_space *entry, u8 *type)
a67509c3
JB
547{
548 struct btrfs_free_space_entry *e;
2f120c05
JB
549 int ret;
550
551 if (!io_ctl->cur) {
552 ret = io_ctl_check_crc(io_ctl, io_ctl->index);
553 if (ret)
554 return ret;
555 }
a67509c3
JB
556
557 e = io_ctl->cur;
558 entry->offset = le64_to_cpu(e->offset);
559 entry->bytes = le64_to_cpu(e->bytes);
5b0e95bf 560 *type = e->type;
a67509c3
JB
561 io_ctl->cur += sizeof(struct btrfs_free_space_entry);
562 io_ctl->size -= sizeof(struct btrfs_free_space_entry);
563
564 if (io_ctl->size >= sizeof(struct btrfs_free_space_entry))
5b0e95bf 565 return 0;
a67509c3
JB
566
567 io_ctl_unmap_page(io_ctl);
568
2f120c05 569 return 0;
a67509c3
JB
570}
571
5b0e95bf
JB
572static int io_ctl_read_bitmap(struct io_ctl *io_ctl,
573 struct btrfs_free_space *entry)
a67509c3 574{
5b0e95bf
JB
575 int ret;
576
5b0e95bf
JB
577 ret = io_ctl_check_crc(io_ctl, io_ctl->index);
578 if (ret)
579 return ret;
580
a67509c3
JB
581 memcpy(entry->bitmap, io_ctl->cur, PAGE_CACHE_SIZE);
582 io_ctl_unmap_page(io_ctl);
5b0e95bf
JB
583
584 return 0;
a67509c3
JB
585}
586
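/*
 * Read the on-disk cache for 'offset' back into the in-memory rbtree: check
 * the stored generation against the inode, then link each extent and bitmap
 * entry via link_free_space().  Returns 1 on success, 0 if there was nothing
 * usable to load, and a negative value on error.
 */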
0414efae
LZ
587int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
588 struct btrfs_free_space_ctl *ctl,
589 struct btrfs_path *path, u64 offset)
9d66e233 590{
9d66e233
JB
591 struct btrfs_free_space_header *header;
592 struct extent_buffer *leaf;
a67509c3 593 struct io_ctl io_ctl;
9d66e233 594 struct btrfs_key key;
a67509c3 595 struct btrfs_free_space *e, *n;
9d66e233
JB
596 struct list_head bitmaps;
597 u64 num_entries;
598 u64 num_bitmaps;
599 u64 generation;
a67509c3 600 u8 type;
f6a39829 601 int ret = 0;
9d66e233
JB
602
603 INIT_LIST_HEAD(&bitmaps);
604
9d66e233 605 /* Nothing in the space cache, goodbye */
0414efae 606 if (!i_size_read(inode))
a67509c3 607 return 0;
9d66e233
JB
608
609 key.objectid = BTRFS_FREE_SPACE_OBJECTID;
0414efae 610 key.offset = offset;
9d66e233
JB
611 key.type = 0;
612
613 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
0414efae 614 if (ret < 0)
a67509c3 615 return 0;
0414efae 616 else if (ret > 0) {
945d8962 617 btrfs_release_path(path);
a67509c3 618 return 0;
9d66e233
JB
619 }
620
0414efae
LZ
621 ret = -1;
622
9d66e233
JB
623 leaf = path->nodes[0];
624 header = btrfs_item_ptr(leaf, path->slots[0],
625 struct btrfs_free_space_header);
626 num_entries = btrfs_free_space_entries(leaf, header);
627 num_bitmaps = btrfs_free_space_bitmaps(leaf, header);
628 generation = btrfs_free_space_generation(leaf, header);
945d8962 629 btrfs_release_path(path);
9d66e233
JB
630
631 if (BTRFS_I(inode)->generation != generation) {
632 printk(KERN_ERR "btrfs: free space inode generation (%llu) did"
0414efae 633 " not match free space cache generation (%llu)\n",
9d66e233 634 (unsigned long long)BTRFS_I(inode)->generation,
0414efae 635 (unsigned long long)generation);
a67509c3 636 return 0;
9d66e233
JB
637 }
638
639 if (!num_entries)
a67509c3 640 return 0;
9d66e233 641
706efc66
LZ
642 ret = io_ctl_init(&io_ctl, inode, root);
643 if (ret)
644 return ret;
645
9d66e233 646 ret = readahead_cache(inode);
0414efae 647 if (ret)
9d66e233 648 goto out;
9d66e233 649
a67509c3
JB
650 ret = io_ctl_prepare_pages(&io_ctl, inode, 1);
651 if (ret)
652 goto out;
9d66e233 653
5b0e95bf
JB
654 ret = io_ctl_check_crc(&io_ctl, 0);
655 if (ret)
656 goto free_cache;
657
a67509c3
JB
658 ret = io_ctl_check_generation(&io_ctl, generation);
659 if (ret)
660 goto free_cache;
9d66e233 661
a67509c3
JB
662 while (num_entries) {
663 e = kmem_cache_zalloc(btrfs_free_space_cachep,
664 GFP_NOFS);
665 if (!e)
9d66e233 666 goto free_cache;
9d66e233 667
5b0e95bf
JB
668 ret = io_ctl_read_entry(&io_ctl, e, &type);
669 if (ret) {
670 kmem_cache_free(btrfs_free_space_cachep, e);
671 goto free_cache;
672 }
673
a67509c3
JB
674 if (!e->bytes) {
675 kmem_cache_free(btrfs_free_space_cachep, e);
676 goto free_cache;
9d66e233 677 }
a67509c3
JB
678
679 if (type == BTRFS_FREE_SPACE_EXTENT) {
680 spin_lock(&ctl->tree_lock);
681 ret = link_free_space(ctl, e);
682 spin_unlock(&ctl->tree_lock);
683 if (ret) {
684 printk(KERN_ERR "Duplicate entries in "
685 "free space cache, dumping\n");
686 kmem_cache_free(btrfs_free_space_cachep, e);
9d66e233
JB
687 goto free_cache;
688 }
a67509c3
JB
689 } else {
690 BUG_ON(!num_bitmaps);
691 num_bitmaps--;
692 e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
693 if (!e->bitmap) {
694 kmem_cache_free(
695 btrfs_free_space_cachep, e);
9d66e233
JB
696 goto free_cache;
697 }
a67509c3
JB
698 spin_lock(&ctl->tree_lock);
699 ret = link_free_space(ctl, e);
700 ctl->total_bitmaps++;
701 ctl->op->recalc_thresholds(ctl);
702 spin_unlock(&ctl->tree_lock);
703 if (ret) {
704 printk(KERN_ERR "Duplicate entries in "
705 "free space cache, dumping\n");
dc89e982 706 kmem_cache_free(btrfs_free_space_cachep, e);
9d66e233
JB
707 goto free_cache;
708 }
a67509c3 709 list_add_tail(&e->list, &bitmaps);
9d66e233
JB
710 }
711
a67509c3
JB
712 num_entries--;
713 }
9d66e233 714
2f120c05
JB
715 io_ctl_unmap_page(&io_ctl);
716
a67509c3
JB
	/*
	 * The bitmap pages are stored after all of the extent entries, in the
	 * same order that the bitmap entries were added to the cache above.
	 */
721 list_for_each_entry_safe(e, n, &bitmaps, list) {
9d66e233 722 list_del_init(&e->list);
5b0e95bf
JB
723 ret = io_ctl_read_bitmap(&io_ctl, e);
724 if (ret)
725 goto free_cache;
9d66e233
JB
726 }
727
a67509c3 728 io_ctl_drop_pages(&io_ctl);
9d66e233
JB
729 ret = 1;
730out:
a67509c3 731 io_ctl_free(&io_ctl);
9d66e233 732 return ret;
9d66e233 733free_cache:
a67509c3 734 io_ctl_drop_pages(&io_ctl);
0414efae 735 __btrfs_remove_free_space_cache(ctl);
9d66e233
JB
736 goto out;
737}
738
0414efae
LZ
739int load_free_space_cache(struct btrfs_fs_info *fs_info,
740 struct btrfs_block_group_cache *block_group)
0cb59c99 741{
34d52cb6 742 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
0414efae
LZ
743 struct btrfs_root *root = fs_info->tree_root;
744 struct inode *inode;
745 struct btrfs_path *path;
5b0e95bf 746 int ret = 0;
0414efae
LZ
747 bool matched;
748 u64 used = btrfs_block_group_used(&block_group->item);
749
750 /*
751 * If we're unmounting then just return, since this does a search on the
752 * normal root and not the commit root and we could deadlock.
753 */
7841cb28 754 if (btrfs_fs_closing(fs_info))
0414efae
LZ
755 return 0;
756
757 /*
758 * If this block group has been marked to be cleared for one reason or
759 * another then we can't trust the on disk cache, so just return.
760 */
9d66e233 761 spin_lock(&block_group->lock);
0414efae
LZ
762 if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
763 spin_unlock(&block_group->lock);
764 return 0;
765 }
9d66e233 766 spin_unlock(&block_group->lock);
0414efae
LZ
767
768 path = btrfs_alloc_path();
769 if (!path)
770 return 0;
771
772 inode = lookup_free_space_inode(root, block_group, path);
773 if (IS_ERR(inode)) {
774 btrfs_free_path(path);
775 return 0;
776 }
777
5b0e95bf
JB
778 /* We may have converted the inode and made the cache invalid. */
779 spin_lock(&block_group->lock);
780 if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
781 spin_unlock(&block_group->lock);
a7e221e9 782 btrfs_free_path(path);
5b0e95bf
JB
783 goto out;
784 }
785 spin_unlock(&block_group->lock);
786
0414efae
LZ
787 ret = __load_free_space_cache(fs_info->tree_root, inode, ctl,
788 path, block_group->key.objectid);
789 btrfs_free_path(path);
790 if (ret <= 0)
791 goto out;
792
793 spin_lock(&ctl->tree_lock);
794 matched = (ctl->free_space == (block_group->key.offset - used -
795 block_group->bytes_super));
796 spin_unlock(&ctl->tree_lock);
797
798 if (!matched) {
799 __btrfs_remove_free_space_cache(ctl);
		printk(KERN_ERR "block group %llu has the wrong amount of free "
		       "space\n", block_group->key.objectid);
802 ret = -1;
803 }
804out:
805 if (ret < 0) {
806 /* This cache is bogus, make sure it gets cleared */
807 spin_lock(&block_group->lock);
808 block_group->disk_cache_state = BTRFS_DC_CLEAR;
809 spin_unlock(&block_group->lock);
82d5902d 810 ret = 0;
0414efae
LZ
811
812 printk(KERN_ERR "btrfs: failed to load free space cache "
813 "for block group %llu\n", block_group->key.objectid);
814 }
815
816 iput(inode);
817 return ret;
9d66e233
JB
818}
819
c09544e0
JB
/**
 * __btrfs_write_out_cache - write out cached info to an inode
 * @root - the root the inode belongs to
 * @ctl - the free space cache we are going to write out
 * @block_group - the block_group for this cache if it belongs to a block_group
 * @trans - the trans handle
 * @path - the path to use
 * @offset - the offset for the key we'll insert
 *
 * This function writes out a free space cache struct to disk for quick recovery
 * on mount.  This will return 0 if it was successful in writing the cache out,
 * and -1 if it was not.
 */
0414efae
LZ
833int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
834 struct btrfs_free_space_ctl *ctl,
835 struct btrfs_block_group_cache *block_group,
836 struct btrfs_trans_handle *trans,
837 struct btrfs_path *path, u64 offset)
0cb59c99
JB
838{
839 struct btrfs_free_space_header *header;
840 struct extent_buffer *leaf;
0cb59c99
JB
841 struct rb_node *node;
842 struct list_head *pos, *n;
0cb59c99 843 struct extent_state *cached_state = NULL;
43be2146
JB
844 struct btrfs_free_cluster *cluster = NULL;
845 struct extent_io_tree *unpin = NULL;
a67509c3 846 struct io_ctl io_ctl;
0cb59c99
JB
847 struct list_head bitmap_list;
848 struct btrfs_key key;
db804f23 849 u64 start, extent_start, extent_end, len;
0cb59c99
JB
850 int entries = 0;
851 int bitmaps = 0;
c09544e0
JB
852 int ret;
853 int err = -1;
0cb59c99 854
0cb59c99
JB
855 INIT_LIST_HEAD(&bitmap_list);
856
0414efae
LZ
857 if (!i_size_read(inode))
858 return -1;
2b20982e 859
706efc66
LZ
860 ret = io_ctl_init(&io_ctl, inode, root);
861 if (ret)
862 return -1;
be1a12a0 863
43be2146 864 /* Get the cluster for this block_group if it exists */
0414efae 865 if (block_group && !list_empty(&block_group->cluster_list))
43be2146
JB
866 cluster = list_entry(block_group->cluster_list.next,
867 struct btrfs_free_cluster,
868 block_group_list);
869
a67509c3
JB
870 /* Lock all pages first so we can lock the extent safely. */
871 io_ctl_prepare_pages(&io_ctl, inode, 0);
0cb59c99 872
0cb59c99 873 lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
d0082371 874 0, &cached_state);
0cb59c99 875
f75b130e
JB
876 node = rb_first(&ctl->free_space_offset);
877 if (!node && cluster) {
878 node = rb_first(&cluster->root);
879 cluster = NULL;
880 }
881
5b0e95bf
JB
882 /* Make sure we can fit our crcs into the first page */
883 if (io_ctl.check_crcs &&
884 (io_ctl.num_pages * sizeof(u32)) >= PAGE_CACHE_SIZE) {
885 WARN_ON(1);
886 goto out_nospc;
887 }
888
a67509c3 889 io_ctl_set_generation(&io_ctl, trans->transid);
43be2146 890
a67509c3
JB
891 /* Write out the extent entries */
892 while (node) {
893 struct btrfs_free_space *e;
0cb59c99 894
a67509c3
JB
895 e = rb_entry(node, struct btrfs_free_space, offset_index);
896 entries++;
0cb59c99 897
a67509c3
JB
898 ret = io_ctl_add_entry(&io_ctl, e->offset, e->bytes,
899 e->bitmap);
900 if (ret)
901 goto out_nospc;
2f356126 902
a67509c3
JB
903 if (e->bitmap) {
904 list_add_tail(&e->list, &bitmap_list);
905 bitmaps++;
2f356126 906 }
a67509c3
JB
907 node = rb_next(node);
908 if (!node && cluster) {
909 node = rb_first(&cluster->root);
910 cluster = NULL;
43be2146 911 }
a67509c3 912 }
43be2146 913
a67509c3
JB
914 /*
915 * We want to add any pinned extents to our free space cache
916 * so we don't leak the space
917 */
db804f23
LZ
918
919 /*
920 * We shouldn't have switched the pinned extents yet so this is the
921 * right one
922 */
923 unpin = root->fs_info->pinned_extents;
924
925 if (block_group)
926 start = block_group->key.objectid;
927
a67509c3
JB
928 while (block_group && (start < block_group->key.objectid +
929 block_group->key.offset)) {
db804f23
LZ
930 ret = find_first_extent_bit(unpin, start,
931 &extent_start, &extent_end,
a67509c3
JB
932 EXTENT_DIRTY);
933 if (ret) {
934 ret = 0;
935 break;
0cb59c99 936 }
0cb59c99 937
a67509c3 938 /* This pinned extent is out of our range */
db804f23 939 if (extent_start >= block_group->key.objectid +
a67509c3
JB
940 block_group->key.offset)
941 break;
2f356126 942
db804f23
LZ
943 extent_start = max(extent_start, start);
944 extent_end = min(block_group->key.objectid +
945 block_group->key.offset, extent_end + 1);
946 len = extent_end - extent_start;
0cb59c99 947
a67509c3 948 entries++;
db804f23 949 ret = io_ctl_add_entry(&io_ctl, extent_start, len, NULL);
a67509c3
JB
950 if (ret)
951 goto out_nospc;
0cb59c99 952
db804f23 953 start = extent_end;
a67509c3 954 }
0cb59c99
JB
955
956 /* Write out the bitmaps */
957 list_for_each_safe(pos, n, &bitmap_list) {
0cb59c99
JB
958 struct btrfs_free_space *entry =
959 list_entry(pos, struct btrfs_free_space, list);
960
a67509c3
JB
961 ret = io_ctl_add_bitmap(&io_ctl, entry->bitmap);
962 if (ret)
963 goto out_nospc;
0cb59c99 964 list_del_init(&entry->list);
be1a12a0
JB
965 }
966
0cb59c99 967 /* Zero out the rest of the pages just to make sure */
a67509c3 968 io_ctl_zero_remaining_pages(&io_ctl);
0cb59c99 969
a67509c3
JB
970 ret = btrfs_dirty_pages(root, inode, io_ctl.pages, io_ctl.num_pages,
971 0, i_size_read(inode), &cached_state);
972 io_ctl_drop_pages(&io_ctl);
0cb59c99
JB
973 unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
974 i_size_read(inode) - 1, &cached_state, GFP_NOFS);
975
c09544e0 976 if (ret)
2f356126 977 goto out;
be1a12a0 978
be1a12a0 979
549b4fdb
JB
980 ret = filemap_write_and_wait(inode->i_mapping);
981 if (ret)
982 goto out;
0cb59c99
JB
983
984 key.objectid = BTRFS_FREE_SPACE_OBJECTID;
0414efae 985 key.offset = offset;
0cb59c99
JB
986 key.type = 0;
987
a9b5fcdd 988 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
0cb59c99 989 if (ret < 0) {
a67509c3 990 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1,
5b0e95bf
JB
991 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL,
992 GFP_NOFS);
2f356126 993 goto out;
0cb59c99
JB
994 }
995 leaf = path->nodes[0];
996 if (ret > 0) {
997 struct btrfs_key found_key;
998 BUG_ON(!path->slots[0]);
999 path->slots[0]--;
1000 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
1001 if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID ||
0414efae 1002 found_key.offset != offset) {
a67509c3
JB
1003 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0,
1004 inode->i_size - 1,
5b0e95bf
JB
1005 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0,
1006 NULL, GFP_NOFS);
b3b4aa74 1007 btrfs_release_path(path);
2f356126 1008 goto out;
0cb59c99
JB
1009 }
1010 }
549b4fdb
JB
1011
1012 BTRFS_I(inode)->generation = trans->transid;
0cb59c99
JB
1013 header = btrfs_item_ptr(leaf, path->slots[0],
1014 struct btrfs_free_space_header);
1015 btrfs_set_free_space_entries(leaf, header, entries);
1016 btrfs_set_free_space_bitmaps(leaf, header, bitmaps);
1017 btrfs_set_free_space_generation(leaf, header, trans->transid);
1018 btrfs_mark_buffer_dirty(leaf);
b3b4aa74 1019 btrfs_release_path(path);
0cb59c99 1020
c09544e0 1021 err = 0;
2f356126 1022out:
a67509c3 1023 io_ctl_free(&io_ctl);
c09544e0 1024 if (err) {
a67509c3 1025 invalidate_inode_pages2(inode->i_mapping);
0cb59c99
JB
1026 BTRFS_I(inode)->generation = 0;
1027 }
0cb59c99 1028 btrfs_update_inode(trans, root, inode);
c09544e0 1029 return err;
a67509c3
JB
1030
1031out_nospc:
1032 list_for_each_safe(pos, n, &bitmap_list) {
1033 struct btrfs_free_space *entry =
1034 list_entry(pos, struct btrfs_free_space, list);
1035 list_del_init(&entry->list);
1036 }
1037 io_ctl_drop_pages(&io_ctl);
1038 unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
1039 i_size_read(inode) - 1, &cached_state, GFP_NOFS);
1040 goto out;
0414efae
LZ
1041}
1042
1043int btrfs_write_out_cache(struct btrfs_root *root,
1044 struct btrfs_trans_handle *trans,
1045 struct btrfs_block_group_cache *block_group,
1046 struct btrfs_path *path)
1047{
1048 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1049 struct inode *inode;
1050 int ret = 0;
1051
1052 root = root->fs_info->tree_root;
1053
1054 spin_lock(&block_group->lock);
1055 if (block_group->disk_cache_state < BTRFS_DC_SETUP) {
1056 spin_unlock(&block_group->lock);
1057 return 0;
1058 }
1059 spin_unlock(&block_group->lock);
1060
1061 inode = lookup_free_space_inode(root, block_group, path);
1062 if (IS_ERR(inode))
1063 return 0;
1064
1065 ret = __btrfs_write_out_cache(root, inode, ctl, block_group, trans,
1066 path, block_group->key.objectid);
c09544e0 1067 if (ret) {
0414efae
LZ
1068 spin_lock(&block_group->lock);
1069 block_group->disk_cache_state = BTRFS_DC_ERROR;
1070 spin_unlock(&block_group->lock);
82d5902d 1071 ret = 0;
c09544e0 1072#ifdef DEBUG
0414efae
LZ
		printk(KERN_ERR "btrfs: failed to write free space cache "
		       "for block group %llu\n", block_group->key.objectid);
c09544e0 1075#endif
0414efae
LZ
1076 }
1077
0cb59c99
JB
1078 iput(inode);
1079 return ret;
1080}
1081
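/*
 * Helpers for converting between byte offsets/lengths and bit positions
 * inside a bitmap entry; 'unit' is the number of bytes each bit represents
 * (the block group's sectorsize for the block group free space cache).
 */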
34d52cb6 1082static inline unsigned long offset_to_bit(u64 bitmap_start, u32 unit,
96303081 1083 u64 offset)
0f9dd46c 1084{
96303081
JB
1085 BUG_ON(offset < bitmap_start);
1086 offset -= bitmap_start;
34d52cb6 1087 return (unsigned long)(div_u64(offset, unit));
96303081 1088}
0f9dd46c 1089
34d52cb6 1090static inline unsigned long bytes_to_bits(u64 bytes, u32 unit)
96303081 1091{
34d52cb6 1092 return (unsigned long)(div_u64(bytes, unit));
96303081 1093}
0f9dd46c 1094
34d52cb6 1095static inline u64 offset_to_bitmap(struct btrfs_free_space_ctl *ctl,
96303081
JB
1096 u64 offset)
1097{
1098 u64 bitmap_start;
1099 u64 bytes_per_bitmap;
0f9dd46c 1100
34d52cb6
LZ
1101 bytes_per_bitmap = BITS_PER_BITMAP * ctl->unit;
1102 bitmap_start = offset - ctl->start;
96303081
JB
1103 bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap);
1104 bitmap_start *= bytes_per_bitmap;
34d52cb6 1105 bitmap_start += ctl->start;
0f9dd46c 1106
96303081 1107 return bitmap_start;
0f9dd46c
JB
1108}
1109
96303081
JB
1110static int tree_insert_offset(struct rb_root *root, u64 offset,
1111 struct rb_node *node, int bitmap)
0f9dd46c
JB
1112{
1113 struct rb_node **p = &root->rb_node;
1114 struct rb_node *parent = NULL;
1115 struct btrfs_free_space *info;
1116
1117 while (*p) {
1118 parent = *p;
96303081 1119 info = rb_entry(parent, struct btrfs_free_space, offset_index);
0f9dd46c 1120
96303081 1121 if (offset < info->offset) {
0f9dd46c 1122 p = &(*p)->rb_left;
96303081 1123 } else if (offset > info->offset) {
0f9dd46c 1124 p = &(*p)->rb_right;
96303081
JB
1125 } else {
1126 /*
1127 * we could have a bitmap entry and an extent entry
1128 * share the same offset. If this is the case, we want
1129 * the extent entry to always be found first if we do a
1130 * linear search through the tree, since we want to have
1131 * the quickest allocation time, and allocating from an
1132 * extent is faster than allocating from a bitmap. So
1133 * if we're inserting a bitmap and we find an entry at
1134 * this offset, we want to go right, or after this entry
1135 * logically. If we are inserting an extent and we've
1136 * found a bitmap, we want to go left, or before
1137 * logically.
1138 */
1139 if (bitmap) {
207dde82
JB
1140 if (info->bitmap) {
1141 WARN_ON_ONCE(1);
1142 return -EEXIST;
1143 }
96303081
JB
1144 p = &(*p)->rb_right;
1145 } else {
207dde82
JB
1146 if (!info->bitmap) {
1147 WARN_ON_ONCE(1);
1148 return -EEXIST;
1149 }
96303081
JB
1150 p = &(*p)->rb_left;
1151 }
1152 }
0f9dd46c
JB
1153 }
1154
1155 rb_link_node(node, parent, p);
1156 rb_insert_color(node, root);
1157
1158 return 0;
1159}
1160
1161/*
70cb0743
JB
1162 * searches the tree for the given offset.
1163 *
96303081
JB
1164 * fuzzy - If this is set, then we are trying to make an allocation, and we just
1165 * want a section that has at least bytes size and comes at or after the given
1166 * offset.
0f9dd46c 1167 */
96303081 1168static struct btrfs_free_space *
34d52cb6 1169tree_search_offset(struct btrfs_free_space_ctl *ctl,
96303081 1170 u64 offset, int bitmap_only, int fuzzy)
0f9dd46c 1171{
34d52cb6 1172 struct rb_node *n = ctl->free_space_offset.rb_node;
96303081
JB
1173 struct btrfs_free_space *entry, *prev = NULL;
1174
1175 /* find entry that is closest to the 'offset' */
1176 while (1) {
1177 if (!n) {
1178 entry = NULL;
1179 break;
1180 }
0f9dd46c 1181
0f9dd46c 1182 entry = rb_entry(n, struct btrfs_free_space, offset_index);
96303081 1183 prev = entry;
0f9dd46c 1184
96303081 1185 if (offset < entry->offset)
0f9dd46c 1186 n = n->rb_left;
96303081 1187 else if (offset > entry->offset)
0f9dd46c 1188 n = n->rb_right;
96303081 1189 else
0f9dd46c 1190 break;
0f9dd46c
JB
1191 }
1192
96303081
JB
1193 if (bitmap_only) {
1194 if (!entry)
1195 return NULL;
1196 if (entry->bitmap)
1197 return entry;
0f9dd46c 1198
96303081
JB
1199 /*
1200 * bitmap entry and extent entry may share same offset,
1201 * in that case, bitmap entry comes after extent entry.
1202 */
1203 n = rb_next(n);
1204 if (!n)
1205 return NULL;
1206 entry = rb_entry(n, struct btrfs_free_space, offset_index);
1207 if (entry->offset != offset)
1208 return NULL;
0f9dd46c 1209
96303081
JB
1210 WARN_ON(!entry->bitmap);
1211 return entry;
1212 } else if (entry) {
1213 if (entry->bitmap) {
0f9dd46c 1214 /*
96303081
JB
1215 * if previous extent entry covers the offset,
1216 * we should return it instead of the bitmap entry
0f9dd46c 1217 */
96303081
JB
1218 n = &entry->offset_index;
1219 while (1) {
1220 n = rb_prev(n);
1221 if (!n)
1222 break;
1223 prev = rb_entry(n, struct btrfs_free_space,
1224 offset_index);
1225 if (!prev->bitmap) {
1226 if (prev->offset + prev->bytes > offset)
1227 entry = prev;
1228 break;
1229 }
0f9dd46c 1230 }
96303081
JB
1231 }
1232 return entry;
1233 }
1234
1235 if (!prev)
1236 return NULL;
1237
1238 /* find last entry before the 'offset' */
1239 entry = prev;
1240 if (entry->offset > offset) {
1241 n = rb_prev(&entry->offset_index);
1242 if (n) {
1243 entry = rb_entry(n, struct btrfs_free_space,
1244 offset_index);
1245 BUG_ON(entry->offset > offset);
0f9dd46c 1246 } else {
96303081
JB
1247 if (fuzzy)
1248 return entry;
1249 else
1250 return NULL;
0f9dd46c
JB
1251 }
1252 }
1253
96303081
JB
1254 if (entry->bitmap) {
1255 n = &entry->offset_index;
1256 while (1) {
1257 n = rb_prev(n);
1258 if (!n)
1259 break;
1260 prev = rb_entry(n, struct btrfs_free_space,
1261 offset_index);
1262 if (!prev->bitmap) {
1263 if (prev->offset + prev->bytes > offset)
1264 return prev;
1265 break;
1266 }
1267 }
34d52cb6 1268 if (entry->offset + BITS_PER_BITMAP * ctl->unit > offset)
96303081
JB
1269 return entry;
1270 } else if (entry->offset + entry->bytes > offset)
1271 return entry;
1272
1273 if (!fuzzy)
1274 return NULL;
1275
1276 while (1) {
1277 if (entry->bitmap) {
1278 if (entry->offset + BITS_PER_BITMAP *
34d52cb6 1279 ctl->unit > offset)
96303081
JB
1280 break;
1281 } else {
1282 if (entry->offset + entry->bytes > offset)
1283 break;
1284 }
1285
1286 n = rb_next(&entry->offset_index);
1287 if (!n)
1288 return NULL;
1289 entry = rb_entry(n, struct btrfs_free_space, offset_index);
1290 }
1291 return entry;
0f9dd46c
JB
1292}
1293
f333adb5 1294static inline void
34d52cb6 1295__unlink_free_space(struct btrfs_free_space_ctl *ctl,
f333adb5 1296 struct btrfs_free_space *info)
0f9dd46c 1297{
34d52cb6
LZ
1298 rb_erase(&info->offset_index, &ctl->free_space_offset);
1299 ctl->free_extents--;
f333adb5
LZ
1300}
1301
34d52cb6 1302static void unlink_free_space(struct btrfs_free_space_ctl *ctl,
f333adb5
LZ
1303 struct btrfs_free_space *info)
1304{
34d52cb6
LZ
1305 __unlink_free_space(ctl, info);
1306 ctl->free_space -= info->bytes;
0f9dd46c
JB
1307}
1308
34d52cb6 1309static int link_free_space(struct btrfs_free_space_ctl *ctl,
0f9dd46c
JB
1310 struct btrfs_free_space *info)
1311{
1312 int ret = 0;
1313
96303081 1314 BUG_ON(!info->bitmap && !info->bytes);
34d52cb6 1315 ret = tree_insert_offset(&ctl->free_space_offset, info->offset,
96303081 1316 &info->offset_index, (info->bitmap != NULL));
0f9dd46c
JB
1317 if (ret)
1318 return ret;
1319
34d52cb6
LZ
1320 ctl->free_space += info->bytes;
1321 ctl->free_extents++;
96303081
JB
1322 return ret;
1323}
1324
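/*
 * Recompute how many extent entries we allow before pushing new free space
 * into bitmaps, keeping the cache within MAX_CACHE_BYTES_PER_GIG of memory
 * per gigabyte of block group space.
 */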
34d52cb6 1325static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
96303081 1326{
34d52cb6 1327 struct btrfs_block_group_cache *block_group = ctl->private;
25891f79
JB
1328 u64 max_bytes;
1329 u64 bitmap_bytes;
1330 u64 extent_bytes;
8eb2d829 1331 u64 size = block_group->key.offset;
34d52cb6
LZ
1332 u64 bytes_per_bg = BITS_PER_BITMAP * block_group->sectorsize;
1333 int max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg);
1334
1335 BUG_ON(ctl->total_bitmaps > max_bitmaps);
96303081
JB
1336
1337 /*
1338 * The goal is to keep the total amount of memory used per 1gb of space
1339 * at or below 32k, so we need to adjust how much memory we allow to be
1340 * used by extent based free space tracking
1341 */
8eb2d829
LZ
1342 if (size < 1024 * 1024 * 1024)
1343 max_bytes = MAX_CACHE_BYTES_PER_GIG;
1344 else
1345 max_bytes = MAX_CACHE_BYTES_PER_GIG *
1346 div64_u64(size, 1024 * 1024 * 1024);
96303081 1347
25891f79
JB
1348 /*
1349 * we want to account for 1 more bitmap than what we have so we can make
1350 * sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as
1351 * we add more bitmaps.
1352 */
34d52cb6 1353 bitmap_bytes = (ctl->total_bitmaps + 1) * PAGE_CACHE_SIZE;
96303081 1354
25891f79 1355 if (bitmap_bytes >= max_bytes) {
34d52cb6 1356 ctl->extents_thresh = 0;
25891f79
JB
1357 return;
1358 }
96303081 1359
25891f79
JB
	/*
	 * we want the extent entry threshold to always be at most 1/2 the max
	 * bytes we can have, or whatever is less than that.
	 */
1364 extent_bytes = max_bytes - bitmap_bytes;
1365 extent_bytes = min_t(u64, extent_bytes, div64_u64(max_bytes, 2));
96303081 1366
34d52cb6 1367 ctl->extents_thresh =
25891f79 1368 div64_u64(extent_bytes, (sizeof(struct btrfs_free_space)));
96303081
JB
1369}
1370
bb3ac5a4
MX
1371static inline void __bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
1372 struct btrfs_free_space *info,
1373 u64 offset, u64 bytes)
96303081 1374{
f38b6e75 1375 unsigned long start, count;
96303081 1376
34d52cb6
LZ
1377 start = offset_to_bit(info->offset, ctl->unit, offset);
1378 count = bytes_to_bits(bytes, ctl->unit);
f38b6e75 1379 BUG_ON(start + count > BITS_PER_BITMAP);
96303081 1380
f38b6e75 1381 bitmap_clear(info->bitmap, start, count);
96303081
JB
1382
1383 info->bytes -= bytes;
bb3ac5a4
MX
1384}
1385
1386static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
1387 struct btrfs_free_space *info, u64 offset,
1388 u64 bytes)
1389{
1390 __bitmap_clear_bits(ctl, info, offset, bytes);
34d52cb6 1391 ctl->free_space -= bytes;
96303081
JB
1392}
1393
34d52cb6 1394static void bitmap_set_bits(struct btrfs_free_space_ctl *ctl,
817d52f8
JB
1395 struct btrfs_free_space *info, u64 offset,
1396 u64 bytes)
96303081 1397{
f38b6e75 1398 unsigned long start, count;
96303081 1399
34d52cb6
LZ
1400 start = offset_to_bit(info->offset, ctl->unit, offset);
1401 count = bytes_to_bits(bytes, ctl->unit);
f38b6e75 1402 BUG_ON(start + count > BITS_PER_BITMAP);
96303081 1403
f38b6e75 1404 bitmap_set(info->bitmap, start, count);
96303081
JB
1405
1406 info->bytes += bytes;
34d52cb6 1407 ctl->free_space += bytes;
96303081
JB
1408}
1409
34d52cb6 1410static int search_bitmap(struct btrfs_free_space_ctl *ctl,
96303081
JB
1411 struct btrfs_free_space *bitmap_info, u64 *offset,
1412 u64 *bytes)
1413{
1414 unsigned long found_bits = 0;
1415 unsigned long bits, i;
1416 unsigned long next_zero;
1417
34d52cb6 1418 i = offset_to_bit(bitmap_info->offset, ctl->unit,
96303081 1419 max_t(u64, *offset, bitmap_info->offset));
34d52cb6 1420 bits = bytes_to_bits(*bytes, ctl->unit);
96303081
JB
1421
1422 for (i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i);
1423 i < BITS_PER_BITMAP;
1424 i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i + 1)) {
1425 next_zero = find_next_zero_bit(bitmap_info->bitmap,
1426 BITS_PER_BITMAP, i);
1427 if ((next_zero - i) >= bits) {
1428 found_bits = next_zero - i;
1429 break;
1430 }
1431 i = next_zero;
1432 }
1433
1434 if (found_bits) {
34d52cb6
LZ
1435 *offset = (u64)(i * ctl->unit) + bitmap_info->offset;
1436 *bytes = (u64)(found_bits) * ctl->unit;
96303081
JB
1437 return 0;
1438 }
1439
1440 return -1;
1441}
1442
34d52cb6
LZ
1443static struct btrfs_free_space *
1444find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes)
96303081
JB
1445{
1446 struct btrfs_free_space *entry;
1447 struct rb_node *node;
1448 int ret;
1449
34d52cb6 1450 if (!ctl->free_space_offset.rb_node)
96303081
JB
1451 return NULL;
1452
34d52cb6 1453 entry = tree_search_offset(ctl, offset_to_bitmap(ctl, *offset), 0, 1);
96303081
JB
1454 if (!entry)
1455 return NULL;
1456
1457 for (node = &entry->offset_index; node; node = rb_next(node)) {
1458 entry = rb_entry(node, struct btrfs_free_space, offset_index);
1459 if (entry->bytes < *bytes)
1460 continue;
1461
1462 if (entry->bitmap) {
34d52cb6 1463 ret = search_bitmap(ctl, entry, offset, bytes);
96303081
JB
1464 if (!ret)
1465 return entry;
1466 continue;
1467 }
1468
1469 *offset = entry->offset;
1470 *bytes = entry->bytes;
1471 return entry;
1472 }
1473
1474 return NULL;
1475}
1476
34d52cb6 1477static void add_new_bitmap(struct btrfs_free_space_ctl *ctl,
96303081
JB
1478 struct btrfs_free_space *info, u64 offset)
1479{
34d52cb6 1480 info->offset = offset_to_bitmap(ctl, offset);
f019f426 1481 info->bytes = 0;
f2d0f676 1482 INIT_LIST_HEAD(&info->list);
34d52cb6
LZ
1483 link_free_space(ctl, info);
1484 ctl->total_bitmaps++;
96303081 1485
34d52cb6 1486 ctl->op->recalc_thresholds(ctl);
96303081
JB
1487}
1488
34d52cb6 1489static void free_bitmap(struct btrfs_free_space_ctl *ctl,
edf6e2d1
LZ
1490 struct btrfs_free_space *bitmap_info)
1491{
34d52cb6 1492 unlink_free_space(ctl, bitmap_info);
edf6e2d1 1493 kfree(bitmap_info->bitmap);
dc89e982 1494 kmem_cache_free(btrfs_free_space_cachep, bitmap_info);
34d52cb6
LZ
1495 ctl->total_bitmaps--;
1496 ctl->op->recalc_thresholds(ctl);
edf6e2d1
LZ
1497}
1498
34d52cb6 1499static noinline int remove_from_bitmap(struct btrfs_free_space_ctl *ctl,
96303081
JB
1500 struct btrfs_free_space *bitmap_info,
1501 u64 *offset, u64 *bytes)
1502{
1503 u64 end;
6606bb97
JB
1504 u64 search_start, search_bytes;
1505 int ret;
96303081
JB
1506
1507again:
34d52cb6 1508 end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * ctl->unit) - 1;
96303081 1509
6606bb97
JB
1510 /*
1511 * XXX - this can go away after a few releases.
1512 *
1513 * since the only user of btrfs_remove_free_space is the tree logging
1514 * stuff, and the only way to test that is under crash conditions, we
1515 * want to have this debug stuff here just in case somethings not
1516 * working. Search the bitmap for the space we are trying to use to
1517 * make sure its actually there. If its not there then we need to stop
1518 * because something has gone wrong.
1519 */
1520 search_start = *offset;
1521 search_bytes = *bytes;
13dbc089 1522 search_bytes = min(search_bytes, end - search_start + 1);
34d52cb6 1523 ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes);
6606bb97
JB
1524 BUG_ON(ret < 0 || search_start != *offset);
1525
96303081 1526 if (*offset > bitmap_info->offset && *offset + *bytes > end) {
34d52cb6 1527 bitmap_clear_bits(ctl, bitmap_info, *offset, end - *offset + 1);
96303081
JB
1528 *bytes -= end - *offset + 1;
1529 *offset = end + 1;
1530 } else if (*offset >= bitmap_info->offset && *offset + *bytes <= end) {
34d52cb6 1531 bitmap_clear_bits(ctl, bitmap_info, *offset, *bytes);
96303081
JB
1532 *bytes = 0;
1533 }
1534
1535 if (*bytes) {
6606bb97 1536 struct rb_node *next = rb_next(&bitmap_info->offset_index);
edf6e2d1 1537 if (!bitmap_info->bytes)
34d52cb6 1538 free_bitmap(ctl, bitmap_info);
96303081 1539
6606bb97
JB
1540 /*
1541 * no entry after this bitmap, but we still have bytes to
1542 * remove, so something has gone wrong.
1543 */
1544 if (!next)
96303081
JB
1545 return -EINVAL;
1546
6606bb97
JB
1547 bitmap_info = rb_entry(next, struct btrfs_free_space,
1548 offset_index);
1549
1550 /*
1551 * if the next entry isn't a bitmap we need to return to let the
1552 * extent stuff do its work.
1553 */
96303081
JB
1554 if (!bitmap_info->bitmap)
1555 return -EAGAIN;
1556
6606bb97
JB
1557 /*
1558 * Ok the next item is a bitmap, but it may not actually hold
1559 * the information for the rest of this free space stuff, so
1560 * look for it, and if we don't find it return so we can try
1561 * everything over again.
1562 */
1563 search_start = *offset;
1564 search_bytes = *bytes;
34d52cb6 1565 ret = search_bitmap(ctl, bitmap_info, &search_start,
6606bb97
JB
1566 &search_bytes);
1567 if (ret < 0 || search_start != *offset)
1568 return -EAGAIN;
1569
96303081 1570 goto again;
edf6e2d1 1571 } else if (!bitmap_info->bytes)
34d52cb6 1572 free_bitmap(ctl, bitmap_info);
96303081
JB
1573
1574 return 0;
1575}
1576
2cdc342c
JB
1577static u64 add_bytes_to_bitmap(struct btrfs_free_space_ctl *ctl,
1578 struct btrfs_free_space *info, u64 offset,
1579 u64 bytes)
1580{
1581 u64 bytes_to_set = 0;
1582 u64 end;
1583
1584 end = info->offset + (u64)(BITS_PER_BITMAP * ctl->unit);
1585
1586 bytes_to_set = min(end - offset, bytes);
1587
1588 bitmap_set_bits(ctl, info, offset, bytes_to_set);
1589
1590 return bytes_to_set;
1591
1592}
1593
34d52cb6
LZ
1594static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
1595 struct btrfs_free_space *info)
96303081 1596{
34d52cb6 1597 struct btrfs_block_group_cache *block_group = ctl->private;
96303081
JB
1598
1599 /*
1600 * If we are below the extents threshold then we can add this as an
1601 * extent, and don't have to deal with the bitmap
1602 */
34d52cb6 1603 if (ctl->free_extents < ctl->extents_thresh) {
32cb0840
JB
		/*
		 * If this block group has some small extents we don't want to
		 * use up all of our free slots in the cache with them, we want
		 * to reserve them for larger extents, however if we have plenty
		 * of cache left then go ahead and add them, no sense in adding
		 * the overhead of a bitmap if we don't have to.
		 */
1611 if (info->bytes <= block_group->sectorsize * 4) {
34d52cb6
LZ
1612 if (ctl->free_extents * 2 <= ctl->extents_thresh)
1613 return false;
32cb0840 1614 } else {
34d52cb6 1615 return false;
32cb0840
JB
1616 }
1617 }
96303081
JB
1618
1619 /*
1620 * some block groups are so tiny they can't be enveloped by a bitmap, so
1621 * don't even bother to create a bitmap for this
1622 */
1623 if (BITS_PER_BITMAP * block_group->sectorsize >
1624 block_group->key.offset)
34d52cb6
LZ
1625 return false;
1626
1627 return true;
1628}
1629
2cdc342c
JB
1630static struct btrfs_free_space_op free_space_op = {
1631 .recalc_thresholds = recalculate_thresholds,
1632 .use_bitmap = use_bitmap,
1633};
1634
34d52cb6
LZ
1635static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl,
1636 struct btrfs_free_space *info)
1637{
1638 struct btrfs_free_space *bitmap_info;
2cdc342c 1639 struct btrfs_block_group_cache *block_group = NULL;
34d52cb6 1640 int added = 0;
2cdc342c 1641 u64 bytes, offset, bytes_added;
34d52cb6 1642 int ret;
96303081
JB
1643
1644 bytes = info->bytes;
1645 offset = info->offset;
1646
34d52cb6
LZ
1647 if (!ctl->op->use_bitmap(ctl, info))
1648 return 0;
1649
2cdc342c
JB
1650 if (ctl->op == &free_space_op)
1651 block_group = ctl->private;
38e87880 1652again:
2cdc342c
JB
1653 /*
1654 * Since we link bitmaps right into the cluster we need to see if we
1655 * have a cluster here, and if so and it has our bitmap we need to add
1656 * the free space to that bitmap.
1657 */
1658 if (block_group && !list_empty(&block_group->cluster_list)) {
1659 struct btrfs_free_cluster *cluster;
1660 struct rb_node *node;
1661 struct btrfs_free_space *entry;
1662
1663 cluster = list_entry(block_group->cluster_list.next,
1664 struct btrfs_free_cluster,
1665 block_group_list);
1666 spin_lock(&cluster->lock);
1667 node = rb_first(&cluster->root);
1668 if (!node) {
1669 spin_unlock(&cluster->lock);
38e87880 1670 goto no_cluster_bitmap;
2cdc342c
JB
1671 }
1672
1673 entry = rb_entry(node, struct btrfs_free_space, offset_index);
1674 if (!entry->bitmap) {
1675 spin_unlock(&cluster->lock);
38e87880 1676 goto no_cluster_bitmap;
2cdc342c
JB
1677 }
1678
1679 if (entry->offset == offset_to_bitmap(ctl, offset)) {
1680 bytes_added = add_bytes_to_bitmap(ctl, entry,
1681 offset, bytes);
1682 bytes -= bytes_added;
1683 offset += bytes_added;
1684 }
1685 spin_unlock(&cluster->lock);
1686 if (!bytes) {
1687 ret = 1;
1688 goto out;
1689 }
1690 }
38e87880
CM
1691
1692no_cluster_bitmap:
34d52cb6 1693 bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
96303081
JB
1694 1, 0);
1695 if (!bitmap_info) {
1696 BUG_ON(added);
1697 goto new_bitmap;
1698 }
1699
2cdc342c
JB
1700 bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes);
1701 bytes -= bytes_added;
1702 offset += bytes_added;
1703 added = 0;
96303081
JB
1704
1705 if (!bytes) {
1706 ret = 1;
1707 goto out;
1708 } else
1709 goto again;
1710
1711new_bitmap:
1712 if (info && info->bitmap) {
34d52cb6 1713 add_new_bitmap(ctl, info, offset);
96303081
JB
1714 added = 1;
1715 info = NULL;
1716 goto again;
1717 } else {
34d52cb6 1718 spin_unlock(&ctl->tree_lock);
96303081
JB
1719
1720 /* no pre-allocated info, allocate a new one */
1721 if (!info) {
dc89e982
JB
1722 info = kmem_cache_zalloc(btrfs_free_space_cachep,
1723 GFP_NOFS);
96303081 1724 if (!info) {
34d52cb6 1725 spin_lock(&ctl->tree_lock);
96303081
JB
1726 ret = -ENOMEM;
1727 goto out;
1728 }
1729 }
1730
1731 /* allocate the bitmap */
1732 info->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
34d52cb6 1733 spin_lock(&ctl->tree_lock);
96303081
JB
1734 if (!info->bitmap) {
1735 ret = -ENOMEM;
1736 goto out;
1737 }
1738 goto again;
1739 }
1740
1741out:
1742 if (info) {
1743 if (info->bitmap)
1744 kfree(info->bitmap);
dc89e982 1745 kmem_cache_free(btrfs_free_space_cachep, info);
96303081 1746 }
0f9dd46c
JB
1747
1748 return ret;
1749}
1750
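/*
 * Try to merge 'info' with the extent entries immediately before and after
 * it so that adjacent free ranges collapse into a single entry.  Returns
 * true if at least one merge happened.
 */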
945d8962 1751static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl,
f333adb5 1752 struct btrfs_free_space *info, bool update_stat)
0f9dd46c 1753{
120d66ee
LZ
1754 struct btrfs_free_space *left_info;
1755 struct btrfs_free_space *right_info;
1756 bool merged = false;
1757 u64 offset = info->offset;
1758 u64 bytes = info->bytes;
6226cb0a 1759
0f9dd46c
JB
1760 /*
1761 * first we want to see if there is free space adjacent to the range we
1762 * are adding, if there is remove that struct and add a new one to
1763 * cover the entire range
1764 */
34d52cb6 1765 right_info = tree_search_offset(ctl, offset + bytes, 0, 0);
96303081
JB
1766 if (right_info && rb_prev(&right_info->offset_index))
1767 left_info = rb_entry(rb_prev(&right_info->offset_index),
1768 struct btrfs_free_space, offset_index);
1769 else
34d52cb6 1770 left_info = tree_search_offset(ctl, offset - 1, 0, 0);
0f9dd46c 1771
96303081 1772 if (right_info && !right_info->bitmap) {
f333adb5 1773 if (update_stat)
34d52cb6 1774 unlink_free_space(ctl, right_info);
f333adb5 1775 else
34d52cb6 1776 __unlink_free_space(ctl, right_info);
6226cb0a 1777 info->bytes += right_info->bytes;
dc89e982 1778 kmem_cache_free(btrfs_free_space_cachep, right_info);
120d66ee 1779 merged = true;
0f9dd46c
JB
1780 }
1781
96303081
JB
1782 if (left_info && !left_info->bitmap &&
1783 left_info->offset + left_info->bytes == offset) {
f333adb5 1784 if (update_stat)
34d52cb6 1785 unlink_free_space(ctl, left_info);
f333adb5 1786 else
34d52cb6 1787 __unlink_free_space(ctl, left_info);
6226cb0a
JB
1788 info->offset = left_info->offset;
1789 info->bytes += left_info->bytes;
dc89e982 1790 kmem_cache_free(btrfs_free_space_cachep, left_info);
120d66ee 1791 merged = true;
0f9dd46c
JB
1792 }
1793
120d66ee
LZ
1794 return merged;
1795}
1796
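/*
 * Add a free range to the cache: merge it with neighbouring extent entries
 * when possible, otherwise link it as a new extent entry or fold it into a
 * bitmap once we are over the extent threshold.
 */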
581bb050
LZ
1797int __btrfs_add_free_space(struct btrfs_free_space_ctl *ctl,
1798 u64 offset, u64 bytes)
120d66ee
LZ
1799{
1800 struct btrfs_free_space *info;
1801 int ret = 0;
1802
dc89e982 1803 info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS);
120d66ee
LZ
1804 if (!info)
1805 return -ENOMEM;
1806
1807 info->offset = offset;
1808 info->bytes = bytes;
1809
34d52cb6 1810 spin_lock(&ctl->tree_lock);
120d66ee 1811
34d52cb6 1812 if (try_merge_free_space(ctl, info, true))
120d66ee
LZ
1813 goto link;
1814
1815 /*
1816 * There was no extent directly to the left or right of this new
1817 * extent then we know we're going to have to allocate a new extent, so
1818 * before we do that see if we need to drop this into a bitmap
1819 */
34d52cb6 1820 ret = insert_into_bitmap(ctl, info);
120d66ee
LZ
1821 if (ret < 0) {
1822 goto out;
1823 } else if (ret) {
1824 ret = 0;
1825 goto out;
1826 }
1827link:
34d52cb6 1828 ret = link_free_space(ctl, info);
0f9dd46c 1829 if (ret)
dc89e982 1830 kmem_cache_free(btrfs_free_space_cachep, info);
96303081 1831out:
34d52cb6 1832 spin_unlock(&ctl->tree_lock);
6226cb0a 1833
0f9dd46c 1834 if (ret) {
96303081 1835 printk(KERN_CRIT "btrfs: unable to add free space: %d\n", ret);
c293498b 1836 BUG_ON(ret == -EEXIST);
0f9dd46c
JB
1837 }
1838
0f9dd46c
JB
1839 return ret;
1840}
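
The add path above tries three strategies in order: merge with a byte-adjacent neighbour, fall back to a bitmap, and only then link a brand-new extent entry. A hedged userspace sketch of just that ordering follows; the helpers are stubs with invented names and an arbitrary size cutoff, not the kernel's try_merge_free_space()/insert_into_bitmap() logic.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct space { uint64_t offset; uint64_t bytes; };

/* Stub helpers standing in for the real merge/bitmap decisions. */
static bool try_merge(struct space *s)      { (void)s; return false; }
static bool fits_in_bitmap(struct space *s) { return s->bytes < 128 * 1024; }

static int add_free_space(struct space *s)
{
        if (try_merge(s)) {
                printf("merged with a neighbour\n");
                return 0;
        }
        if (fits_in_bitmap(s)) {
                printf("stored %llu bytes in a bitmap\n",
                       (unsigned long long)s->bytes);
                return 0;
        }
        printf("linked as a plain extent entry\n");
        return 0;
}

int main(void)
{
        struct space s = { 1 << 20, 64 * 1024 };

        return add_free_space(&s);
}
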
1841
6226cb0a
JB
1842int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
1843 u64 offset, u64 bytes)
0f9dd46c 1844{
34d52cb6 1845 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
0f9dd46c 1846 struct btrfs_free_space *info;
96303081 1847 struct btrfs_free_space *next_info = NULL;
0f9dd46c
JB
1848 int ret = 0;
1849
34d52cb6 1850 spin_lock(&ctl->tree_lock);
6226cb0a 1851
96303081 1852again:
34d52cb6 1853 info = tree_search_offset(ctl, offset, 0, 0);
96303081 1854 if (!info) {
6606bb97
JB
1855 /*
 1856 * oops, didn't find an extent that matched the space we wanted
 1857 * to remove; look for a bitmap instead
1858 */
34d52cb6 1859 info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
6606bb97
JB
1860 1, 0);
1861 if (!info) {
24a70313
CM
1862 /* the tree logging code might be calling us before we
1863 * have fully loaded the free space rbtree for this
1864 * block group. So it is possible the entry won't
1865 * be in the rbtree yet at all. The caching code
1866 * will make sure not to put it in the rbtree if
1867 * the logging code has pinned it.
1868 */
6606bb97
JB
1869 goto out_lock;
1870 }
96303081
JB
1871 }
1872
1873 if (info->bytes < bytes && rb_next(&info->offset_index)) {
1874 u64 end;
1875 next_info = rb_entry(rb_next(&info->offset_index),
1876 struct btrfs_free_space,
1877 offset_index);
1878
1879 if (next_info->bitmap)
34d52cb6
LZ
1880 end = next_info->offset +
1881 BITS_PER_BITMAP * ctl->unit - 1;
96303081
JB
1882 else
1883 end = next_info->offset + next_info->bytes;
1884
1885 if (next_info->bytes < bytes ||
1886 next_info->offset > offset || offset > end) {
1887 printk(KERN_CRIT "Found free space at %llu, size %llu,"
1888 " trying to use %llu\n",
1889 (unsigned long long)info->offset,
1890 (unsigned long long)info->bytes,
1891 (unsigned long long)bytes);
0f9dd46c
JB
1892 WARN_ON(1);
1893 ret = -EINVAL;
96303081 1894 goto out_lock;
0f9dd46c 1895 }
0f9dd46c 1896
96303081
JB
1897 info = next_info;
1898 }
1899
1900 if (info->bytes == bytes) {
34d52cb6 1901 unlink_free_space(ctl, info);
96303081
JB
1902 if (info->bitmap) {
1903 kfree(info->bitmap);
34d52cb6 1904 ctl->total_bitmaps--;
0f9dd46c 1905 }
dc89e982 1906 kmem_cache_free(btrfs_free_space_cachep, info);
1eae31e9 1907 ret = 0;
96303081
JB
1908 goto out_lock;
1909 }
0f9dd46c 1910
96303081 1911 if (!info->bitmap && info->offset == offset) {
34d52cb6 1912 unlink_free_space(ctl, info);
0f9dd46c
JB
1913 info->offset += bytes;
1914 info->bytes -= bytes;
1eae31e9
CM
1915 ret = link_free_space(ctl, info);
1916 WARN_ON(ret);
96303081
JB
1917 goto out_lock;
1918 }
0f9dd46c 1919
96303081
JB
1920 if (!info->bitmap && info->offset <= offset &&
1921 info->offset + info->bytes >= offset + bytes) {
9b49c9b9
CM
1922 u64 old_start = info->offset;
1923 /*
1924 * we're freeing space in the middle of the info,
1925 * this can happen during tree log replay
1926 *
1927 * first unlink the old info and then
1928 * insert it again after the hole we're creating
1929 */
34d52cb6 1930 unlink_free_space(ctl, info);
9b49c9b9
CM
1931 if (offset + bytes < info->offset + info->bytes) {
1932 u64 old_end = info->offset + info->bytes;
1933
1934 info->offset = offset + bytes;
1935 info->bytes = old_end - info->offset;
34d52cb6 1936 ret = link_free_space(ctl, info);
96303081
JB
1937 WARN_ON(ret);
1938 if (ret)
1939 goto out_lock;
9b49c9b9
CM
1940 } else {
1941 /* the hole we're creating ends at the end
1942 * of the info struct, just free the info
1943 */
dc89e982 1944 kmem_cache_free(btrfs_free_space_cachep, info);
9b49c9b9 1945 }
34d52cb6 1946 spin_unlock(&ctl->tree_lock);
96303081
JB
1947
1948 /* step two, insert a new info struct to cover
1949 * anything before the hole
9b49c9b9 1950 */
6226cb0a
JB
1951 ret = btrfs_add_free_space(block_group, old_start,
1952 offset - old_start);
79787eaa 1953 WARN_ON(ret); /* -ENOMEM */
96303081 1954 goto out;
0f9dd46c 1955 }
96303081 1956
34d52cb6 1957 ret = remove_from_bitmap(ctl, info, &offset, &bytes);
96303081
JB
1958 if (ret == -EAGAIN)
1959 goto again;
79787eaa 1960 BUG_ON(ret); /* logic error */
96303081 1961out_lock:
34d52cb6 1962 spin_unlock(&ctl->tree_lock);
0f9dd46c 1963out:
25179201
JB
1964 return ret;
1965}
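
The trickiest case above is log replay freeing a range in the middle of an existing extent: the old entry is unlinked, the piece after the hole is re-linked, and the piece before the hole is re-added. A standalone sketch of that split arithmetic (userspace, hypothetical names, assumes the hole starts inside the extent):

#include <stdio.h>
#include <stdint.h>

struct extent { uint64_t offset; uint64_t bytes; };

/*
 * Remove [offset, offset + bytes) from 'e'; 'before' gets the piece in
 * front of the hole and 'after' the piece behind it (bytes == 0 if empty).
 */
static void punch_hole(struct extent e, uint64_t offset, uint64_t bytes,
                       struct extent *before, struct extent *after)
{
        uint64_t end = e.offset + e.bytes;

        before->offset = e.offset;
        before->bytes = offset - e.offset;

        after->offset = offset + bytes;
        after->bytes = (offset + bytes < end) ? end - (offset + bytes) : 0;
}

int main(void)
{
        struct extent e = { 0, 1 << 20 }, before, after;

        punch_hole(e, 64 * 1024, 4096, &before, &after);
        printf("before: %llu+%llu, after: %llu+%llu\n",
               (unsigned long long)before.offset, (unsigned long long)before.bytes,
               (unsigned long long)after.offset, (unsigned long long)after.bytes);
        return 0;
}
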
1966
0f9dd46c
JB
1967void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
1968 u64 bytes)
1969{
34d52cb6 1970 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
0f9dd46c
JB
1971 struct btrfs_free_space *info;
1972 struct rb_node *n;
1973 int count = 0;
1974
34d52cb6 1975 for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) {
0f9dd46c
JB
1976 info = rb_entry(n, struct btrfs_free_space, offset_index);
1977 if (info->bytes >= bytes)
1978 count++;
96303081 1979 printk(KERN_CRIT "entry offset %llu, bytes %llu, bitmap %s\n",
21380931 1980 (unsigned long long)info->offset,
96303081
JB
1981 (unsigned long long)info->bytes,
1982 (info->bitmap) ? "yes" : "no");
0f9dd46c 1983 }
96303081
JB
1984 printk(KERN_INFO "block group has cluster?: %s\n",
1985 list_empty(&block_group->cluster_list) ? "no" : "yes");
0f9dd46c
JB
 1986 printk(KERN_INFO "%d blocks of free space at or bigger than %llu bytes\n",
 1987 count, (unsigned long long)bytes);
1988}
1989
34d52cb6 1990void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group)
0f9dd46c 1991{
34d52cb6 1992 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
0f9dd46c 1993
34d52cb6
LZ
1994 spin_lock_init(&ctl->tree_lock);
1995 ctl->unit = block_group->sectorsize;
1996 ctl->start = block_group->key.objectid;
1997 ctl->private = block_group;
1998 ctl->op = &free_space_op;
0f9dd46c 1999
34d52cb6
LZ
2000 /*
 2001 * we only want to have 32k of RAM per block group for keeping
2002 * track of free space, and if we pass 1/2 of that we want to
2003 * start converting things over to using bitmaps
2004 */
2005 ctl->extents_thresh = ((1024 * 32) / 2) /
2006 sizeof(struct btrfs_free_space);
0f9dd46c
JB
2007}
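
The threshold above budgets roughly 32KiB of entries per block group and flips to bitmaps once half of that would be consumed by plain extent entries. A quick userspace check of what that works out to; the struct below is an assumed stand-in, not the real struct btrfs_free_space, so the resulting count is only indicative.

#include <stdio.h>
#include <stdint.h>

/* Assumed stand-in: offset, bytes, bitmap pointer plus tree/list linkage. */
struct fake_free_space {
        uint64_t offset;
        uint64_t bytes;
        void *bitmap;
        void *linkage[3];
};

int main(void)
{
        size_t thresh = ((1024 * 32) / 2) / sizeof(struct fake_free_space);

        printf("entry size %zu bytes -> extents_thresh of %zu entries\n",
               sizeof(struct fake_free_space), thresh);
        return 0;
}
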
2008
fa9c0d79
CM
2009/*
2010 * for a given cluster, put all of its extents back into the free
2011 * space cache. If the block group passed doesn't match the block group
2012 * pointed to by the cluster, someone else raced in and freed the
2013 * cluster already. In that case, we just return without changing anything
2014 */
2015static int
2016__btrfs_return_cluster_to_free_space(
2017 struct btrfs_block_group_cache *block_group,
2018 struct btrfs_free_cluster *cluster)
2019{
34d52cb6 2020 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
fa9c0d79
CM
2021 struct btrfs_free_space *entry;
2022 struct rb_node *node;
2023
2024 spin_lock(&cluster->lock);
2025 if (cluster->block_group != block_group)
2026 goto out;
2027
96303081 2028 cluster->block_group = NULL;
fa9c0d79 2029 cluster->window_start = 0;
96303081 2030 list_del_init(&cluster->block_group_list);
96303081 2031
fa9c0d79 2032 node = rb_first(&cluster->root);
96303081 2033 while (node) {
4e69b598
JB
2034 bool bitmap;
2035
fa9c0d79
CM
2036 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2037 node = rb_next(&entry->offset_index);
2038 rb_erase(&entry->offset_index, &cluster->root);
4e69b598
JB
2039
2040 bitmap = (entry->bitmap != NULL);
2041 if (!bitmap)
34d52cb6
LZ
2042 try_merge_free_space(ctl, entry, false);
2043 tree_insert_offset(&ctl->free_space_offset,
4e69b598 2044 entry->offset, &entry->offset_index, bitmap);
fa9c0d79 2045 }
6bef4d31 2046 cluster->root = RB_ROOT;
96303081 2047
fa9c0d79
CM
2048out:
2049 spin_unlock(&cluster->lock);
96303081 2050 btrfs_put_block_group(block_group);
fa9c0d79
CM
2051 return 0;
2052}
2053
09655373 2054void __btrfs_remove_free_space_cache_locked(struct btrfs_free_space_ctl *ctl)
0f9dd46c
JB
2055{
2056 struct btrfs_free_space *info;
2057 struct rb_node *node;
581bb050 2058
581bb050
LZ
2059 while ((node = rb_last(&ctl->free_space_offset)) != NULL) {
2060 info = rb_entry(node, struct btrfs_free_space, offset_index);
9b90f513
JB
2061 if (!info->bitmap) {
2062 unlink_free_space(ctl, info);
2063 kmem_cache_free(btrfs_free_space_cachep, info);
2064 } else {
2065 free_bitmap(ctl, info);
2066 }
581bb050
LZ
2067 if (need_resched()) {
2068 spin_unlock(&ctl->tree_lock);
2069 cond_resched();
2070 spin_lock(&ctl->tree_lock);
2071 }
2072 }
09655373
CM
2073}
2074
2075void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl)
2076{
2077 spin_lock(&ctl->tree_lock);
2078 __btrfs_remove_free_space_cache_locked(ctl);
581bb050
LZ
2079 spin_unlock(&ctl->tree_lock);
2080}
2081
2082void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
2083{
2084 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
fa9c0d79 2085 struct btrfs_free_cluster *cluster;
96303081 2086 struct list_head *head;
0f9dd46c 2087
34d52cb6 2088 spin_lock(&ctl->tree_lock);
96303081
JB
2089 while ((head = block_group->cluster_list.next) !=
2090 &block_group->cluster_list) {
2091 cluster = list_entry(head, struct btrfs_free_cluster,
2092 block_group_list);
fa9c0d79
CM
2093
2094 WARN_ON(cluster->block_group != block_group);
2095 __btrfs_return_cluster_to_free_space(block_group, cluster);
96303081 2096 if (need_resched()) {
34d52cb6 2097 spin_unlock(&ctl->tree_lock);
96303081 2098 cond_resched();
34d52cb6 2099 spin_lock(&ctl->tree_lock);
96303081 2100 }
fa9c0d79 2101 }
09655373 2102 __btrfs_remove_free_space_cache_locked(ctl);
34d52cb6 2103 spin_unlock(&ctl->tree_lock);
fa9c0d79 2104
0f9dd46c
JB
2105}
2106
6226cb0a
JB
2107u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
2108 u64 offset, u64 bytes, u64 empty_size)
0f9dd46c 2109{
34d52cb6 2110 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
6226cb0a 2111 struct btrfs_free_space *entry = NULL;
96303081 2112 u64 bytes_search = bytes + empty_size;
6226cb0a 2113 u64 ret = 0;
0f9dd46c 2114
34d52cb6
LZ
2115 spin_lock(&ctl->tree_lock);
2116 entry = find_free_space(ctl, &offset, &bytes_search);
6226cb0a 2117 if (!entry)
96303081
JB
2118 goto out;
2119
2120 ret = offset;
2121 if (entry->bitmap) {
34d52cb6 2122 bitmap_clear_bits(ctl, entry, offset, bytes);
edf6e2d1 2123 if (!entry->bytes)
34d52cb6 2124 free_bitmap(ctl, entry);
96303081 2125 } else {
34d52cb6 2126 unlink_free_space(ctl, entry);
6226cb0a
JB
2127 entry->offset += bytes;
2128 entry->bytes -= bytes;
6226cb0a 2129 if (!entry->bytes)
dc89e982 2130 kmem_cache_free(btrfs_free_space_cachep, entry);
6226cb0a 2131 else
34d52cb6 2132 link_free_space(ctl, entry);
6226cb0a 2133 }
0f9dd46c 2134
96303081 2135out:
34d52cb6 2136 spin_unlock(&ctl->tree_lock);
817d52f8 2137
0f9dd46c
JB
2138 return ret;
2139}
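
For a plain extent entry the allocation above is just a front trim: the caller gets the entry's current offset and the entry shrinks (or is freed) by the allocated amount. A minimal sketch of that bookkeeping, userspace only, with invented names:

#include <stdio.h>
#include <stdint.h>

struct extent { uint64_t offset; uint64_t bytes; };

/* Carve 'bytes' off the front of 'e'; returns the start of the carved range. */
static uint64_t alloc_front(struct extent *e, uint64_t bytes)
{
        uint64_t ret = e->offset;

        e->offset += bytes;
        e->bytes -= bytes;
        return ret;
}

int main(void)
{
        struct extent e = { 1 << 20, 256 * 1024 };
        uint64_t got = alloc_front(&e, 16 * 1024);

        printf("allocated at %llu, entry now %llu+%llu\n",
               (unsigned long long)got,
               (unsigned long long)e.offset, (unsigned long long)e.bytes);
        return 0;
}
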
fa9c0d79
CM
2140
2141/*
2142 * given a cluster, put all of its extents back into the free space
2143 * cache. If a block group is passed, this function will only free
2144 * a cluster that belongs to the passed block group.
2145 *
2146 * Otherwise, it'll get a reference on the block group pointed to by the
2147 * cluster and remove the cluster from it.
2148 */
2149int btrfs_return_cluster_to_free_space(
2150 struct btrfs_block_group_cache *block_group,
2151 struct btrfs_free_cluster *cluster)
2152{
34d52cb6 2153 struct btrfs_free_space_ctl *ctl;
fa9c0d79
CM
2154 int ret;
2155
2156 /* first, get a safe pointer to the block group */
2157 spin_lock(&cluster->lock);
2158 if (!block_group) {
2159 block_group = cluster->block_group;
2160 if (!block_group) {
2161 spin_unlock(&cluster->lock);
2162 return 0;
2163 }
2164 } else if (cluster->block_group != block_group) {
 2165 /* someone else has already freed it, don't redo their work */
2166 spin_unlock(&cluster->lock);
2167 return 0;
2168 }
2169 atomic_inc(&block_group->count);
2170 spin_unlock(&cluster->lock);
2171
34d52cb6
LZ
2172 ctl = block_group->free_space_ctl;
2173
fa9c0d79 2174 /* now return any extents the cluster had on it */
34d52cb6 2175 spin_lock(&ctl->tree_lock);
fa9c0d79 2176 ret = __btrfs_return_cluster_to_free_space(block_group, cluster);
34d52cb6 2177 spin_unlock(&ctl->tree_lock);
fa9c0d79
CM
2178
2179 /* finally drop our ref */
2180 btrfs_put_block_group(block_group);
2181 return ret;
2182}
2183
96303081
JB
2184static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
2185 struct btrfs_free_cluster *cluster,
4e69b598 2186 struct btrfs_free_space *entry,
96303081
JB
2187 u64 bytes, u64 min_start)
2188{
34d52cb6 2189 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
96303081
JB
2190 int err;
2191 u64 search_start = cluster->window_start;
2192 u64 search_bytes = bytes;
2193 u64 ret = 0;
2194
96303081
JB
2195 search_start = min_start;
2196 search_bytes = bytes;
2197
34d52cb6 2198 err = search_bitmap(ctl, entry, &search_start, &search_bytes);
96303081 2199 if (err)
4e69b598 2200 return 0;
96303081
JB
2201
2202 ret = search_start;
bb3ac5a4 2203 __bitmap_clear_bits(ctl, entry, ret, bytes);
96303081
JB
2204
2205 return ret;
2206}
2207
fa9c0d79
CM
2208/*
2209 * given a cluster, try to allocate 'bytes' from it, returns 0
2210 * if it couldn't find anything suitably large, or a logical disk offset
2211 * if things worked out
2212 */
2213u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
2214 struct btrfs_free_cluster *cluster, u64 bytes,
2215 u64 min_start)
2216{
34d52cb6 2217 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
fa9c0d79
CM
2218 struct btrfs_free_space *entry = NULL;
2219 struct rb_node *node;
2220 u64 ret = 0;
2221
2222 spin_lock(&cluster->lock);
2223 if (bytes > cluster->max_size)
2224 goto out;
2225
2226 if (cluster->block_group != block_group)
2227 goto out;
2228
2229 node = rb_first(&cluster->root);
2230 if (!node)
2231 goto out;
2232
2233 entry = rb_entry(node, struct btrfs_free_space, offset_index);
fa9c0d79 2234 while(1) {
4e69b598
JB
2235 if (entry->bytes < bytes ||
2236 (!entry->bitmap && entry->offset < min_start)) {
fa9c0d79
CM
2237 node = rb_next(&entry->offset_index);
2238 if (!node)
2239 break;
2240 entry = rb_entry(node, struct btrfs_free_space,
2241 offset_index);
2242 continue;
2243 }
fa9c0d79 2244
4e69b598
JB
2245 if (entry->bitmap) {
2246 ret = btrfs_alloc_from_bitmap(block_group,
2247 cluster, entry, bytes,
0b4a9d24 2248 cluster->window_start);
4e69b598 2249 if (ret == 0) {
4e69b598
JB
2250 node = rb_next(&entry->offset_index);
2251 if (!node)
2252 break;
2253 entry = rb_entry(node, struct btrfs_free_space,
2254 offset_index);
2255 continue;
2256 }
9b230628 2257 cluster->window_start += bytes;
4e69b598 2258 } else {
4e69b598
JB
2259 ret = entry->offset;
2260
2261 entry->offset += bytes;
2262 entry->bytes -= bytes;
2263 }
fa9c0d79 2264
5e71b5d5 2265 if (entry->bytes == 0)
fa9c0d79 2266 rb_erase(&entry->offset_index, &cluster->root);
fa9c0d79
CM
2267 break;
2268 }
2269out:
2270 spin_unlock(&cluster->lock);
96303081 2271
5e71b5d5
LZ
2272 if (!ret)
2273 return 0;
2274
34d52cb6 2275 spin_lock(&ctl->tree_lock);
5e71b5d5 2276
34d52cb6 2277 ctl->free_space -= bytes;
5e71b5d5 2278 if (entry->bytes == 0) {
34d52cb6 2279 ctl->free_extents--;
4e69b598
JB
2280 if (entry->bitmap) {
2281 kfree(entry->bitmap);
34d52cb6
LZ
2282 ctl->total_bitmaps--;
2283 ctl->op->recalc_thresholds(ctl);
4e69b598 2284 }
dc89e982 2285 kmem_cache_free(btrfs_free_space_cachep, entry);
5e71b5d5
LZ
2286 }
2287
34d52cb6 2288 spin_unlock(&ctl->tree_lock);
5e71b5d5 2289
fa9c0d79
CM
2290 return ret;
2291}
2292
96303081
JB
2293static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group,
2294 struct btrfs_free_space *entry,
2295 struct btrfs_free_cluster *cluster,
1bb91902
AO
2296 u64 offset, u64 bytes,
2297 u64 cont1_bytes, u64 min_bytes)
96303081 2298{
34d52cb6 2299 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
96303081
JB
2300 unsigned long next_zero;
2301 unsigned long i;
1bb91902
AO
2302 unsigned long want_bits;
2303 unsigned long min_bits;
96303081
JB
2304 unsigned long found_bits;
2305 unsigned long start = 0;
2306 unsigned long total_found = 0;
4e69b598 2307 int ret;
96303081
JB
2308
2309 i = offset_to_bit(entry->offset, block_group->sectorsize,
2310 max_t(u64, offset, entry->offset));
1bb91902
AO
2311 want_bits = bytes_to_bits(bytes, block_group->sectorsize);
2312 min_bits = bytes_to_bits(min_bytes, block_group->sectorsize);
96303081
JB
2313
2314again:
2315 found_bits = 0;
2316 for (i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i);
2317 i < BITS_PER_BITMAP;
2318 i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i + 1)) {
2319 next_zero = find_next_zero_bit(entry->bitmap,
2320 BITS_PER_BITMAP, i);
1bb91902 2321 if (next_zero - i >= min_bits) {
96303081
JB
2322 found_bits = next_zero - i;
2323 break;
2324 }
2325 i = next_zero;
2326 }
2327
2328 if (!found_bits)
4e69b598 2329 return -ENOSPC;
96303081 2330
1bb91902 2331 if (!total_found) {
96303081 2332 start = i;
b78d09bc 2333 cluster->max_size = 0;
96303081
JB
2334 }
2335
2336 total_found += found_bits;
2337
2338 if (cluster->max_size < found_bits * block_group->sectorsize)
2339 cluster->max_size = found_bits * block_group->sectorsize;
2340
1bb91902
AO
2341 if (total_found < want_bits || cluster->max_size < cont1_bytes) {
2342 i = next_zero + 1;
96303081
JB
2343 goto again;
2344 }
2345
2346 cluster->window_start = start * block_group->sectorsize +
2347 entry->offset;
34d52cb6 2348 rb_erase(&entry->offset_index, &ctl->free_space_offset);
4e69b598
JB
2349 ret = tree_insert_offset(&cluster->root, entry->offset,
2350 &entry->offset_index, 1);
79787eaa 2351 BUG_ON(ret); /* -EEXIST; Logic error */
96303081 2352
3f7de037
JB
2353 trace_btrfs_setup_cluster(block_group, cluster,
2354 total_found * block_group->sectorsize, 1);
96303081
JB
2355 return 0;
2356}
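
The bitmap pass above is essentially a scan for a run of set bits at least min_bits long. A self-contained sketch of that scan over an ordinary word-sized bitmap; the kernel's find_next_bit()/find_next_zero_bit() helpers are replaced by a plain loop, so this only illustrates the shape of the search.

#include <stdio.h>
#include <limits.h>

#define NBITS 32

static int test_bit(const unsigned long *map, unsigned int i)
{
        unsigned int bits_per_long = sizeof(long) * CHAR_BIT;

        return (map[i / bits_per_long] >> (i % bits_per_long)) & 1UL;
}

/* Return the start of the first run of at least min_bits set bits, or -1. */
static int find_run(const unsigned long *map, unsigned int nbits,
                    unsigned int min_bits)
{
        unsigned int i, run = 0;

        for (i = 0; i < nbits; i++) {
                if (test_bit(map, i)) {
                        if (++run >= min_bits)
                                return i - run + 1;
                } else {
                        run = 0;
                }
        }
        return -1;
}

int main(void)
{
        unsigned long map[1] = { 0x3FC38UL };  /* bits 3-5 and 10-17 set */

        printf("first run of 4+ bits starts at bit %d\n",
               find_run(map, NBITS, 4));
        return 0;
}
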
2357
4e69b598
JB
2358/*
2359 * This searches the block group for just extents to fill the cluster with.
1bb91902
AO
2360 * Try to find a cluster with at least bytes total bytes, at least one
2361 * extent of cont1_bytes, and other clusters of at least min_bytes.
4e69b598 2362 */
3de85bb9
JB
2363static noinline int
2364setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
2365 struct btrfs_free_cluster *cluster,
2366 struct list_head *bitmaps, u64 offset, u64 bytes,
1bb91902 2367 u64 cont1_bytes, u64 min_bytes)
4e69b598 2368{
34d52cb6 2369 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
4e69b598
JB
2370 struct btrfs_free_space *first = NULL;
2371 struct btrfs_free_space *entry = NULL;
4e69b598
JB
2372 struct btrfs_free_space *last;
2373 struct rb_node *node;
2374 u64 window_start;
2375 u64 window_free;
2376 u64 max_extent;
3f7de037 2377 u64 total_size = 0;
4e69b598 2378
34d52cb6 2379 entry = tree_search_offset(ctl, offset, 0, 1);
4e69b598
JB
2380 if (!entry)
2381 return -ENOSPC;
2382
2383 /*
2384 * We don't want bitmaps, so just move along until we find a normal
2385 * extent entry.
2386 */
1bb91902
AO
2387 while (entry->bitmap || entry->bytes < min_bytes) {
2388 if (entry->bitmap && list_empty(&entry->list))
86d4a77b 2389 list_add_tail(&entry->list, bitmaps);
4e69b598
JB
2390 node = rb_next(&entry->offset_index);
2391 if (!node)
2392 return -ENOSPC;
2393 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2394 }
2395
2396 window_start = entry->offset;
2397 window_free = entry->bytes;
2398 max_extent = entry->bytes;
2399 first = entry;
2400 last = entry;
4e69b598 2401
1bb91902
AO
2402 for (node = rb_next(&entry->offset_index); node;
2403 node = rb_next(&entry->offset_index)) {
4e69b598
JB
2404 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2405
86d4a77b
JB
2406 if (entry->bitmap) {
2407 if (list_empty(&entry->list))
2408 list_add_tail(&entry->list, bitmaps);
4e69b598 2409 continue;
86d4a77b
JB
2410 }
2411
1bb91902
AO
2412 if (entry->bytes < min_bytes)
2413 continue;
2414
2415 last = entry;
2416 window_free += entry->bytes;
2417 if (entry->bytes > max_extent)
4e69b598 2418 max_extent = entry->bytes;
4e69b598
JB
2419 }
2420
1bb91902
AO
2421 if (window_free < bytes || max_extent < cont1_bytes)
2422 return -ENOSPC;
2423
4e69b598
JB
2424 cluster->window_start = first->offset;
2425
2426 node = &first->offset_index;
2427
2428 /*
2429 * now we've found our entries, pull them out of the free space
2430 * cache and put them into the cluster rbtree
2431 */
2432 do {
2433 int ret;
2434
2435 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2436 node = rb_next(&entry->offset_index);
1bb91902 2437 if (entry->bitmap || entry->bytes < min_bytes)
4e69b598
JB
2438 continue;
2439
34d52cb6 2440 rb_erase(&entry->offset_index, &ctl->free_space_offset);
4e69b598
JB
2441 ret = tree_insert_offset(&cluster->root, entry->offset,
2442 &entry->offset_index, 0);
3f7de037 2443 total_size += entry->bytes;
79787eaa 2444 BUG_ON(ret); /* -EEXIST; Logic error */
4e69b598
JB
2445 } while (node && entry != last);
2446
2447 cluster->max_size = max_extent;
3f7de037 2448 trace_btrfs_setup_cluster(block_group, cluster, total_size, 0);
4e69b598
JB
2449 return 0;
2450}
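
Stripped of the rbtree walking and the bitmap bookkeeping, the extent-only pass boils down to summing the window and tracking its largest single piece, then accepting the window only if the total reaches the request and the largest piece reaches cont1_bytes. A compact sketch of that accounting over a plain array, with assumed names:

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

struct extent { uint64_t offset; uint64_t bytes; };

/* Mirror the window_free / max_extent success test in userspace. */
static bool window_ok(const struct extent *e, int n,
                      uint64_t want_total, uint64_t cont1_bytes)
{
        uint64_t window_free = 0, max_extent = 0;
        int i;

        for (i = 0; i < n; i++) {
                window_free += e[i].bytes;
                if (e[i].bytes > max_extent)
                        max_extent = e[i].bytes;
        }
        return window_free >= want_total && max_extent >= cont1_bytes;
}

int main(void)
{
        struct extent e[] = {
                { 0, 64 * 1024 }, { 256 * 1024, 192 * 1024 }, { 1 << 20, 32 * 1024 },
        };

        printf("cluster possible: %s\n",
               window_ok(e, 3, 256 * 1024, 128 * 1024) ? "yes" : "no");
        return 0;
}
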
2451
2452/*
2453 * This specifically looks for bitmaps that may work in the cluster, we assume
2454 * that we have already failed to find extents that will work.
2455 */
3de85bb9
JB
2456static noinline int
2457setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,
2458 struct btrfs_free_cluster *cluster,
2459 struct list_head *bitmaps, u64 offset, u64 bytes,
1bb91902 2460 u64 cont1_bytes, u64 min_bytes)
4e69b598 2461{
34d52cb6 2462 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
4e69b598 2463 struct btrfs_free_space *entry;
4e69b598 2464 int ret = -ENOSPC;
0f0fbf1d 2465 u64 bitmap_offset = offset_to_bitmap(ctl, offset);
4e69b598 2466
34d52cb6 2467 if (ctl->total_bitmaps == 0)
4e69b598
JB
2468 return -ENOSPC;
2469
0f0fbf1d
LZ
2470 /*
2471 * The bitmap that covers offset won't be in the list unless offset
2472 * is just its start offset.
2473 */
2474 entry = list_first_entry(bitmaps, struct btrfs_free_space, list);
2475 if (entry->offset != bitmap_offset) {
2476 entry = tree_search_offset(ctl, bitmap_offset, 1, 0);
2477 if (entry && list_empty(&entry->list))
2478 list_add(&entry->list, bitmaps);
2479 }
2480
86d4a77b 2481 list_for_each_entry(entry, bitmaps, list) {
357b9784 2482 if (entry->bytes < bytes)
86d4a77b
JB
2483 continue;
2484 ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset,
1bb91902 2485 bytes, cont1_bytes, min_bytes);
86d4a77b
JB
2486 if (!ret)
2487 return 0;
2488 }
2489
2490 /*
52621cb6
LZ
2491 * The bitmaps list has all the bitmaps that record free space
2492 * starting after offset, so no more search is required.
86d4a77b 2493 */
52621cb6 2494 return -ENOSPC;
4e69b598
JB
2495}
2496
fa9c0d79
CM
2497/*
2498 * here we try to find a cluster of blocks in a block group. The goal
1bb91902 2499 * is to find at least bytes+empty_size.
fa9c0d79
CM
2500 * We might not find them all in one contiguous area.
2501 *
2502 * returns zero and sets up cluster if things worked out, otherwise
 2503 * it returns -ENOSPC
2504 */
2505int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
451d7585 2506 struct btrfs_root *root,
fa9c0d79
CM
2507 struct btrfs_block_group_cache *block_group,
2508 struct btrfs_free_cluster *cluster,
2509 u64 offset, u64 bytes, u64 empty_size)
2510{
34d52cb6 2511 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
86d4a77b 2512 struct btrfs_free_space *entry, *tmp;
52621cb6 2513 LIST_HEAD(bitmaps);
fa9c0d79 2514 u64 min_bytes;
1bb91902 2515 u64 cont1_bytes;
fa9c0d79
CM
2516 int ret;
2517
1bb91902
AO
2518 /*
2519 * Choose the minimum extent size we'll require for this
2520 * cluster. For SSD_SPREAD, don't allow any fragmentation.
 2521 * For metadata, allow allocations with smaller extents. For
2522 * data, keep it dense.
2523 */
451d7585 2524 if (btrfs_test_opt(root, SSD_SPREAD)) {
1bb91902 2525 cont1_bytes = min_bytes = bytes + empty_size;
451d7585 2526 } else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) {
1bb91902
AO
2527 cont1_bytes = bytes;
2528 min_bytes = block_group->sectorsize;
2529 } else {
2530 cont1_bytes = max(bytes, (bytes + empty_size) >> 2);
2531 min_bytes = block_group->sectorsize;
2532 }
fa9c0d79 2533
34d52cb6 2534 spin_lock(&ctl->tree_lock);
7d0d2e8e
JB
2535
2536 /*
2537 * If we know we don't have enough space to make a cluster don't even
2538 * bother doing all the work to try and find one.
2539 */
1bb91902 2540 if (ctl->free_space < bytes) {
34d52cb6 2541 spin_unlock(&ctl->tree_lock);
7d0d2e8e
JB
2542 return -ENOSPC;
2543 }
2544
fa9c0d79
CM
2545 spin_lock(&cluster->lock);
2546
2547 /* someone already found a cluster, hooray */
2548 if (cluster->block_group) {
2549 ret = 0;
2550 goto out;
2551 }
fa9c0d79 2552
3f7de037
JB
2553 trace_btrfs_find_cluster(block_group, offset, bytes, empty_size,
2554 min_bytes);
2555
2556 INIT_LIST_HEAD(&bitmaps);
86d4a77b 2557 ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset,
1bb91902
AO
2558 bytes + empty_size,
2559 cont1_bytes, min_bytes);
4e69b598 2560 if (ret)
86d4a77b 2561 ret = setup_cluster_bitmap(block_group, cluster, &bitmaps,
1bb91902
AO
2562 offset, bytes + empty_size,
2563 cont1_bytes, min_bytes);
86d4a77b
JB
2564
2565 /* Clear our temporary list */
2566 list_for_each_entry_safe(entry, tmp, &bitmaps, list)
2567 list_del_init(&entry->list);
fa9c0d79 2568
4e69b598
JB
2569 if (!ret) {
2570 atomic_inc(&block_group->count);
2571 list_add_tail(&cluster->block_group_list,
2572 &block_group->cluster_list);
2573 cluster->block_group = block_group;
3f7de037
JB
2574 } else {
2575 trace_btrfs_failed_cluster_setup(block_group);
fa9c0d79 2576 }
fa9c0d79
CM
2577out:
2578 spin_unlock(&cluster->lock);
34d52cb6 2579 spin_unlock(&ctl->tree_lock);
fa9c0d79
CM
2580
2581 return ret;
2582}
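
The sizing heuristic at the top of this function can be read on its own: SSD_SPREAD demands the whole request in one contiguous run, metadata only needs 'bytes' contiguous, and data asks for at least a quarter of the padded request in one piece. A small sketch of just that decision; the booleans stand in for the mount-option and block-group-flag checks and are not the real API.

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

struct cluster_req { uint64_t cont1_bytes; uint64_t min_bytes; };

static struct cluster_req pick_sizes(bool ssd_spread, bool metadata,
                                     uint64_t bytes, uint64_t empty_size,
                                     uint64_t sectorsize)
{
        struct cluster_req r;
        uint64_t quarter = (bytes + empty_size) >> 2;

        if (ssd_spread) {
                r.cont1_bytes = r.min_bytes = bytes + empty_size;
        } else if (metadata) {
                r.cont1_bytes = bytes;
                r.min_bytes = sectorsize;
        } else {
                r.cont1_bytes = bytes > quarter ? bytes : quarter;
                r.min_bytes = sectorsize;
        }
        return r;
}

int main(void)
{
        struct cluster_req r = pick_sizes(false, false, 1 << 20, 1 << 18, 4096);

        printf("data: cont1_bytes=%llu min_bytes=%llu\n",
               (unsigned long long)r.cont1_bytes,
               (unsigned long long)r.min_bytes);
        return 0;
}
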
2583
2584/*
2585 * simple code to zero out a cluster
2586 */
2587void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster)
2588{
2589 spin_lock_init(&cluster->lock);
2590 spin_lock_init(&cluster->refill_lock);
6bef4d31 2591 cluster->root = RB_ROOT;
fa9c0d79
CM
2592 cluster->max_size = 0;
2593 INIT_LIST_HEAD(&cluster->block_group_list);
2594 cluster->block_group = NULL;
2595}
2596
7fe1e641
LZ
2597static int do_trimming(struct btrfs_block_group_cache *block_group,
2598 u64 *total_trimmed, u64 start, u64 bytes,
2599 u64 reserved_start, u64 reserved_bytes)
f7039b1d 2600{
7fe1e641 2601 struct btrfs_space_info *space_info = block_group->space_info;
f7039b1d 2602 struct btrfs_fs_info *fs_info = block_group->fs_info;
7fe1e641
LZ
2603 int ret;
2604 int update = 0;
2605 u64 trimmed = 0;
f7039b1d 2606
7fe1e641
LZ
2607 spin_lock(&space_info->lock);
2608 spin_lock(&block_group->lock);
2609 if (!block_group->ro) {
2610 block_group->reserved += reserved_bytes;
2611 space_info->bytes_reserved += reserved_bytes;
2612 update = 1;
2613 }
2614 spin_unlock(&block_group->lock);
2615 spin_unlock(&space_info->lock);
2616
2617 ret = btrfs_error_discard_extent(fs_info->extent_root,
2618 start, bytes, &trimmed);
2619 if (!ret)
2620 *total_trimmed += trimmed;
2621
2622 btrfs_add_free_space(block_group, reserved_start, reserved_bytes);
2623
2624 if (update) {
2625 spin_lock(&space_info->lock);
2626 spin_lock(&block_group->lock);
2627 if (block_group->ro)
2628 space_info->bytes_readonly += reserved_bytes;
2629 block_group->reserved -= reserved_bytes;
2630 space_info->bytes_reserved -= reserved_bytes;
2631 spin_unlock(&space_info->lock);
2632 spin_unlock(&block_group->lock);
2633 }
2634
2635 return ret;
2636}
2637
2638static int trim_no_bitmap(struct btrfs_block_group_cache *block_group,
2639 u64 *total_trimmed, u64 start, u64 end, u64 minlen)
2640{
2641 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2642 struct btrfs_free_space *entry;
2643 struct rb_node *node;
2644 int ret = 0;
2645 u64 extent_start;
2646 u64 extent_bytes;
2647 u64 bytes;
f7039b1d
LD
2648
2649 while (start < end) {
34d52cb6 2650 spin_lock(&ctl->tree_lock);
f7039b1d 2651
34d52cb6
LZ
2652 if (ctl->free_space < minlen) {
2653 spin_unlock(&ctl->tree_lock);
f7039b1d
LD
2654 break;
2655 }
2656
34d52cb6 2657 entry = tree_search_offset(ctl, start, 0, 1);
7fe1e641 2658 if (!entry) {
34d52cb6 2659 spin_unlock(&ctl->tree_lock);
f7039b1d
LD
2660 break;
2661 }
2662
7fe1e641
LZ
2663 /* skip bitmaps */
2664 while (entry->bitmap) {
2665 node = rb_next(&entry->offset_index);
2666 if (!node) {
34d52cb6 2667 spin_unlock(&ctl->tree_lock);
7fe1e641 2668 goto out;
f7039b1d 2669 }
7fe1e641
LZ
2670 entry = rb_entry(node, struct btrfs_free_space,
2671 offset_index);
f7039b1d
LD
2672 }
2673
7fe1e641
LZ
2674 if (entry->offset >= end) {
2675 spin_unlock(&ctl->tree_lock);
2676 break;
f7039b1d
LD
2677 }
2678
7fe1e641
LZ
2679 extent_start = entry->offset;
2680 extent_bytes = entry->bytes;
2681 start = max(start, extent_start);
2682 bytes = min(extent_start + extent_bytes, end) - start;
2683 if (bytes < minlen) {
2684 spin_unlock(&ctl->tree_lock);
2685 goto next;
f7039b1d
LD
2686 }
2687
7fe1e641
LZ
2688 unlink_free_space(ctl, entry);
2689 kmem_cache_free(btrfs_free_space_cachep, entry);
2690
34d52cb6 2691 spin_unlock(&ctl->tree_lock);
f7039b1d 2692
7fe1e641
LZ
2693 ret = do_trimming(block_group, total_trimmed, start, bytes,
2694 extent_start, extent_bytes);
2695 if (ret)
2696 break;
2697next:
2698 start += bytes;
f7039b1d 2699
7fe1e641
LZ
2700 if (fatal_signal_pending(current)) {
2701 ret = -ERESTARTSYS;
2702 break;
2703 }
2704
2705 cond_resched();
2706 }
2707out:
2708 return ret;
2709}
2710
2711static int trim_bitmaps(struct btrfs_block_group_cache *block_group,
2712 u64 *total_trimmed, u64 start, u64 end, u64 minlen)
2713{
2714 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2715 struct btrfs_free_space *entry;
2716 int ret = 0;
2717 int ret2;
2718 u64 bytes;
2719 u64 offset = offset_to_bitmap(ctl, start);
2720
2721 while (offset < end) {
2722 bool next_bitmap = false;
2723
2724 spin_lock(&ctl->tree_lock);
2725
2726 if (ctl->free_space < minlen) {
2727 spin_unlock(&ctl->tree_lock);
2728 break;
2729 }
2730
2731 entry = tree_search_offset(ctl, offset, 1, 0);
2732 if (!entry) {
2733 spin_unlock(&ctl->tree_lock);
2734 next_bitmap = true;
2735 goto next;
2736 }
2737
2738 bytes = minlen;
2739 ret2 = search_bitmap(ctl, entry, &start, &bytes);
2740 if (ret2 || start >= end) {
2741 spin_unlock(&ctl->tree_lock);
2742 next_bitmap = true;
2743 goto next;
2744 }
2745
2746 bytes = min(bytes, end - start);
2747 if (bytes < minlen) {
2748 spin_unlock(&ctl->tree_lock);
2749 goto next;
2750 }
2751
2752 bitmap_clear_bits(ctl, entry, start, bytes);
2753 if (entry->bytes == 0)
2754 free_bitmap(ctl, entry);
2755
2756 spin_unlock(&ctl->tree_lock);
2757
2758 ret = do_trimming(block_group, total_trimmed, start, bytes,
2759 start, bytes);
2760 if (ret)
2761 break;
2762next:
2763 if (next_bitmap) {
2764 offset += BITS_PER_BITMAP * ctl->unit;
2765 } else {
2766 start += bytes;
2767 if (start >= offset + BITS_PER_BITMAP * ctl->unit)
2768 offset += BITS_PER_BITMAP * ctl->unit;
f7039b1d 2769 }
f7039b1d
LD
2770
2771 if (fatal_signal_pending(current)) {
2772 ret = -ERESTARTSYS;
2773 break;
2774 }
2775
2776 cond_resched();
2777 }
2778
2779 return ret;
2780}
581bb050 2781
7fe1e641
LZ
2782int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
2783 u64 *trimmed, u64 start, u64 end, u64 minlen)
2784{
2785 int ret;
2786
2787 *trimmed = 0;
2788
2789 ret = trim_no_bitmap(block_group, trimmed, start, end, minlen);
2790 if (ret)
2791 return ret;
2792
2793 ret = trim_bitmaps(block_group, trimmed, start, end, minlen);
2794
2795 return ret;
2796}
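
Both trim passes reduce to the same arithmetic: intersect a free extent with the caller's [start, end) window and skip anything shorter than minlen before issuing the discard. A sketch of just that clamping, userspace only, with invented names:

#include <stdio.h>
#include <stdint.h>

/*
 * Clamp [extent_start, extent_start + extent_bytes) to [start, end).
 * Returns the usable length (0 if empty or below minlen) and writes the
 * clamped start through *trim_start.
 */
static uint64_t clamp_trim(uint64_t extent_start, uint64_t extent_bytes,
                           uint64_t start, uint64_t end, uint64_t minlen,
                           uint64_t *trim_start)
{
        uint64_t extent_end = extent_start + extent_bytes;
        uint64_t lo = start > extent_start ? start : extent_start;
        uint64_t hi = end < extent_end ? end : extent_end;

        if (hi <= lo || hi - lo < minlen)
                return 0;
        *trim_start = lo;
        return hi - lo;
}

int main(void)
{
        uint64_t trim_start = 0;
        uint64_t len = clamp_trim(0, 1 << 20, 64 * 1024, 512 * 1024, 4096,
                                  &trim_start);

        printf("trim %llu bytes at %llu\n",
               (unsigned long long)len, (unsigned long long)trim_start);
        return 0;
}
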
2797
581bb050
LZ
2798/*
2799 * Find the left-most item in the cache tree, and then return the
2800 * smallest inode number in the item.
2801 *
2802 * Note: the returned inode number may not be the smallest one in
2803 * the tree, if the left-most item is a bitmap.
2804 */
2805u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root)
2806{
2807 struct btrfs_free_space_ctl *ctl = fs_root->free_ino_ctl;
2808 struct btrfs_free_space *entry = NULL;
2809 u64 ino = 0;
2810
2811 spin_lock(&ctl->tree_lock);
2812
2813 if (RB_EMPTY_ROOT(&ctl->free_space_offset))
2814 goto out;
2815
2816 entry = rb_entry(rb_first(&ctl->free_space_offset),
2817 struct btrfs_free_space, offset_index);
2818
2819 if (!entry->bitmap) {
2820 ino = entry->offset;
2821
2822 unlink_free_space(ctl, entry);
2823 entry->offset++;
2824 entry->bytes--;
2825 if (!entry->bytes)
2826 kmem_cache_free(btrfs_free_space_cachep, entry);
2827 else
2828 link_free_space(ctl, entry);
2829 } else {
2830 u64 offset = 0;
2831 u64 count = 1;
2832 int ret;
2833
2834 ret = search_bitmap(ctl, entry, &offset, &count);
79787eaa 2835 /* Logic error; Should be empty if it can't find anything */
581bb050
LZ
2836 BUG_ON(ret);
2837
2838 ino = offset;
2839 bitmap_clear_bits(ctl, entry, offset, 1);
2840 if (entry->bytes == 0)
2841 free_bitmap(ctl, entry);
2842 }
2843out:
2844 spin_unlock(&ctl->tree_lock);
2845
2846 return ino;
2847}
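
When the leftmost entry is a plain extent, the smallest free inode number is simply its offset, and handing it out shrinks the extent by one number. A tiny sketch of that bookkeeping (userspace, assumed names; the bitmap case would instead clear a single bit):

#include <stdio.h>
#include <stdint.h>

struct ino_extent { uint64_t offset; uint64_t count; };

/* Hand out the lowest number in the extent and shrink it by one. */
static uint64_t take_lowest_ino(struct ino_extent *e)
{
        uint64_t ino = e->offset;

        e->offset++;
        e->count--;
        return ino;
}

int main(void)
{
        struct ino_extent e = { 256, 10 };
        uint64_t ino = take_lowest_ino(&e);

        printf("allocated ino %llu, next candidate %llu, %llu left\n",
               (unsigned long long)ino,
               (unsigned long long)e.offset,
               (unsigned long long)e.count);
        return 0;
}
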
82d5902d
LZ
2848
2849struct inode *lookup_free_ino_inode(struct btrfs_root *root,
2850 struct btrfs_path *path)
2851{
2852 struct inode *inode = NULL;
2853
2854 spin_lock(&root->cache_lock);
2855 if (root->cache_inode)
2856 inode = igrab(root->cache_inode);
2857 spin_unlock(&root->cache_lock);
2858 if (inode)
2859 return inode;
2860
2861 inode = __lookup_free_space_inode(root, path, 0);
2862 if (IS_ERR(inode))
2863 return inode;
2864
2865 spin_lock(&root->cache_lock);
7841cb28 2866 if (!btrfs_fs_closing(root->fs_info))
82d5902d
LZ
2867 root->cache_inode = igrab(inode);
2868 spin_unlock(&root->cache_lock);
2869
2870 return inode;
2871}
2872
2873int create_free_ino_inode(struct btrfs_root *root,
2874 struct btrfs_trans_handle *trans,
2875 struct btrfs_path *path)
2876{
2877 return __create_free_space_inode(root, trans, path,
2878 BTRFS_FREE_INO_OBJECTID, 0);
2879}
2880
2881int load_free_ino_cache(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
2882{
2883 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
2884 struct btrfs_path *path;
2885 struct inode *inode;
2886 int ret = 0;
2887 u64 root_gen = btrfs_root_generation(&root->root_item);
2888
4b9465cb
CM
2889 if (!btrfs_test_opt(root, INODE_MAP_CACHE))
2890 return 0;
2891
82d5902d
LZ
2892 /*
2893 * If we're unmounting then just return, since this does a search on the
2894 * normal root and not the commit root and we could deadlock.
2895 */
7841cb28 2896 if (btrfs_fs_closing(fs_info))
82d5902d
LZ
2897 return 0;
2898
2899 path = btrfs_alloc_path();
2900 if (!path)
2901 return 0;
2902
2903 inode = lookup_free_ino_inode(root, path);
2904 if (IS_ERR(inode))
2905 goto out;
2906
2907 if (root_gen != BTRFS_I(inode)->generation)
2908 goto out_put;
2909
2910 ret = __load_free_space_cache(root, inode, ctl, path, 0);
2911
2912 if (ret < 0)
2913 printk(KERN_ERR "btrfs: failed to load free ino cache for "
2914 "root %llu\n", root->root_key.objectid);
2915out_put:
2916 iput(inode);
2917out:
2918 btrfs_free_path(path);
2919 return ret;
2920}
2921
2922int btrfs_write_out_ino_cache(struct btrfs_root *root,
2923 struct btrfs_trans_handle *trans,
2924 struct btrfs_path *path)
2925{
2926 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
2927 struct inode *inode;
2928 int ret;
2929
4b9465cb
CM
2930 if (!btrfs_test_opt(root, INODE_MAP_CACHE))
2931 return 0;
2932
82d5902d
LZ
2933 inode = lookup_free_ino_inode(root, path);
2934 if (IS_ERR(inode))
2935 return 0;
2936
2937 ret = __btrfs_write_out_cache(root, inode, ctl, NULL, trans, path, 0);
c09544e0
JB
2938 if (ret) {
2939 btrfs_delalloc_release_metadata(inode, inode->i_size);
2940#ifdef DEBUG
82d5902d
LZ
2941 printk(KERN_ERR "btrfs: failed to write free ino cache "
2942 "for root %llu\n", root->root_key.objectid);
c09544e0
JB
2943#endif
2944 }
82d5902d
LZ
2945
2946 iput(inode);
2947 return ret;
2948}