/*
 * Copyright (C) STRATO AG 2011.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */
/*
 * This module can be used to catch cases when the btrfs kernel
 * code executes write requests to the disk that bring the file
 * system into an inconsistent state. In such a state, a power-loss
 * or kernel panic event would cause the data on disk to be
 * lost or at least damaged.
 *
 * Code is added that examines all block write requests during
 * runtime (including writes of the super block). Three rules
 * are verified and an error is printed on violation of any of them:
 *
 * 1. It is not allowed to write a disk block which is
 *    currently referenced by the super block (either directly
 *    or indirectly).
 * 2. When a super block is written, it is verified that all
 *    referenced (directly or indirectly) blocks fulfill the
 *    following requirements:
 *    2a. All referenced blocks have either been present when
 *        the file system was mounted (i.e., they have been
 *        referenced by the super block) or they have been
 *        written since then, the write completion callback
 *        was called, no write error was indicated, and a
 *        FLUSH request to the device where these blocks are
 *        located was received and completed.
 *    2b. All referenced blocks need to have a generation
 *        number which is equal to the parent's number.
 *
 * One issue that was found using this module was that the log
 * tree on disk became temporarily corrupted because disk blocks
 * that had been in use for the log tree had been freed and
 * reused too early, while still being referenced by the written super
 * block.
 *
 * The search term in the kernel log that can be used to filter
 * on the existence of detected integrity issues is
 * "btrfs: attempt".
 *
 * The integrity check is enabled via mount options. These
 * mount options are only supported if the integrity check
 * tool is compiled by defining BTRFS_FS_CHECK_INTEGRITY.
 *
 * Example #1, apply integrity checks to all metadata:
 * mount /dev/sdb1 /mnt -o check_int
 *
 * Example #2, apply integrity checks to all metadata and
 * to data extents:
 * mount /dev/sdb1 /mnt -o check_int_data
 *
 * Example #3, apply integrity checks to all metadata and dump
 * the tree that the super block references to kernel messages
 * each time after a super block was written:
 * mount /dev/sdb1 /mnt -o check_int,check_int_print_mask=263
 *
 * If the integrity check tool is included and activated in
 * the mount options, plenty of kernel memory is used, and
 * plenty of additional CPU cycles are spent. Enabling this
 * functionality is not intended for normal use. In most
 * cases, unless you are a btrfs developer who needs to verify
 * the integrity of (super)-block write requests, do not
 * enable the config option BTRFS_FS_CHECK_INTEGRITY to
 * include and compile the integrity check tool.
 *
 * Expect millions of lines of information in the kernel log with an
 * enabled check_int_print_mask. Therefore set LOG_BUF_SHIFT in the
 * kernel config to at least 26 (which is 64MB). Usually the value is
 * limited to 21 (which is 2MB) in init/Kconfig. The file needs to be
 * changed like this before LOG_BUF_SHIFT can be set to a high value:
 * config LOG_BUF_SHIFT
 *       int "Kernel log buffer size (16 => 64KB, 17 => 128KB)"
 *       range 12 30
 */
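/*
 * Note added for clarity (not part of the original comment): this file is
 * only built when the kernel is configured with
 * CONFIG_BTRFS_FS_CHECK_INTEGRITY=y; without that option the check_int,
 * check_int_data and check_int_print_mask mount options are not available.
 */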
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/buffer_head.h>
#include <linux/mutex.h>
#include <linux/genhd.h>
#include <linux/blkdev.h>
#include <linux/vmalloc.h>
#include "ctree.h"
#include "disk-io.h"
#include "hash.h"
#include "transaction.h"
#include "extent_io.h"
#include "volumes.h"
#include "print-tree.h"
#include "locking.h"
#include "check-integrity.h"
#include "rcu-string.h"
#define BTRFSIC_BLOCK_HASHTABLE_SIZE 0x10000
#define BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE 0x10000
#define BTRFSIC_DEV2STATE_HASHTABLE_SIZE 0x100
#define BTRFSIC_BLOCK_MAGIC_NUMBER 0x14491051
#define BTRFSIC_BLOCK_LINK_MAGIC_NUMBER 0x11070807
#define BTRFSIC_DEV2STATE_MAGIC_NUMBER 0x20111530
#define BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER 20111300
#define BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL (200 - 6)	/* in characters,
							 * excluding " [...]" */
#define BTRFSIC_GENERATION_UNKNOWN ((u64)-1)

/*
 * The definition of the bitmask fields for the print_mask.
 * They are specified with the mount option check_int_print_mask.
 */
#define BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE			0x00000001
#define BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION		0x00000002
#define BTRFSIC_PRINT_MASK_TREE_AFTER_SB_WRITE			0x00000004
#define BTRFSIC_PRINT_MASK_TREE_BEFORE_SB_WRITE			0x00000008
#define BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH			0x00000010
#define BTRFSIC_PRINT_MASK_END_IO_BIO_BH			0x00000020
#define BTRFSIC_PRINT_MASK_VERBOSE				0x00000040
#define BTRFSIC_PRINT_MASK_VERY_VERBOSE				0x00000080
#define BTRFSIC_PRINT_MASK_INITIAL_TREE				0x00000100
#define BTRFSIC_PRINT_MASK_INITIAL_ALL_TREES			0x00000200
#define BTRFSIC_PRINT_MASK_INITIAL_DATABASE			0x00000400
#define BTRFSIC_PRINT_MASK_NUM_COPIES				0x00000800
#define BTRFSIC_PRINT_MASK_TREE_WITH_ALL_MIRRORS		0x00001000
#define BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH_VERBOSE		0x00002000
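/*
 * Example decoding, added for clarity (not part of the original source):
 * the value 263 used in example #3 above is 0x107, i.e. the bitwise OR of
 * BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE (0x001),
 * BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION (0x002),
 * BTRFSIC_PRINT_MASK_TREE_AFTER_SB_WRITE (0x004) and
 * BTRFSIC_PRINT_MASK_INITIAL_TREE (0x100).
 */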
struct btrfsic_dev_state;
struct btrfsic_state;

struct btrfsic_block {
	u32 magic_num;		/* only used for debug purposes */
	unsigned int is_metadata:1;	/* if it is meta-data, not data-data */
	unsigned int is_superblock:1;	/* if it is one of the superblocks */
	unsigned int is_iodone:1;	/* if is done by lower subsystem */
	unsigned int iodone_w_error:1;	/* error was indicated to endio */
	unsigned int never_written:1;	/* block was added because it was
					 * referenced, not because it was
					 * written */
	unsigned int mirror_num;	/* large enough to hold
					 * BTRFS_SUPER_MIRROR_MAX */
	struct btrfsic_dev_state *dev_state;
	u64 dev_bytenr;		/* key, physical byte num on disk */
	u64 logical_bytenr;	/* logical byte num on disk */
	u64 generation;
	struct btrfs_disk_key disk_key;	/* extra info to print in case of
					 * issues, will not always be correct */
	struct list_head collision_resolving_node;	/* list node */
	struct list_head all_blocks_node;	/* list node */

	/* the following two lists contain block_link items */
	struct list_head ref_to_list;	/* list */
	struct list_head ref_from_list;	/* list */
	struct btrfsic_block *next_in_same_bio;
	void *orig_bio_bh_private;
	union {
		bio_end_io_t *bio;
		bh_end_io_t *bh;
	} orig_bio_bh_end_io;
	int submit_bio_bh_rw;
	u64 flush_gen; /* only valid if !never_written */
};
/*
 * Elements of this type are allocated dynamically and required because
 * each block object can refer to and can be referenced from multiple
 * blocks. The key to look them up in the hashtable is the dev_bytenr of
 * the block referred to plus the dev_bytenr of the referring block.
 * The fact that they are searchable via a hashtable and that a
 * ref_cnt is maintained is not required for the btrfs integrity
 * check algorithm itself, it is only used to make the output more
 * readable in case an error is detected (an error is defined
 * as a write operation to a block while that block is still referenced).
 */
struct btrfsic_block_link {
	u32 magic_num;		/* only used for debug purposes */
	u32 ref_cnt;
	struct list_head node_ref_to;	/* list node */
	struct list_head node_ref_from;	/* list node */
	struct list_head collision_resolving_node;	/* list node */
	struct btrfsic_block *block_ref_to;
	struct btrfsic_block *block_ref_from;
	u64 parent_generation;
};
struct btrfsic_dev_state {
	u32 magic_num;		/* only used for debug purposes */
	struct block_device *bdev;
	struct btrfsic_state *state;
	struct list_head collision_resolving_node;	/* list node */
	struct btrfsic_block dummy_block_for_bio_bh_flush;
	u64 last_flush_gen;
	char name[BDEVNAME_SIZE];
};
struct btrfsic_block_hashtable {
	struct list_head table[BTRFSIC_BLOCK_HASHTABLE_SIZE];
};

struct btrfsic_block_link_hashtable {
	struct list_head table[BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE];
};

struct btrfsic_dev_state_hashtable {
	struct list_head table[BTRFSIC_DEV2STATE_HASHTABLE_SIZE];
};
struct btrfsic_block_data_ctx {
	u64 start;		/* virtual bytenr */
	u64 dev_bytenr;		/* physical bytenr on device */
	u32 len;
	struct btrfsic_dev_state *dev;
	char **datav;
	struct page **pagev;
	void *mem_to_free;
};
/* This structure is used to implement recursion without occupying
 * any stack space, refer to btrfsic_process_metablock() */
struct btrfsic_stack_frame {
	u32 magic;
	u32 nr;
	int error;
	int i;
	int limit_nesting;
	int num_copies;
	int mirror_num;
	struct btrfsic_block *block;
	struct btrfsic_block_data_ctx *block_ctx;
	struct btrfsic_block *next_block;
	struct btrfsic_block_data_ctx next_block_ctx;
	struct btrfs_header *hdr;
	struct btrfsic_stack_frame *prev;
};
/* Some state per mounted filesystem */
struct btrfsic_state {
	u32 print_mask;
	int include_extent_data;
	int csum_size;
	struct list_head all_blocks_list;
	struct btrfsic_block_hashtable block_hashtable;
	struct btrfsic_block_link_hashtable block_link_hashtable;
	struct btrfs_root *root;
	u64 max_superblock_generation;
	struct btrfsic_block *latest_superblock;
	u32 metablock_size;
	u32 datablock_size;
};
static void btrfsic_block_init(struct btrfsic_block *b);
static struct btrfsic_block *btrfsic_block_alloc(void);
static void btrfsic_block_free(struct btrfsic_block *b);
static void btrfsic_block_link_init(struct btrfsic_block_link *n);
static struct btrfsic_block_link *btrfsic_block_link_alloc(void);
static void btrfsic_block_link_free(struct btrfsic_block_link *n);
static void btrfsic_dev_state_init(struct btrfsic_dev_state *ds);
static struct btrfsic_dev_state *btrfsic_dev_state_alloc(void);
static void btrfsic_dev_state_free(struct btrfsic_dev_state *ds);
static void btrfsic_block_hashtable_init(struct btrfsic_block_hashtable *h);
static void btrfsic_block_hashtable_add(struct btrfsic_block *b,
					struct btrfsic_block_hashtable *h);
static void btrfsic_block_hashtable_remove(struct btrfsic_block *b);
static struct btrfsic_block *btrfsic_block_hashtable_lookup(
		struct block_device *bdev,
		u64 dev_bytenr,
		struct btrfsic_block_hashtable *h);
static void btrfsic_block_link_hashtable_init(
		struct btrfsic_block_link_hashtable *h);
static void btrfsic_block_link_hashtable_add(
		struct btrfsic_block_link *l,
		struct btrfsic_block_link_hashtable *h);
static void btrfsic_block_link_hashtable_remove(struct btrfsic_block_link *l);
static struct btrfsic_block_link *btrfsic_block_link_hashtable_lookup(
		struct block_device *bdev_ref_to,
		u64 dev_bytenr_ref_to,
		struct block_device *bdev_ref_from,
		u64 dev_bytenr_ref_from,
		struct btrfsic_block_link_hashtable *h);
static void btrfsic_dev_state_hashtable_init(
		struct btrfsic_dev_state_hashtable *h);
static void btrfsic_dev_state_hashtable_add(
		struct btrfsic_dev_state *ds,
		struct btrfsic_dev_state_hashtable *h);
static void btrfsic_dev_state_hashtable_remove(struct btrfsic_dev_state *ds);
static struct btrfsic_dev_state *btrfsic_dev_state_hashtable_lookup(
		struct block_device *bdev,
		struct btrfsic_dev_state_hashtable *h);
static struct btrfsic_stack_frame *btrfsic_stack_frame_alloc(void);
static void btrfsic_stack_frame_free(struct btrfsic_stack_frame *sf);
static int btrfsic_process_superblock(struct btrfsic_state *state,
				      struct btrfs_fs_devices *fs_devices);
static int btrfsic_process_metablock(struct btrfsic_state *state,
				     struct btrfsic_block *block,
				     struct btrfsic_block_data_ctx *block_ctx,
				     int limit_nesting, int force_iodone_flag);
static void btrfsic_read_from_block_data(
		struct btrfsic_block_data_ctx *block_ctx,
		void *dst, u32 offset, size_t len);
static int btrfsic_create_link_to_next_block(
		struct btrfsic_state *state,
		struct btrfsic_block *block,
		struct btrfsic_block_data_ctx *block_ctx,
		u64 next_bytenr,
		int limit_nesting,
		struct btrfsic_block_data_ctx *next_block_ctx,
		struct btrfsic_block **next_blockp,
		int force_iodone_flag,
		int *num_copiesp, int *mirror_nump,
		struct btrfs_disk_key *disk_key,
		u64 parent_generation);
static int btrfsic_handle_extent_data(struct btrfsic_state *state,
				      struct btrfsic_block *block,
				      struct btrfsic_block_data_ctx *block_ctx,
				      u32 item_offset, int force_iodone_flag);
static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len,
			     struct btrfsic_block_data_ctx *block_ctx_out,
			     int mirror_num);
static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx);
static int btrfsic_read_block(struct btrfsic_state *state,
			      struct btrfsic_block_data_ctx *block_ctx);
static void btrfsic_dump_database(struct btrfsic_state *state);
static int btrfsic_test_for_metadata(struct btrfsic_state *state,
				     char **datav, unsigned int num_pages);
static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state,
					  u64 dev_bytenr, char **mapped_datav,
					  unsigned int num_pages,
					  struct bio *bio, int *bio_is_patched,
					  struct buffer_head *bh,
					  int submit_bio_bh_rw);
static int btrfsic_process_written_superblock(
		struct btrfsic_state *state,
		struct btrfsic_block *const block,
		struct btrfs_super_block *const super_hdr);
static void btrfsic_bio_end_io(struct bio *bp);
static void btrfsic_bh_end_io(struct buffer_head *bh, int uptodate);
static int btrfsic_is_block_ref_by_superblock(const struct btrfsic_state *state,
					      const struct btrfsic_block *block,
					      int recursion_level);
static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state,
					struct btrfsic_block *const block,
					int recursion_level);
static void btrfsic_print_add_link(const struct btrfsic_state *state,
				   const struct btrfsic_block_link *l);
static void btrfsic_print_rem_link(const struct btrfsic_state *state,
				   const struct btrfsic_block_link *l);
static char btrfsic_get_block_type(const struct btrfsic_state *state,
				   const struct btrfsic_block *block);
static void btrfsic_dump_tree(const struct btrfsic_state *state);
static void btrfsic_dump_tree_sub(const struct btrfsic_state *state,
				  const struct btrfsic_block *block,
				  int indent_level);
static struct btrfsic_block_link *btrfsic_block_link_lookup_or_add(
		struct btrfsic_state *state,
		struct btrfsic_block_data_ctx *next_block_ctx,
		struct btrfsic_block *next_block,
		struct btrfsic_block *from_block,
		u64 parent_generation);
static struct btrfsic_block *btrfsic_block_lookup_or_add(
		struct btrfsic_state *state,
		struct btrfsic_block_data_ctx *block_ctx,
		const char *additional_string,
		int is_metadata,
		int is_iodone,
		int never_written,
		int mirror_num,
		int *was_created);
static int btrfsic_process_superblock_dev_mirror(
		struct btrfsic_state *state,
		struct btrfsic_dev_state *dev_state,
		struct btrfs_device *device,
		int superblock_mirror_num,
		struct btrfsic_dev_state **selected_dev_state,
		struct btrfs_super_block *selected_super);
static struct btrfsic_dev_state *btrfsic_dev_state_lookup(
		struct block_device *bdev);
static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state,
					   u64 bytenr,
					   struct btrfsic_dev_state *dev_state,
					   u64 dev_bytenr);

static struct mutex btrfsic_mutex;
static int btrfsic_is_initialized;
static struct btrfsic_dev_state_hashtable btrfsic_dev_state_hashtable;
static void btrfsic_block_init(struct btrfsic_block *b)
{
	b->magic_num = BTRFSIC_BLOCK_MAGIC_NUMBER;
	b->dev_state = NULL;
	b->dev_bytenr = 0;
	b->logical_bytenr = 0;
	b->generation = BTRFSIC_GENERATION_UNKNOWN;
	b->disk_key.objectid = 0;
	b->disk_key.type = 0;
	b->disk_key.offset = 0;
	b->is_metadata = 0;
	b->is_superblock = 0;
	b->is_iodone = 0;
	b->iodone_w_error = 0;
	b->never_written = 0;
	b->mirror_num = 0;
	b->next_in_same_bio = NULL;
	b->orig_bio_bh_private = NULL;
	b->orig_bio_bh_end_io.bio = NULL;
	INIT_LIST_HEAD(&b->collision_resolving_node);
	INIT_LIST_HEAD(&b->all_blocks_node);
	INIT_LIST_HEAD(&b->ref_to_list);
	INIT_LIST_HEAD(&b->ref_from_list);
	b->submit_bio_bh_rw = 0;
	b->flush_gen = 0;
}
static struct btrfsic_block *btrfsic_block_alloc(void)
{
	struct btrfsic_block *b;

	b = kzalloc(sizeof(*b), GFP_NOFS);
	if (NULL != b)
		btrfsic_block_init(b);

	return b;
}

static void btrfsic_block_free(struct btrfsic_block *b)
{
	BUG_ON(!(NULL == b || BTRFSIC_BLOCK_MAGIC_NUMBER == b->magic_num));
	kfree(b);
}
static void btrfsic_block_link_init(struct btrfsic_block_link *l)
{
	l->magic_num = BTRFSIC_BLOCK_LINK_MAGIC_NUMBER;
	l->ref_cnt = 1;
	INIT_LIST_HEAD(&l->node_ref_to);
	INIT_LIST_HEAD(&l->node_ref_from);
	INIT_LIST_HEAD(&l->collision_resolving_node);
	l->block_ref_to = NULL;
	l->block_ref_from = NULL;
}

static struct btrfsic_block_link *btrfsic_block_link_alloc(void)
{
	struct btrfsic_block_link *l;

	l = kzalloc(sizeof(*l), GFP_NOFS);
	if (NULL != l)
		btrfsic_block_link_init(l);

	return l;
}

static void btrfsic_block_link_free(struct btrfsic_block_link *l)
{
	BUG_ON(!(NULL == l || BTRFSIC_BLOCK_LINK_MAGIC_NUMBER == l->magic_num));
	kfree(l);
}
static void btrfsic_dev_state_init(struct btrfsic_dev_state *ds)
{
	ds->magic_num = BTRFSIC_DEV2STATE_MAGIC_NUMBER;
	ds->bdev = NULL;
	ds->state = NULL;
	ds->name[0] = '\0';
	INIT_LIST_HEAD(&ds->collision_resolving_node);
	ds->last_flush_gen = 0;
	btrfsic_block_init(&ds->dummy_block_for_bio_bh_flush);
	ds->dummy_block_for_bio_bh_flush.is_iodone = 1;
	ds->dummy_block_for_bio_bh_flush.dev_state = ds;
}

static struct btrfsic_dev_state *btrfsic_dev_state_alloc(void)
{
	struct btrfsic_dev_state *ds;

	ds = kzalloc(sizeof(*ds), GFP_NOFS);
	if (NULL != ds)
		btrfsic_dev_state_init(ds);

	return ds;
}

static void btrfsic_dev_state_free(struct btrfsic_dev_state *ds)
{
	BUG_ON(!(NULL == ds ||
		 BTRFSIC_DEV2STATE_MAGIC_NUMBER == ds->magic_num));
	kfree(ds);
}
static void btrfsic_block_hashtable_init(struct btrfsic_block_hashtable *h)
{
	int i;

	for (i = 0; i < BTRFSIC_BLOCK_HASHTABLE_SIZE; i++)
		INIT_LIST_HEAD(h->table + i);
}

static void btrfsic_block_hashtable_add(struct btrfsic_block *b,
					struct btrfsic_block_hashtable *h)
{
	const unsigned int hashval =
	    (((unsigned int)(b->dev_bytenr >> 16)) ^
	     ((unsigned int)((uintptr_t)b->dev_state->bdev))) &
	    (BTRFSIC_BLOCK_HASHTABLE_SIZE - 1);

	list_add(&b->collision_resolving_node, h->table + hashval);
}

static void btrfsic_block_hashtable_remove(struct btrfsic_block *b)
{
	list_del(&b->collision_resolving_node);
}

static struct btrfsic_block *btrfsic_block_hashtable_lookup(
		struct block_device *bdev,
		u64 dev_bytenr,
		struct btrfsic_block_hashtable *h)
{
	const unsigned int hashval =
	    (((unsigned int)(dev_bytenr >> 16)) ^
	     ((unsigned int)((uintptr_t)bdev))) &
	    (BTRFSIC_BLOCK_HASHTABLE_SIZE - 1);
	struct btrfsic_block *b;

	list_for_each_entry(b, h->table + hashval, collision_resolving_node) {
		if (b->dev_state->bdev == bdev && b->dev_bytenr == dev_bytenr)
			return b;
	}

	return NULL;
}
static void btrfsic_block_link_hashtable_init(
		struct btrfsic_block_link_hashtable *h)
{
	int i;

	for (i = 0; i < BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE; i++)
		INIT_LIST_HEAD(h->table + i);
}

static void btrfsic_block_link_hashtable_add(
		struct btrfsic_block_link *l,
		struct btrfsic_block_link_hashtable *h)
{
	const unsigned int hashval =
	    (((unsigned int)(l->block_ref_to->dev_bytenr >> 16)) ^
	     ((unsigned int)(l->block_ref_from->dev_bytenr >> 16)) ^
	     ((unsigned int)((uintptr_t)l->block_ref_to->dev_state->bdev)) ^
	     ((unsigned int)((uintptr_t)l->block_ref_from->dev_state->bdev)))
	    & (BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE - 1);

	BUG_ON(NULL == l->block_ref_to);
	BUG_ON(NULL == l->block_ref_from);
	list_add(&l->collision_resolving_node, h->table + hashval);
}

static void btrfsic_block_link_hashtable_remove(struct btrfsic_block_link *l)
{
	list_del(&l->collision_resolving_node);
}

static struct btrfsic_block_link *btrfsic_block_link_hashtable_lookup(
		struct block_device *bdev_ref_to,
		u64 dev_bytenr_ref_to,
		struct block_device *bdev_ref_from,
		u64 dev_bytenr_ref_from,
		struct btrfsic_block_link_hashtable *h)
{
	const unsigned int hashval =
	    (((unsigned int)(dev_bytenr_ref_to >> 16)) ^
	     ((unsigned int)(dev_bytenr_ref_from >> 16)) ^
	     ((unsigned int)((uintptr_t)bdev_ref_to)) ^
	     ((unsigned int)((uintptr_t)bdev_ref_from))) &
	    (BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE - 1);
	struct btrfsic_block_link *l;

	list_for_each_entry(l, h->table + hashval, collision_resolving_node) {
		BUG_ON(NULL == l->block_ref_to);
		BUG_ON(NULL == l->block_ref_from);
		if (l->block_ref_to->dev_state->bdev == bdev_ref_to &&
		    l->block_ref_to->dev_bytenr == dev_bytenr_ref_to &&
		    l->block_ref_from->dev_state->bdev == bdev_ref_from &&
		    l->block_ref_from->dev_bytenr == dev_bytenr_ref_from)
			return l;
	}

	return NULL;
}
static void btrfsic_dev_state_hashtable_init(
		struct btrfsic_dev_state_hashtable *h)
{
	int i;

	for (i = 0; i < BTRFSIC_DEV2STATE_HASHTABLE_SIZE; i++)
		INIT_LIST_HEAD(h->table + i);
}

static void btrfsic_dev_state_hashtable_add(
		struct btrfsic_dev_state *ds,
		struct btrfsic_dev_state_hashtable *h)
{
	const unsigned int hashval =
	    (((unsigned int)((uintptr_t)ds->bdev)) &
	     (BTRFSIC_DEV2STATE_HASHTABLE_SIZE - 1));

	list_add(&ds->collision_resolving_node, h->table + hashval);
}

static void btrfsic_dev_state_hashtable_remove(struct btrfsic_dev_state *ds)
{
	list_del(&ds->collision_resolving_node);
}

static struct btrfsic_dev_state *btrfsic_dev_state_hashtable_lookup(
		struct block_device *bdev,
		struct btrfsic_dev_state_hashtable *h)
{
	const unsigned int hashval =
	    (((unsigned int)((uintptr_t)bdev)) &
	     (BTRFSIC_DEV2STATE_HASHTABLE_SIZE - 1));
	struct btrfsic_dev_state *ds;

	list_for_each_entry(ds, h->table + hashval, collision_resolving_node) {
		if (ds->bdev == bdev)
			return ds;
	}

	return NULL;
}
static int btrfsic_process_superblock(struct btrfsic_state *state,
				      struct btrfs_fs_devices *fs_devices)
{
	int ret = 0;
	struct btrfs_super_block *selected_super;
	struct list_head *dev_head = &fs_devices->devices;
	struct btrfs_device *device;
	struct btrfsic_dev_state *selected_dev_state = NULL;
	int pass;

	BUG_ON(NULL == state);
	selected_super = kzalloc(sizeof(*selected_super), GFP_NOFS);
	if (NULL == selected_super) {
		printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
		return -ENOMEM;
	}

	list_for_each_entry(device, dev_head, dev_list) {
		int i;
		struct btrfsic_dev_state *dev_state;

		if (!device->bdev || !device->name)
			continue;

		dev_state = btrfsic_dev_state_lookup(device->bdev);
		BUG_ON(NULL == dev_state);
		for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
			ret = btrfsic_process_superblock_dev_mirror(
					state, dev_state, device, i,
					&selected_dev_state, selected_super);
			if (0 != ret && 0 == i) {
				kfree(selected_super);
				return ret;
			}
		}
	}

	if (NULL == state->latest_superblock) {
		printk(KERN_INFO "btrfsic: no superblock found!\n");
		kfree(selected_super);
		return -1;
	}

	state->csum_size = btrfs_super_csum_size(selected_super);

	for (pass = 0; pass < 3; pass++) {
		int num_copies;
		int mirror_num;
		u64 next_bytenr;

		switch (pass) {
		case 0:
			next_bytenr = btrfs_super_root(selected_super);
			if (state->print_mask &
			    BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
				printk(KERN_INFO "root@%llu\n", next_bytenr);
			break;
		case 1:
			next_bytenr = btrfs_super_chunk_root(selected_super);
			if (state->print_mask &
			    BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
				printk(KERN_INFO "chunk@%llu\n", next_bytenr);
			break;
		case 2:
			next_bytenr = btrfs_super_log_root(selected_super);
			if (0 == next_bytenr)
				continue;
			if (state->print_mask &
			    BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
				printk(KERN_INFO "log@%llu\n", next_bytenr);
			break;
		}

		num_copies =
		    btrfs_num_copies(state->root->fs_info,
				     next_bytenr, state->metablock_size);
		if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
			printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
			       next_bytenr, num_copies);

		for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
			struct btrfsic_block *next_block;
			struct btrfsic_block_data_ctx tmp_next_block_ctx;
			struct btrfsic_block_link *l;

			ret = btrfsic_map_block(state, next_bytenr,
						state->metablock_size,
						&tmp_next_block_ctx,
						mirror_num);
			if (ret) {
				printk(KERN_INFO "btrfsic:"
				       " btrfsic_map_block(root @%llu,"
				       " mirror %d) failed!\n",
				       next_bytenr, mirror_num);
				kfree(selected_super);
				return -1;
			}

			next_block = btrfsic_block_hashtable_lookup(
					tmp_next_block_ctx.dev->bdev,
					tmp_next_block_ctx.dev_bytenr,
					&state->block_hashtable);
			BUG_ON(NULL == next_block);

			l = btrfsic_block_link_hashtable_lookup(
					tmp_next_block_ctx.dev->bdev,
					tmp_next_block_ctx.dev_bytenr,
					state->latest_superblock->dev_state->
					bdev,
					state->latest_superblock->dev_bytenr,
					&state->block_link_hashtable);
			BUG_ON(NULL == l);

			ret = btrfsic_read_block(state, &tmp_next_block_ctx);
			if (ret < (int)PAGE_CACHE_SIZE) {
				printk(KERN_INFO
				       "btrfsic: read @logical %llu failed!\n",
				       tmp_next_block_ctx.start);
				btrfsic_release_block_ctx(&tmp_next_block_ctx);
				kfree(selected_super);
				return -1;
			}

			ret = btrfsic_process_metablock(state,
							next_block,
							&tmp_next_block_ctx,
							BTRFS_MAX_LEVEL + 3, 1);
			btrfsic_release_block_ctx(&tmp_next_block_ctx);
		}
	}

	kfree(selected_super);
	return ret;
}
static int btrfsic_process_superblock_dev_mirror(
		struct btrfsic_state *state,
		struct btrfsic_dev_state *dev_state,
		struct btrfs_device *device,
		int superblock_mirror_num,
		struct btrfsic_dev_state **selected_dev_state,
		struct btrfs_super_block *selected_super)
{
	struct btrfs_super_block *super_tmp;
	u64 dev_bytenr;
	struct buffer_head *bh;
	struct btrfsic_block *superblock_tmp;
	int pass;
	struct block_device *const superblock_bdev = device->bdev;

	/* super block bytenr is always the unmapped device bytenr */
	dev_bytenr = btrfs_sb_offset(superblock_mirror_num);
	if (dev_bytenr + BTRFS_SUPER_INFO_SIZE > device->commit_total_bytes)
		return -1;
	bh = __bread(superblock_bdev, dev_bytenr / 4096,
		     BTRFS_SUPER_INFO_SIZE);
	if (NULL == bh)
		return -1;
	super_tmp = (struct btrfs_super_block *)
	    (bh->b_data + (dev_bytenr & 4095));

	if (btrfs_super_bytenr(super_tmp) != dev_bytenr ||
	    btrfs_super_magic(super_tmp) != BTRFS_MAGIC ||
	    memcmp(device->uuid, super_tmp->dev_item.uuid, BTRFS_UUID_SIZE) ||
	    btrfs_super_nodesize(super_tmp) != state->metablock_size ||
	    btrfs_super_sectorsize(super_tmp) != state->datablock_size) {
		brelse(bh);
		return 0;
	}

	superblock_tmp =
	    btrfsic_block_hashtable_lookup(superblock_bdev,
					   dev_bytenr,
					   &state->block_hashtable);
	if (NULL == superblock_tmp) {
		superblock_tmp = btrfsic_block_alloc();
		if (NULL == superblock_tmp) {
			printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
			brelse(bh);
			return -1;
		}
		/* for superblock, only the dev_bytenr makes sense */
		superblock_tmp->dev_bytenr = dev_bytenr;
		superblock_tmp->dev_state = dev_state;
		superblock_tmp->logical_bytenr = dev_bytenr;
		superblock_tmp->generation = btrfs_super_generation(super_tmp);
		superblock_tmp->is_metadata = 1;
		superblock_tmp->is_superblock = 1;
		superblock_tmp->is_iodone = 1;
		superblock_tmp->never_written = 0;
		superblock_tmp->mirror_num = 1 + superblock_mirror_num;
		if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE)
			btrfs_info_in_rcu(device->dev_root->fs_info,
				"new initial S-block (bdev %p, %s) @%llu (%s/%llu/%d)",
				superblock_bdev,
				rcu_str_deref(device->name), dev_bytenr,
				dev_state->name, dev_bytenr,
				superblock_mirror_num);
		list_add(&superblock_tmp->all_blocks_node,
			 &state->all_blocks_list);
		btrfsic_block_hashtable_add(superblock_tmp,
					    &state->block_hashtable);
	}

	/* select the one with the highest generation field */
	if (btrfs_super_generation(super_tmp) >
	    state->max_superblock_generation ||
	    0 == state->max_superblock_generation) {
		memcpy(selected_super, super_tmp, sizeof(*selected_super));
		*selected_dev_state = dev_state;
		state->max_superblock_generation =
		    btrfs_super_generation(super_tmp);
		state->latest_superblock = superblock_tmp;
	}

	for (pass = 0; pass < 3; pass++) {
		u64 next_bytenr;
		int num_copies;
		int mirror_num;
		const char *additional_string = NULL;
		struct btrfs_disk_key tmp_disk_key;

		tmp_disk_key.type = BTRFS_ROOT_ITEM_KEY;
		tmp_disk_key.offset = 0;
		switch (pass) {
		case 0:
			btrfs_set_disk_key_objectid(&tmp_disk_key,
						    BTRFS_ROOT_TREE_OBJECTID);
			additional_string = "initial root ";
			next_bytenr = btrfs_super_root(super_tmp);
			break;
		case 1:
			btrfs_set_disk_key_objectid(&tmp_disk_key,
						    BTRFS_CHUNK_TREE_OBJECTID);
			additional_string = "initial chunk ";
			next_bytenr = btrfs_super_chunk_root(super_tmp);
			break;
		case 2:
			btrfs_set_disk_key_objectid(&tmp_disk_key,
						    BTRFS_TREE_LOG_OBJECTID);
			additional_string = "initial log ";
			next_bytenr = btrfs_super_log_root(super_tmp);
			if (0 == next_bytenr)
				continue;
			break;
		}

		num_copies =
		    btrfs_num_copies(state->root->fs_info,
				     next_bytenr, state->metablock_size);
		if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
			printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
			       next_bytenr, num_copies);
		for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
			struct btrfsic_block *next_block;
			struct btrfsic_block_data_ctx tmp_next_block_ctx;
			struct btrfsic_block_link *l;

			if (btrfsic_map_block(state, next_bytenr,
					      state->metablock_size,
					      &tmp_next_block_ctx,
					      mirror_num)) {
				printk(KERN_INFO "btrfsic: btrfsic_map_block("
				       "bytenr @%llu, mirror %d) failed!\n",
				       next_bytenr, mirror_num);
				brelse(bh);
				return -1;
			}

			next_block = btrfsic_block_lookup_or_add(
					state, &tmp_next_block_ctx,
					additional_string, 1, 1, 0,
					mirror_num, NULL);
			if (NULL == next_block) {
				btrfsic_release_block_ctx(&tmp_next_block_ctx);
				brelse(bh);
				return -1;
			}

			next_block->disk_key = tmp_disk_key;
			next_block->generation = BTRFSIC_GENERATION_UNKNOWN;
			l = btrfsic_block_link_lookup_or_add(
					state, &tmp_next_block_ctx,
					next_block, superblock_tmp,
					BTRFSIC_GENERATION_UNKNOWN);
			btrfsic_release_block_ctx(&tmp_next_block_ctx);
			if (NULL == l) {
				brelse(bh);
				return -1;
			}
		}
	}
	if (state->print_mask & BTRFSIC_PRINT_MASK_INITIAL_ALL_TREES)
		btrfsic_dump_tree_sub(state, superblock_tmp, 0);

	brelse(bh);
	return 0;
}
static struct btrfsic_stack_frame *btrfsic_stack_frame_alloc(void)
{
	struct btrfsic_stack_frame *sf;

	sf = kzalloc(sizeof(*sf), GFP_NOFS);
	if (NULL == sf)
		printk(KERN_INFO "btrfsic: alloc memory failed!\n");
	else
		sf->magic = BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER;
	return sf;
}

static void btrfsic_stack_frame_free(struct btrfsic_stack_frame *sf)
{
	BUG_ON(!(NULL == sf ||
		 BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER == sf->magic));
	kfree(sf);
}
962 static int btrfsic_process_metablock(
963 struct btrfsic_state
*state
,
964 struct btrfsic_block
*const first_block
,
965 struct btrfsic_block_data_ctx
*const first_block_ctx
,
966 int first_limit_nesting
, int force_iodone_flag
)
968 struct btrfsic_stack_frame initial_stack_frame
= { 0 };
969 struct btrfsic_stack_frame
*sf
;
970 struct btrfsic_stack_frame
*next_stack
;
971 struct btrfs_header
*const first_hdr
=
972 (struct btrfs_header
*)first_block_ctx
->datav
[0];
975 sf
= &initial_stack_frame
;
978 sf
->limit_nesting
= first_limit_nesting
;
979 sf
->block
= first_block
;
980 sf
->block_ctx
= first_block_ctx
;
981 sf
->next_block
= NULL
;
985 continue_with_new_stack_frame
:
986 sf
->block
->generation
= le64_to_cpu(sf
->hdr
->generation
);
987 if (0 == sf
->hdr
->level
) {
988 struct btrfs_leaf
*const leafhdr
=
989 (struct btrfs_leaf
*)sf
->hdr
;
992 sf
->nr
= btrfs_stack_header_nritems(&leafhdr
->header
);
994 if (state
->print_mask
& BTRFSIC_PRINT_MASK_VERBOSE
)
996 "leaf %llu items %d generation %llu"
998 sf
->block_ctx
->start
, sf
->nr
,
999 btrfs_stack_header_generation(
1001 btrfs_stack_header_owner(
1005 continue_with_current_leaf_stack_frame
:
1006 if (0 == sf
->num_copies
|| sf
->mirror_num
> sf
->num_copies
) {
1011 if (sf
->i
< sf
->nr
) {
1012 struct btrfs_item disk_item
;
1013 u32 disk_item_offset
=
1014 (uintptr_t)(leafhdr
->items
+ sf
->i
) -
1016 struct btrfs_disk_key
*disk_key
;
1021 if (disk_item_offset
+ sizeof(struct btrfs_item
) >
1022 sf
->block_ctx
->len
) {
1023 leaf_item_out_of_bounce_error
:
1025 "btrfsic: leaf item out of bounce at logical %llu, dev %s\n",
1026 sf
->block_ctx
->start
,
1027 sf
->block_ctx
->dev
->name
);
1028 goto one_stack_frame_backwards
;
1030 btrfsic_read_from_block_data(sf
->block_ctx
,
1033 sizeof(struct btrfs_item
));
1034 item_offset
= btrfs_stack_item_offset(&disk_item
);
1035 item_size
= btrfs_stack_item_size(&disk_item
);
1036 disk_key
= &disk_item
.key
;
1037 type
= btrfs_disk_key_type(disk_key
);
1039 if (BTRFS_ROOT_ITEM_KEY
== type
) {
1040 struct btrfs_root_item root_item
;
1041 u32 root_item_offset
;
1044 root_item_offset
= item_offset
+
1045 offsetof(struct btrfs_leaf
, items
);
1046 if (root_item_offset
+ item_size
>
1048 goto leaf_item_out_of_bounce_error
;
1049 btrfsic_read_from_block_data(
1050 sf
->block_ctx
, &root_item
,
1053 next_bytenr
= btrfs_root_bytenr(&root_item
);
1056 btrfsic_create_link_to_next_block(
1062 &sf
->next_block_ctx
,
1068 btrfs_root_generation(
1071 goto one_stack_frame_backwards
;
1073 if (NULL
!= sf
->next_block
) {
1074 struct btrfs_header
*const next_hdr
=
1075 (struct btrfs_header
*)
1076 sf
->next_block_ctx
.datav
[0];
1079 btrfsic_stack_frame_alloc();
1080 if (NULL
== next_stack
) {
1082 btrfsic_release_block_ctx(
1085 goto one_stack_frame_backwards
;
1089 next_stack
->block
= sf
->next_block
;
1090 next_stack
->block_ctx
=
1091 &sf
->next_block_ctx
;
1092 next_stack
->next_block
= NULL
;
1093 next_stack
->hdr
= next_hdr
;
1094 next_stack
->limit_nesting
=
1095 sf
->limit_nesting
- 1;
1096 next_stack
->prev
= sf
;
1098 goto continue_with_new_stack_frame
;
1100 } else if (BTRFS_EXTENT_DATA_KEY
== type
&&
1101 state
->include_extent_data
) {
1102 sf
->error
= btrfsic_handle_extent_data(
1109 goto one_stack_frame_backwards
;
1112 goto continue_with_current_leaf_stack_frame
;
1115 struct btrfs_node
*const nodehdr
= (struct btrfs_node
*)sf
->hdr
;
1118 sf
->nr
= btrfs_stack_header_nritems(&nodehdr
->header
);
1120 if (state
->print_mask
& BTRFSIC_PRINT_MASK_VERBOSE
)
1121 printk(KERN_INFO
"node %llu level %d items %d"
1122 " generation %llu owner %llu\n",
1123 sf
->block_ctx
->start
,
1124 nodehdr
->header
.level
, sf
->nr
,
1125 btrfs_stack_header_generation(
1127 btrfs_stack_header_owner(
1131 continue_with_current_node_stack_frame
:
1132 if (0 == sf
->num_copies
|| sf
->mirror_num
> sf
->num_copies
) {
1137 if (sf
->i
< sf
->nr
) {
1138 struct btrfs_key_ptr key_ptr
;
1142 key_ptr_offset
= (uintptr_t)(nodehdr
->ptrs
+ sf
->i
) -
1144 if (key_ptr_offset
+ sizeof(struct btrfs_key_ptr
) >
1145 sf
->block_ctx
->len
) {
1147 "btrfsic: node item out of bounce at logical %llu, dev %s\n",
1148 sf
->block_ctx
->start
,
1149 sf
->block_ctx
->dev
->name
);
1150 goto one_stack_frame_backwards
;
1152 btrfsic_read_from_block_data(
1153 sf
->block_ctx
, &key_ptr
, key_ptr_offset
,
1154 sizeof(struct btrfs_key_ptr
));
1155 next_bytenr
= btrfs_stack_key_blockptr(&key_ptr
);
1157 sf
->error
= btrfsic_create_link_to_next_block(
1163 &sf
->next_block_ctx
,
1169 btrfs_stack_key_generation(&key_ptr
));
1171 goto one_stack_frame_backwards
;
1173 if (NULL
!= sf
->next_block
) {
1174 struct btrfs_header
*const next_hdr
=
1175 (struct btrfs_header
*)
1176 sf
->next_block_ctx
.datav
[0];
1178 next_stack
= btrfsic_stack_frame_alloc();
1179 if (NULL
== next_stack
) {
1181 goto one_stack_frame_backwards
;
1185 next_stack
->block
= sf
->next_block
;
1186 next_stack
->block_ctx
= &sf
->next_block_ctx
;
1187 next_stack
->next_block
= NULL
;
1188 next_stack
->hdr
= next_hdr
;
1189 next_stack
->limit_nesting
=
1190 sf
->limit_nesting
- 1;
1191 next_stack
->prev
= sf
;
1193 goto continue_with_new_stack_frame
;
1196 goto continue_with_current_node_stack_frame
;
1200 one_stack_frame_backwards
:
1201 if (NULL
!= sf
->prev
) {
1202 struct btrfsic_stack_frame
*const prev
= sf
->prev
;
1204 /* the one for the initial block is freed in the caller */
1205 btrfsic_release_block_ctx(sf
->block_ctx
);
1208 prev
->error
= sf
->error
;
1209 btrfsic_stack_frame_free(sf
);
1211 goto one_stack_frame_backwards
;
1214 btrfsic_stack_frame_free(sf
);
1216 goto continue_with_new_stack_frame
;
1218 BUG_ON(&initial_stack_frame
!= sf
);
static void btrfsic_read_from_block_data(
		struct btrfsic_block_data_ctx *block_ctx,
		void *dstv, u32 offset, size_t len)
{
	size_t cur;
	size_t offset_in_page;
	char *kaddr;
	char *dst = (char *)dstv;
	size_t start_offset = block_ctx->start & ((u64)PAGE_CACHE_SIZE - 1);
	unsigned long i = (start_offset + offset) >> PAGE_CACHE_SHIFT;

	WARN_ON(offset + len > block_ctx->len);
	offset_in_page = (start_offset + offset) & (PAGE_CACHE_SIZE - 1);

	while (len > 0) {
		cur = min(len, ((size_t)PAGE_CACHE_SIZE - offset_in_page));
		BUG_ON(i >= DIV_ROUND_UP(block_ctx->len, PAGE_CACHE_SIZE));
		kaddr = block_ctx->datav[i];
		memcpy(dst, kaddr + offset_in_page, cur);

		dst += cur;
		len -= cur;
		offset_in_page = 0;
		i++;
	}
}
1251 static int btrfsic_create_link_to_next_block(
1252 struct btrfsic_state
*state
,
1253 struct btrfsic_block
*block
,
1254 struct btrfsic_block_data_ctx
*block_ctx
,
1257 struct btrfsic_block_data_ctx
*next_block_ctx
,
1258 struct btrfsic_block
**next_blockp
,
1259 int force_iodone_flag
,
1260 int *num_copiesp
, int *mirror_nump
,
1261 struct btrfs_disk_key
*disk_key
,
1262 u64 parent_generation
)
1264 struct btrfsic_block
*next_block
= NULL
;
1266 struct btrfsic_block_link
*l
;
1267 int did_alloc_block_link
;
1268 int block_was_created
;
1270 *next_blockp
= NULL
;
1271 if (0 == *num_copiesp
) {
1273 btrfs_num_copies(state
->root
->fs_info
,
1274 next_bytenr
, state
->metablock_size
);
1275 if (state
->print_mask
& BTRFSIC_PRINT_MASK_NUM_COPIES
)
1276 printk(KERN_INFO
"num_copies(log_bytenr=%llu) = %d\n",
1277 next_bytenr
, *num_copiesp
);
1281 if (*mirror_nump
> *num_copiesp
)
1284 if (state
->print_mask
& BTRFSIC_PRINT_MASK_VERBOSE
)
1286 "btrfsic_create_link_to_next_block(mirror_num=%d)\n",
1288 ret
= btrfsic_map_block(state
, next_bytenr
,
1289 state
->metablock_size
,
1290 next_block_ctx
, *mirror_nump
);
1293 "btrfsic: btrfsic_map_block(@%llu, mirror=%d) failed!\n",
1294 next_bytenr
, *mirror_nump
);
1295 btrfsic_release_block_ctx(next_block_ctx
);
1296 *next_blockp
= NULL
;
1300 next_block
= btrfsic_block_lookup_or_add(state
,
1301 next_block_ctx
, "referenced ",
1302 1, force_iodone_flag
,
1305 &block_was_created
);
1306 if (NULL
== next_block
) {
1307 btrfsic_release_block_ctx(next_block_ctx
);
1308 *next_blockp
= NULL
;
1311 if (block_was_created
) {
1313 next_block
->generation
= BTRFSIC_GENERATION_UNKNOWN
;
1315 if (state
->print_mask
& BTRFSIC_PRINT_MASK_VERBOSE
) {
1316 if (next_block
->logical_bytenr
!= next_bytenr
&&
1317 !(!next_block
->is_metadata
&&
1318 0 == next_block
->logical_bytenr
))
1320 "Referenced block @%llu (%s/%llu/%d) found in hash table, %c, bytenr mismatch (!= stored %llu).\n",
1321 next_bytenr
, next_block_ctx
->dev
->name
,
1322 next_block_ctx
->dev_bytenr
, *mirror_nump
,
1323 btrfsic_get_block_type(state
,
1325 next_block
->logical_bytenr
);
1328 "Referenced block @%llu (%s/%llu/%d) found in hash table, %c.\n",
1329 next_bytenr
, next_block_ctx
->dev
->name
,
1330 next_block_ctx
->dev_bytenr
, *mirror_nump
,
1331 btrfsic_get_block_type(state
,
1334 next_block
->logical_bytenr
= next_bytenr
;
1336 next_block
->mirror_num
= *mirror_nump
;
1337 l
= btrfsic_block_link_hashtable_lookup(
1338 next_block_ctx
->dev
->bdev
,
1339 next_block_ctx
->dev_bytenr
,
1340 block_ctx
->dev
->bdev
,
1341 block_ctx
->dev_bytenr
,
1342 &state
->block_link_hashtable
);
1345 next_block
->disk_key
= *disk_key
;
1347 l
= btrfsic_block_link_alloc();
1349 printk(KERN_INFO
"btrfsic: error, kmalloc failed!\n");
1350 btrfsic_release_block_ctx(next_block_ctx
);
1351 *next_blockp
= NULL
;
1355 did_alloc_block_link
= 1;
1356 l
->block_ref_to
= next_block
;
1357 l
->block_ref_from
= block
;
1359 l
->parent_generation
= parent_generation
;
1361 if (state
->print_mask
& BTRFSIC_PRINT_MASK_VERBOSE
)
1362 btrfsic_print_add_link(state
, l
);
1364 list_add(&l
->node_ref_to
, &block
->ref_to_list
);
1365 list_add(&l
->node_ref_from
, &next_block
->ref_from_list
);
1367 btrfsic_block_link_hashtable_add(l
,
1368 &state
->block_link_hashtable
);
1370 did_alloc_block_link
= 0;
1371 if (0 == limit_nesting
) {
1373 l
->parent_generation
= parent_generation
;
1374 if (state
->print_mask
& BTRFSIC_PRINT_MASK_VERBOSE
)
1375 btrfsic_print_add_link(state
, l
);
1379 if (limit_nesting
> 0 && did_alloc_block_link
) {
1380 ret
= btrfsic_read_block(state
, next_block_ctx
);
1381 if (ret
< (int)next_block_ctx
->len
) {
1383 "btrfsic: read block @logical %llu failed!\n",
1385 btrfsic_release_block_ctx(next_block_ctx
);
1386 *next_blockp
= NULL
;
1390 *next_blockp
= next_block
;
1392 *next_blockp
= NULL
;
1399 static int btrfsic_handle_extent_data(
1400 struct btrfsic_state
*state
,
1401 struct btrfsic_block
*block
,
1402 struct btrfsic_block_data_ctx
*block_ctx
,
1403 u32 item_offset
, int force_iodone_flag
)
1406 struct btrfs_file_extent_item file_extent_item
;
1407 u64 file_extent_item_offset
;
1411 struct btrfsic_block_link
*l
;
1413 file_extent_item_offset
= offsetof(struct btrfs_leaf
, items
) +
1415 if (file_extent_item_offset
+
1416 offsetof(struct btrfs_file_extent_item
, disk_num_bytes
) >
1419 "btrfsic: file item out of bounce at logical %llu, dev %s\n",
1420 block_ctx
->start
, block_ctx
->dev
->name
);
1424 btrfsic_read_from_block_data(block_ctx
, &file_extent_item
,
1425 file_extent_item_offset
,
1426 offsetof(struct btrfs_file_extent_item
, disk_num_bytes
));
1427 if (BTRFS_FILE_EXTENT_REG
!= file_extent_item
.type
||
1428 btrfs_stack_file_extent_disk_bytenr(&file_extent_item
) == 0) {
1429 if (state
->print_mask
& BTRFSIC_PRINT_MASK_VERY_VERBOSE
)
1430 printk(KERN_INFO
"extent_data: type %u, disk_bytenr = %llu\n",
1431 file_extent_item
.type
,
1432 btrfs_stack_file_extent_disk_bytenr(
1433 &file_extent_item
));
1437 if (file_extent_item_offset
+ sizeof(struct btrfs_file_extent_item
) >
1440 "btrfsic: file item out of bounce at logical %llu, dev %s\n",
1441 block_ctx
->start
, block_ctx
->dev
->name
);
1444 btrfsic_read_from_block_data(block_ctx
, &file_extent_item
,
1445 file_extent_item_offset
,
1446 sizeof(struct btrfs_file_extent_item
));
1447 next_bytenr
= btrfs_stack_file_extent_disk_bytenr(&file_extent_item
);
1448 if (btrfs_stack_file_extent_compression(&file_extent_item
) ==
1449 BTRFS_COMPRESS_NONE
) {
1450 next_bytenr
+= btrfs_stack_file_extent_offset(&file_extent_item
);
1451 num_bytes
= btrfs_stack_file_extent_num_bytes(&file_extent_item
);
1453 num_bytes
= btrfs_stack_file_extent_disk_num_bytes(&file_extent_item
);
1455 generation
= btrfs_stack_file_extent_generation(&file_extent_item
);
1457 if (state
->print_mask
& BTRFSIC_PRINT_MASK_VERY_VERBOSE
)
1458 printk(KERN_INFO
"extent_data: type %u, disk_bytenr = %llu,"
1459 " offset = %llu, num_bytes = %llu\n",
1460 file_extent_item
.type
,
1461 btrfs_stack_file_extent_disk_bytenr(&file_extent_item
),
1462 btrfs_stack_file_extent_offset(&file_extent_item
),
1464 while (num_bytes
> 0) {
1469 if (num_bytes
> state
->datablock_size
)
1470 chunk_len
= state
->datablock_size
;
1472 chunk_len
= num_bytes
;
1475 btrfs_num_copies(state
->root
->fs_info
,
1476 next_bytenr
, state
->datablock_size
);
1477 if (state
->print_mask
& BTRFSIC_PRINT_MASK_NUM_COPIES
)
1478 printk(KERN_INFO
"num_copies(log_bytenr=%llu) = %d\n",
1479 next_bytenr
, num_copies
);
1480 for (mirror_num
= 1; mirror_num
<= num_copies
; mirror_num
++) {
1481 struct btrfsic_block_data_ctx next_block_ctx
;
1482 struct btrfsic_block
*next_block
;
1483 int block_was_created
;
1485 if (state
->print_mask
& BTRFSIC_PRINT_MASK_VERBOSE
)
1486 printk(KERN_INFO
"btrfsic_handle_extent_data("
1487 "mirror_num=%d)\n", mirror_num
);
1488 if (state
->print_mask
& BTRFSIC_PRINT_MASK_VERY_VERBOSE
)
1490 "\tdisk_bytenr = %llu, num_bytes %u\n",
1491 next_bytenr
, chunk_len
);
1492 ret
= btrfsic_map_block(state
, next_bytenr
,
1493 chunk_len
, &next_block_ctx
,
1497 "btrfsic: btrfsic_map_block(@%llu,"
1498 " mirror=%d) failed!\n",
1499 next_bytenr
, mirror_num
);
1503 next_block
= btrfsic_block_lookup_or_add(
1511 &block_was_created
);
1512 if (NULL
== next_block
) {
1514 "btrfsic: error, kmalloc failed!\n");
1515 btrfsic_release_block_ctx(&next_block_ctx
);
1518 if (!block_was_created
) {
1519 if ((state
->print_mask
&
1520 BTRFSIC_PRINT_MASK_VERBOSE
) &&
1521 next_block
->logical_bytenr
!= next_bytenr
&&
1522 !(!next_block
->is_metadata
&&
1523 0 == next_block
->logical_bytenr
)) {
1526 " @%llu (%s/%llu/%d)"
1527 " found in hash table, D,"
1529 " (!= stored %llu).\n",
1531 next_block_ctx
.dev
->name
,
1532 next_block_ctx
.dev_bytenr
,
1534 next_block
->logical_bytenr
);
1536 next_block
->logical_bytenr
= next_bytenr
;
1537 next_block
->mirror_num
= mirror_num
;
1540 l
= btrfsic_block_link_lookup_or_add(state
,
1544 btrfsic_release_block_ctx(&next_block_ctx
);
1549 next_bytenr
+= chunk_len
;
1550 num_bytes
-= chunk_len
;
static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len,
			     struct btrfsic_block_data_ctx *block_ctx_out,
			     int mirror_num)
{
	int ret;
	u64 length;
	struct btrfs_bio *multi = NULL;
	struct btrfs_device *device;

	length = len;
	ret = btrfs_map_block(state->root->fs_info, READ,
			      bytenr, &length, &multi, mirror_num);

	if (ret) {
		block_ctx_out->start = 0;
		block_ctx_out->dev_bytenr = 0;
		block_ctx_out->len = 0;
		block_ctx_out->dev = NULL;
		block_ctx_out->datav = NULL;
		block_ctx_out->pagev = NULL;
		block_ctx_out->mem_to_free = NULL;

		return ret;
	}

	device = multi->stripes[0].dev;
	block_ctx_out->dev = btrfsic_dev_state_lookup(device->bdev);
	block_ctx_out->dev_bytenr = multi->stripes[0].physical;
	block_ctx_out->start = bytenr;
	block_ctx_out->len = len;
	block_ctx_out->datav = NULL;
	block_ctx_out->pagev = NULL;
	block_ctx_out->mem_to_free = NULL;

	kfree(multi);
	if (NULL == block_ctx_out->dev) {
		ret = -ENXIO;
		printk(KERN_INFO "btrfsic: error, cannot lookup dev (#1)!\n");
	}

	return ret;
}
static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx)
{
	if (block_ctx->mem_to_free) {
		unsigned int num_pages;

		BUG_ON(!block_ctx->datav);
		BUG_ON(!block_ctx->pagev);
		num_pages = (block_ctx->len + (u64)PAGE_CACHE_SIZE - 1) >>
			    PAGE_CACHE_SHIFT;
		while (num_pages > 0) {
			num_pages--;
			if (block_ctx->datav[num_pages]) {
				kunmap(block_ctx->pagev[num_pages]);
				block_ctx->datav[num_pages] = NULL;
			}
			if (block_ctx->pagev[num_pages]) {
				__free_page(block_ctx->pagev[num_pages]);
				block_ctx->pagev[num_pages] = NULL;
			}
		}

		kfree(block_ctx->mem_to_free);
		block_ctx->mem_to_free = NULL;
		block_ctx->pagev = NULL;
		block_ctx->datav = NULL;
	}
}
static int btrfsic_read_block(struct btrfsic_state *state,
			      struct btrfsic_block_data_ctx *block_ctx)
{
	unsigned int num_pages;
	unsigned int i;
	u64 dev_bytenr;
	int ret;

	BUG_ON(block_ctx->datav);
	BUG_ON(block_ctx->pagev);
	BUG_ON(block_ctx->mem_to_free);
	if (block_ctx->dev_bytenr & ((u64)PAGE_CACHE_SIZE - 1)) {
		printk(KERN_INFO
		       "btrfsic: read_block() with unaligned bytenr %llu\n",
		       block_ctx->dev_bytenr);
		return -1;
	}

	num_pages = (block_ctx->len + (u64)PAGE_CACHE_SIZE - 1) >>
		    PAGE_CACHE_SHIFT;
	block_ctx->mem_to_free = kzalloc((sizeof(*block_ctx->datav) +
					  sizeof(*block_ctx->pagev)) *
					 num_pages, GFP_NOFS);
	if (!block_ctx->mem_to_free)
		return -ENOMEM;
	block_ctx->datav = block_ctx->mem_to_free;
	block_ctx->pagev = (struct page **)(block_ctx->datav + num_pages);
	for (i = 0; i < num_pages; i++) {
		block_ctx->pagev[i] = alloc_page(GFP_NOFS);
		if (!block_ctx->pagev[i])
			return -1;
	}

	dev_bytenr = block_ctx->dev_bytenr;
	for (i = 0; i < num_pages;) {
		struct bio *bio;
		unsigned int j;

		bio = btrfs_io_bio_alloc(GFP_NOFS, num_pages - i);
		if (!bio) {
			printk(KERN_INFO
			       "btrfsic: bio_alloc() for %u pages failed!\n",
			       num_pages - i);
			return -1;
		}
		bio->bi_bdev = block_ctx->dev->bdev;
		bio->bi_iter.bi_sector = dev_bytenr >> 9;

		for (j = i; j < num_pages; j++) {
			ret = bio_add_page(bio, block_ctx->pagev[j],
					   PAGE_CACHE_SIZE, 0);
			if (PAGE_CACHE_SIZE != ret)
				break;
		}
		if (j == i) {
			printk(KERN_INFO
			       "btrfsic: error, failed to add a single page!\n");
			return -1;
		}
		if (submit_bio_wait(READ, bio)) {
			printk(KERN_INFO
			       "btrfsic: read error at logical %llu dev %s!\n",
			       block_ctx->start, block_ctx->dev->name);
			bio_put(bio);
			return -1;
		}
		bio_put(bio);
		dev_bytenr += (j - i) * PAGE_CACHE_SIZE;
		i = j;
	}
	for (i = 0; i < num_pages; i++) {
		block_ctx->datav[i] = kmap(block_ctx->pagev[i]);
		if (!block_ctx->datav[i]) {
			printk(KERN_INFO "btrfsic: kmap() failed (dev %s)!\n",
			       block_ctx->dev->name);
			return -1;
		}
	}

	return block_ctx->len;
}
static void btrfsic_dump_database(struct btrfsic_state *state)
{
	const struct btrfsic_block *b_all;

	BUG_ON(NULL == state);

	printk(KERN_INFO "all_blocks_list:\n");
	list_for_each_entry(b_all, &state->all_blocks_list, all_blocks_node) {
		const struct btrfsic_block_link *l;

		printk(KERN_INFO "%c-block @%llu (%s/%llu/%d)\n",
		       btrfsic_get_block_type(state, b_all),
		       b_all->logical_bytenr, b_all->dev_state->name,
		       b_all->dev_bytenr, b_all->mirror_num);

		list_for_each_entry(l, &b_all->ref_to_list, node_ref_to) {
			printk(KERN_INFO " %c @%llu (%s/%llu/%d)"
			       " refers %u* to"
			       " %c @%llu (%s/%llu/%d)\n",
			       btrfsic_get_block_type(state, b_all),
			       b_all->logical_bytenr, b_all->dev_state->name,
			       b_all->dev_bytenr, b_all->mirror_num,
			       l->ref_cnt,
			       btrfsic_get_block_type(state, l->block_ref_to),
			       l->block_ref_to->logical_bytenr,
			       l->block_ref_to->dev_state->name,
			       l->block_ref_to->dev_bytenr,
			       l->block_ref_to->mirror_num);
		}

		list_for_each_entry(l, &b_all->ref_from_list, node_ref_from) {
			printk(KERN_INFO " %c @%llu (%s/%llu/%d)"
			       " is ref %u* from"
			       " %c @%llu (%s/%llu/%d)\n",
			       btrfsic_get_block_type(state, b_all),
			       b_all->logical_bytenr, b_all->dev_state->name,
			       b_all->dev_bytenr, b_all->mirror_num,
			       l->ref_cnt,
			       btrfsic_get_block_type(state, l->block_ref_from),
			       l->block_ref_from->logical_bytenr,
			       l->block_ref_from->dev_state->name,
			       l->block_ref_from->dev_bytenr,
			       l->block_ref_from->mirror_num);
		}

		printk(KERN_INFO "\n");
	}
}
/*
 * Test whether the disk block contains a tree block (leaf or node)
 * (note that this test fails for the super block)
 */
static int btrfsic_test_for_metadata(struct btrfsic_state *state,
				     char **datav, unsigned int num_pages)
{
	struct btrfs_header *h;
	u8 csum[BTRFS_CSUM_SIZE];
	u32 crc = ~(u32)0;
	unsigned int i;

	if (num_pages * PAGE_CACHE_SIZE < state->metablock_size)
		return 1; /* not metadata */
	num_pages = state->metablock_size >> PAGE_CACHE_SHIFT;
	h = (struct btrfs_header *)datav[0];

	if (memcmp(h->fsid, state->root->fs_info->fsid, BTRFS_UUID_SIZE))
		return 1;

	for (i = 0; i < num_pages; i++) {
		u8 *data = i ? datav[i] : (datav[i] + BTRFS_CSUM_SIZE);
		size_t sublen = i ? PAGE_CACHE_SIZE :
				    (PAGE_CACHE_SIZE - BTRFS_CSUM_SIZE);

		crc = btrfs_crc32c(crc, data, sublen);
	}
	btrfs_csum_final(crc, csum);
	if (memcmp(csum, h->csum, state->csum_size))
		return 1;

	return 0; /* is metadata */
}
1792 static void btrfsic_process_written_block(struct btrfsic_dev_state
*dev_state
,
1793 u64 dev_bytenr
, char **mapped_datav
,
1794 unsigned int num_pages
,
1795 struct bio
*bio
, int *bio_is_patched
,
1796 struct buffer_head
*bh
,
1797 int submit_bio_bh_rw
)
1800 struct btrfsic_block
*block
;
1801 struct btrfsic_block_data_ctx block_ctx
;
1803 struct btrfsic_state
*state
= dev_state
->state
;
1804 struct block_device
*bdev
= dev_state
->bdev
;
1805 unsigned int processed_len
;
1807 if (NULL
!= bio_is_patched
)
1808 *bio_is_patched
= 0;
1815 is_metadata
= (0 == btrfsic_test_for_metadata(state
, mapped_datav
,
1818 block
= btrfsic_block_hashtable_lookup(bdev
, dev_bytenr
,
1819 &state
->block_hashtable
);
1820 if (NULL
!= block
) {
1822 struct btrfsic_block_link
*l
, *tmp
;
1824 if (block
->is_superblock
) {
1825 bytenr
= btrfs_super_bytenr((struct btrfs_super_block
*)
1827 if (num_pages
* PAGE_CACHE_SIZE
<
1828 BTRFS_SUPER_INFO_SIZE
) {
1830 "btrfsic: cannot work with too short bios!\n");
1834 BUG_ON(BTRFS_SUPER_INFO_SIZE
& (PAGE_CACHE_SIZE
- 1));
1835 processed_len
= BTRFS_SUPER_INFO_SIZE
;
1836 if (state
->print_mask
&
1837 BTRFSIC_PRINT_MASK_TREE_BEFORE_SB_WRITE
) {
1839 "[before new superblock is written]:\n");
1840 btrfsic_dump_tree_sub(state
, block
, 0);
1844 if (!block
->is_superblock
) {
1845 if (num_pages
* PAGE_CACHE_SIZE
<
1846 state
->metablock_size
) {
1848 "btrfsic: cannot work with too short bios!\n");
1851 processed_len
= state
->metablock_size
;
1852 bytenr
= btrfs_stack_header_bytenr(
1853 (struct btrfs_header
*)
1855 btrfsic_cmp_log_and_dev_bytenr(state
, bytenr
,
1859 if (state
->print_mask
& BTRFSIC_PRINT_MASK_VERBOSE
) {
1860 if (block
->logical_bytenr
!= bytenr
&&
1861 !(!block
->is_metadata
&&
1862 block
->logical_bytenr
== 0))
1864 "Written block @%llu (%s/%llu/%d) found in hash table, %c, bytenr mismatch (!= stored %llu).\n",
1865 bytenr
, dev_state
->name
,
1868 btrfsic_get_block_type(state
,
1870 block
->logical_bytenr
);
1873 "Written block @%llu (%s/%llu/%d) found in hash table, %c.\n",
1874 bytenr
, dev_state
->name
,
1875 dev_bytenr
, block
->mirror_num
,
1876 btrfsic_get_block_type(state
,
1879 block
->logical_bytenr
= bytenr
;
1881 if (num_pages
* PAGE_CACHE_SIZE
<
1882 state
->datablock_size
) {
1884 "btrfsic: cannot work with too short bios!\n");
1887 processed_len
= state
->datablock_size
;
1888 bytenr
= block
->logical_bytenr
;
1889 if (state
->print_mask
& BTRFSIC_PRINT_MASK_VERBOSE
)
1891 "Written block @%llu (%s/%llu/%d)"
1892 " found in hash table, %c.\n",
1893 bytenr
, dev_state
->name
, dev_bytenr
,
1895 btrfsic_get_block_type(state
, block
));
1898 if (state
->print_mask
& BTRFSIC_PRINT_MASK_VERBOSE
)
1900 "ref_to_list: %cE, ref_from_list: %cE\n",
1901 list_empty(&block
->ref_to_list
) ? ' ' : '!',
1902 list_empty(&block
->ref_from_list
) ? ' ' : '!');
1903 if (btrfsic_is_block_ref_by_superblock(state
, block
, 0)) {
1904 printk(KERN_INFO
"btrfs: attempt to overwrite %c-block"
1905 " @%llu (%s/%llu/%d), old(gen=%llu,"
1906 " objectid=%llu, type=%d, offset=%llu),"
1908 " which is referenced by most recent superblock"
1909 " (superblockgen=%llu)!\n",
1910 btrfsic_get_block_type(state
, block
), bytenr
,
1911 dev_state
->name
, dev_bytenr
, block
->mirror_num
,
1913 btrfs_disk_key_objectid(&block
->disk_key
),
1914 block
->disk_key
.type
,
1915 btrfs_disk_key_offset(&block
->disk_key
),
1916 btrfs_stack_header_generation(
1917 (struct btrfs_header
*) mapped_datav
[0]),
1918 state
->max_superblock_generation
);
1919 btrfsic_dump_tree(state
);
1922 if (!block
->is_iodone
&& !block
->never_written
) {
1923 printk(KERN_INFO
"btrfs: attempt to overwrite %c-block"
1924 " @%llu (%s/%llu/%d), oldgen=%llu, newgen=%llu,"
1925 " which is not yet iodone!\n",
1926 btrfsic_get_block_type(state
, block
), bytenr
,
1927 dev_state
->name
, dev_bytenr
, block
->mirror_num
,
1929 btrfs_stack_header_generation(
1930 (struct btrfs_header
*)
1932 /* it would not be safe to go on */
1933 btrfsic_dump_tree(state
);
1938 * Clear all references of this block. Do not free
1939 * the block itself even if is not referenced anymore
1940 * because it still carries valueable information
1941 * like whether it was ever written and IO completed.
1943 list_for_each_entry_safe(l
, tmp
, &block
->ref_to_list
,
1945 if (state
->print_mask
& BTRFSIC_PRINT_MASK_VERBOSE
)
1946 btrfsic_print_rem_link(state
, l
);
1948 if (0 == l
->ref_cnt
) {
1949 list_del(&l
->node_ref_to
);
1950 list_del(&l
->node_ref_from
);
1951 btrfsic_block_link_hashtable_remove(l
);
1952 btrfsic_block_link_free(l
);
1956 block_ctx
.dev
= dev_state
;
1957 block_ctx
.dev_bytenr
= dev_bytenr
;
1958 block_ctx
.start
= bytenr
;
1959 block_ctx
.len
= processed_len
;
1960 block_ctx
.pagev
= NULL
;
1961 block_ctx
.mem_to_free
= NULL
;
1962 block_ctx
.datav
= mapped_datav
;
		if (is_metadata || state->include_extent_data) {
			block->never_written = 0;
			block->iodone_w_error = 0;
			if (NULL != bio) {
				block->is_iodone = 0;
				BUG_ON(NULL == bio_is_patched);
				if (!*bio_is_patched) {
					block->orig_bio_bh_private =
					    bio->bi_private;
					block->orig_bio_bh_end_io.bio =
					    bio->bi_end_io;
					block->next_in_same_bio = NULL;
					bio->bi_private = block;
					bio->bi_end_io = btrfsic_bio_end_io;
					*bio_is_patched = 1;
				} else {
					struct btrfsic_block *chained_block =
					    (struct btrfsic_block *)
					    bio->bi_private;

					BUG_ON(NULL == chained_block);
					block->orig_bio_bh_private =
					    chained_block->orig_bio_bh_private;
					block->orig_bio_bh_end_io.bio =
					    chained_block->orig_bio_bh_end_io.
					    bio;
					block->next_in_same_bio = chained_block;
					bio->bi_private = block;
				}
			} else if (NULL != bh) {
				block->is_iodone = 0;
				block->orig_bio_bh_private = bh->b_private;
				block->orig_bio_bh_end_io.bh = bh->b_end_io;
				block->next_in_same_bio = NULL;
				bh->b_private = block;
				bh->b_end_io = btrfsic_bh_end_io;
			} else {
				block->is_iodone = 1;
				block->orig_bio_bh_private = NULL;
				block->orig_bio_bh_end_io.bio = NULL;
				block->next_in_same_bio = NULL;
			}
		}
		block->flush_gen = dev_state->last_flush_gen + 1;
		block->submit_bio_bh_rw = submit_bio_bh_rw;
		if (is_metadata) {
			block->logical_bytenr = bytenr;
			block->is_metadata = 1;
			if (block->is_superblock) {
				BUG_ON(PAGE_CACHE_SIZE !=
				       BTRFS_SUPER_INFO_SIZE);
				ret = btrfsic_process_written_superblock(
						state,
						block,
						(struct btrfs_super_block *)
						mapped_datav[0]);
				if (state->print_mask &
				    BTRFSIC_PRINT_MASK_TREE_AFTER_SB_WRITE) {
					printk(KERN_INFO
					"[after new superblock is written]:\n");
					btrfsic_dump_tree_sub(state, block, 0);
				}
			} else {
				block->mirror_num = 0;	/* unknown */
				ret = btrfsic_process_metablock(state, block,
								&block_ctx,
								0, 0);
			}
			if (ret)
				printk(KERN_INFO
				       "btrfsic: btrfsic_process_metablock"
				       "(root @%llu) failed!\n",
				       dev_bytenr);
		} else {
			block->is_metadata = 0;
			block->mirror_num = 0;	/* unknown */
			block->generation = BTRFSIC_GENERATION_UNKNOWN;
			if (!state->include_extent_data
			    && list_empty(&block->ref_from_list)) {
				/*
				 * disk block is overwritten with extent
				 * data (not meta data) and we are configured
				 * to not include extent data: take the
				 * chance and free the block's memory
				 */
				btrfsic_block_hashtable_remove(block);
				list_del(&block->all_blocks_node);
				btrfsic_block_free(block);
			}
		}
		btrfsic_release_block_ctx(&block_ctx);
	} else {
		/* block has not been found in hash table */
		u64 bytenr;

		if (!is_metadata) {
			processed_len = state->datablock_size;
			if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
				printk(KERN_INFO "Written block (%s/%llu/?)"
				       " !found in hash table, D.\n",
				       dev_state->name, dev_bytenr);
			if (!state->include_extent_data) {
				/* ignore that written D block */
				goto continue_loop;
			}

			/* this is getting ugly for the
			 * include_extent_data case... */
			bytenr = 0;	/* unknown */
		} else {
			processed_len = state->metablock_size;
			bytenr = btrfs_stack_header_bytenr(
					(struct btrfs_header *)
					mapped_datav[0]);
			btrfsic_cmp_log_and_dev_bytenr(state, bytenr, dev_state,
						       dev_bytenr);
			if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
				printk(KERN_INFO
				       "Written block @%llu (%s/%llu/?)"
				       " !found in hash table, M.\n",
				       bytenr, dev_state->name, dev_bytenr);
		}
		block_ctx.dev = dev_state;
		block_ctx.dev_bytenr = dev_bytenr;
		block_ctx.start = bytenr;
		block_ctx.len = processed_len;
		block_ctx.pagev = NULL;
		block_ctx.mem_to_free = NULL;
		block_ctx.datav = mapped_datav;
		block = btrfsic_block_alloc();
		if (NULL == block) {
			printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
			btrfsic_release_block_ctx(&block_ctx);
			goto continue_loop;
		}
		block->dev_state = dev_state;
		block->dev_bytenr = dev_bytenr;
		block->logical_bytenr = bytenr;
		block->is_metadata = is_metadata;
		block->never_written = 0;
		block->iodone_w_error = 0;
		block->mirror_num = 0;	/* unknown */
		block->flush_gen = dev_state->last_flush_gen + 1;
		block->submit_bio_bh_rw = submit_bio_bh_rw;
		if (NULL != bio) {
			block->is_iodone = 0;
			BUG_ON(NULL == bio_is_patched);
			if (!*bio_is_patched) {
				block->orig_bio_bh_private = bio->bi_private;
				block->orig_bio_bh_end_io.bio = bio->bi_end_io;
				block->next_in_same_bio = NULL;
				bio->bi_private = block;
				bio->bi_end_io = btrfsic_bio_end_io;
				*bio_is_patched = 1;
			} else {
				struct btrfsic_block *chained_block =
				    (struct btrfsic_block *)
				    bio->bi_private;

				BUG_ON(NULL == chained_block);
				block->orig_bio_bh_private =
				    chained_block->orig_bio_bh_private;
				block->orig_bio_bh_end_io.bio =
				    chained_block->orig_bio_bh_end_io.bio;
				block->next_in_same_bio = chained_block;
				bio->bi_private = block;
			}
		} else if (NULL != bh) {
			block->is_iodone = 0;
			block->orig_bio_bh_private = bh->b_private;
			block->orig_bio_bh_end_io.bh = bh->b_end_io;
			block->next_in_same_bio = NULL;
			bh->b_private = block;
			bh->b_end_io = btrfsic_bh_end_io;
		} else {
			block->is_iodone = 1;
			block->orig_bio_bh_private = NULL;
			block->orig_bio_bh_end_io.bio = NULL;
			block->next_in_same_bio = NULL;
		}
		if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
			printk(KERN_INFO
			       "New written %c-block @%llu (%s/%llu/%d)\n",
			       is_metadata ? 'M' : 'D',
			       block->logical_bytenr, block->dev_state->name,
			       block->dev_bytenr, block->mirror_num);
		list_add(&block->all_blocks_node, &state->all_blocks_list);
		btrfsic_block_hashtable_add(block, &state->block_hashtable);

		if (is_metadata) {
			ret = btrfsic_process_metablock(state, block,
							&block_ctx, 0, 0);
			if (ret)
				printk(KERN_INFO
				       "btrfsic: process_metablock(root @%llu)"
				       " failed!\n",
				       dev_bytenr);
		}
		btrfsic_release_block_ctx(&block_ctx);
	}

continue_loop:
	BUG_ON(!processed_len);
	dev_bytenr += processed_len;
	mapped_datav += processed_len >> PAGE_CACHE_SHIFT;
	num_pages -= processed_len >> PAGE_CACHE_SHIFT;
	goto again;
}
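/*
 * Worked example for the chunking loop above (illustrative numbers only,
 * assuming a 16 KiB nodesize and 4 KiB pages): a bio writing one tree node
 * arrives with num_pages = 4. The pass consumes processed_len =
 * state->metablock_size = 16384, so dev_bytenr advances by 16384,
 * mapped_datav by 4 entries and num_pages drops to 0, after which the
 * "again" loop terminates. Data writes advance in state->datablock_size
 * steps instead.
 */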
static void btrfsic_bio_end_io(struct bio *bp)
{
	struct btrfsic_block *block = (struct btrfsic_block *)bp->bi_private;
	int iodone_w_error;

	/* mutex is not held! This is not safe if IO is not yet completed
	 * on umount */
	iodone_w_error = 0;
	if (bp->bi_error)
		iodone_w_error = 1;

	BUG_ON(NULL == block);
	bp->bi_private = block->orig_bio_bh_private;
	bp->bi_end_io = block->orig_bio_bh_end_io.bio;

	do {
		struct btrfsic_block *next_block;
		struct btrfsic_dev_state *const dev_state = block->dev_state;

		if ((dev_state->state->print_mask &
		     BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
			printk(KERN_INFO
			       "bio_end_io(err=%d) for %c @%llu (%s/%llu/%d)\n",
			       bp->bi_error,
			       btrfsic_get_block_type(dev_state->state, block),
			       block->logical_bytenr, dev_state->name,
			       block->dev_bytenr, block->mirror_num);
		next_block = block->next_in_same_bio;
		block->iodone_w_error = iodone_w_error;
		if (block->submit_bio_bh_rw & REQ_FLUSH) {
			dev_state->last_flush_gen++;
			if ((dev_state->state->print_mask &
			     BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
				printk(KERN_INFO
				       "bio_end_io() new %s flush_gen=%llu\n",
				       dev_state->name,
				       dev_state->last_flush_gen);
		}
		if (block->submit_bio_bh_rw & REQ_FUA)
			block->flush_gen = 0; /* FUA completed means block is
					       * on disk */
		block->is_iodone = 1; /* for FLUSH, this releases the block */
		block = next_block;
	} while (NULL != block);

	bp->bi_end_io(bp);
}
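/*
 * Illustrative timeline for the flush_gen logic above: a block written while
 * dev_state->last_flush_gen == N is stamped flush_gen = N + 1. As long as no
 * FLUSH completes, flush_gen > last_flush_gen and rule 2a treats the block as
 * possibly still sitting in the disk's volatile write cache. Once a REQ_FLUSH
 * request completes, last_flush_gen becomes N + 1 and the block counts as
 * persisted; a REQ_FUA write short-circuits this by setting flush_gen to 0
 * right away.
 */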
static void btrfsic_bh_end_io(struct buffer_head *bh, int uptodate)
{
	struct btrfsic_block *block = (struct btrfsic_block *)bh->b_private;
	int iodone_w_error = !uptodate;
	struct btrfsic_dev_state *dev_state;

	BUG_ON(NULL == block);
	dev_state = block->dev_state;
	if ((dev_state->state->print_mask & BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
		printk(KERN_INFO
		       "bh_end_io(error=%d) for %c @%llu (%s/%llu/%d)\n",
		       iodone_w_error,
		       btrfsic_get_block_type(dev_state->state, block),
		       block->logical_bytenr, block->dev_state->name,
		       block->dev_bytenr, block->mirror_num);

	block->iodone_w_error = iodone_w_error;
	if (block->submit_bio_bh_rw & REQ_FLUSH) {
		dev_state->last_flush_gen++;
		if ((dev_state->state->print_mask &
		     BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
			printk(KERN_INFO
			       "bh_end_io() new %s flush_gen=%llu\n",
			       dev_state->name, dev_state->last_flush_gen);
	}
	if (block->submit_bio_bh_rw & REQ_FUA)
		block->flush_gen = 0; /* FUA completed means block is on disk */

	bh->b_private = block->orig_bio_bh_private;
	bh->b_end_io = block->orig_bio_bh_end_io.bh;
	block->is_iodone = 1; /* for FLUSH, this releases the block */
	bh->b_end_io(bh, uptodate);
}
static int btrfsic_process_written_superblock(
		struct btrfsic_state *state,
		struct btrfsic_block *const superblock,
		struct btrfs_super_block *const super_hdr)
{
	int pass;

	superblock->generation = btrfs_super_generation(super_hdr);
	if (!(superblock->generation > state->max_superblock_generation ||
	      0 == state->max_superblock_generation)) {
		if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE)
			printk(KERN_INFO
			       "btrfsic: superblock @%llu (%s/%llu/%d)"
			       " with old gen %llu <= %llu\n",
			       superblock->logical_bytenr,
			       superblock->dev_state->name,
			       superblock->dev_bytenr, superblock->mirror_num,
			       btrfs_super_generation(super_hdr),
			       state->max_superblock_generation);
	} else {
		if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE)
			printk(KERN_INFO
			       "btrfsic: got new superblock @%llu (%s/%llu/%d)"
			       " with new gen %llu > %llu\n",
			       superblock->logical_bytenr,
			       superblock->dev_state->name,
			       superblock->dev_bytenr, superblock->mirror_num,
			       btrfs_super_generation(super_hdr),
			       state->max_superblock_generation);

		state->max_superblock_generation =
		    btrfs_super_generation(super_hdr);
		state->latest_superblock = superblock;
	}
	for (pass = 0; pass < 3; pass++) {
		int ret;
		u64 next_bytenr;
		struct btrfsic_block *next_block;
		struct btrfsic_block_data_ctx tmp_next_block_ctx;
		struct btrfsic_block_link *l;
		int num_copies;
		int mirror_num;
		const char *additional_string = NULL;
		struct btrfs_disk_key tmp_disk_key = {0};

		btrfs_set_disk_key_type(&tmp_disk_key,
					BTRFS_ROOT_ITEM_KEY);
		btrfs_set_disk_key_offset(&tmp_disk_key, 0);

		switch (pass) {
		case 0:
			btrfs_set_disk_key_objectid(&tmp_disk_key,
						    BTRFS_ROOT_TREE_OBJECTID);
			additional_string = "root ";
			next_bytenr = btrfs_super_root(super_hdr);
			if (state->print_mask &
			    BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
				printk(KERN_INFO "root@%llu\n", next_bytenr);
			break;
		case 1:
			btrfs_set_disk_key_objectid(&tmp_disk_key,
						    BTRFS_CHUNK_TREE_OBJECTID);
			additional_string = "chunk ";
			next_bytenr = btrfs_super_chunk_root(super_hdr);
			if (state->print_mask &
			    BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
				printk(KERN_INFO "chunk@%llu\n", next_bytenr);
			break;
		case 2:
			btrfs_set_disk_key_objectid(&tmp_disk_key,
						    BTRFS_TREE_LOG_OBJECTID);
			additional_string = "log ";
			next_bytenr = btrfs_super_log_root(super_hdr);
			if (0 == next_bytenr)
				continue;
			if (state->print_mask &
			    BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
				printk(KERN_INFO "log@%llu\n", next_bytenr);
			break;
		}

		num_copies =
		    btrfs_num_copies(state->root->fs_info,
				     next_bytenr, BTRFS_SUPER_INFO_SIZE);
		if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
			printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
			       next_bytenr, num_copies);
		for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
			int was_created;

			if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
				printk(KERN_INFO
				       "btrfsic_process_written_superblock("
				       "mirror_num=%d)\n", mirror_num);
			ret = btrfsic_map_block(state, next_bytenr,
						BTRFS_SUPER_INFO_SIZE,
						&tmp_next_block_ctx,
						mirror_num);
			if (ret) {
				printk(KERN_INFO
				       "btrfsic: btrfsic_map_block(@%llu,"
				       " mirror=%d) failed!\n",
				       next_bytenr, mirror_num);
				return -1;
			}

			next_block = btrfsic_block_lookup_or_add(
					state,
					&tmp_next_block_ctx,
					additional_string,
					1, 0, 1,
					mirror_num,
					&was_created);
			if (NULL == next_block) {
				printk(KERN_INFO
				       "btrfsic: error, kmalloc failed!\n");
				btrfsic_release_block_ctx(&tmp_next_block_ctx);
				return -1;
			}

			next_block->disk_key = tmp_disk_key;
			if (was_created)
				next_block->generation =
				    BTRFSIC_GENERATION_UNKNOWN;
			l = btrfsic_block_link_lookup_or_add(
					state,
					&tmp_next_block_ctx,
					next_block,
					superblock,
					BTRFSIC_GENERATION_UNKNOWN);
			btrfsic_release_block_ctx(&tmp_next_block_ctx);
			if (NULL == l)
				return -1;
		}
	}

	if (WARN_ON(-1 == btrfsic_check_all_ref_blocks(state, superblock, 0)))
		btrfsic_dump_tree(state);

	return 0;
}
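/*
 * Note (summary, not normative): each superblock write records the new
 * generation, then walks the tree roots the superblock references (root,
 * chunk and, if present, log tree) for every mirror and links them to the
 * superblock block, so that btrfsic_check_all_ref_blocks() can verify rules
 * 2a/2b from the header comment against the complete reference graph.
 */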
static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state,
					struct btrfsic_block *const block,
					int recursion_level)
{
	const struct btrfsic_block_link *l;
	int ret = 0;

	if (recursion_level >= 3 + BTRFS_MAX_LEVEL) {
		/*
		 * Note that this situation can happen and does not
		 * indicate an error in regular cases. It happens
		 * when disk blocks are freed and later reused.
		 * The check-integrity module is not aware of any
		 * block free operations, it just recognizes block
		 * write operations. Therefore it keeps the linkage
		 * information for a block until a block is
		 * rewritten. This can temporarily cause incorrect
		 * and even circular linkage information. This
		 * causes no harm unless such blocks are referenced
		 * by the most recent super block.
		 */
		if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
			printk(KERN_INFO
			       "btrfsic: abort cyclic linkage (case 1).\n");

		return ret;
	}

	/*
	 * This algorithm is recursive because the amount of used stack
	 * space is very small and the max recursion depth is limited.
	 */
	list_for_each_entry(l, &block->ref_to_list, node_ref_to) {
		if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
			printk(KERN_INFO
			       "rl=%d, %c @%llu (%s/%llu/%d)"
			       " %u* refers to %c @%llu (%s/%llu/%d)\n",
			       recursion_level,
			       btrfsic_get_block_type(state, block),
			       block->logical_bytenr, block->dev_state->name,
			       block->dev_bytenr, block->mirror_num,
			       l->ref_cnt,
			       btrfsic_get_block_type(state, l->block_ref_to),
			       l->block_ref_to->logical_bytenr,
			       l->block_ref_to->dev_state->name,
			       l->block_ref_to->dev_bytenr,
			       l->block_ref_to->mirror_num);
		if (l->block_ref_to->never_written) {
			printk(KERN_INFO "btrfs: attempt to write superblock"
			       " which references block %c @%llu (%s/%llu/%d)"
			       " which is never written!\n",
			       btrfsic_get_block_type(state, l->block_ref_to),
			       l->block_ref_to->logical_bytenr,
			       l->block_ref_to->dev_state->name,
			       l->block_ref_to->dev_bytenr,
			       l->block_ref_to->mirror_num);
			ret = -1;
		} else if (!l->block_ref_to->is_iodone) {
			printk(KERN_INFO "btrfs: attempt to write superblock"
			       " which references block %c @%llu (%s/%llu/%d)"
			       " which is not yet iodone!\n",
			       btrfsic_get_block_type(state, l->block_ref_to),
			       l->block_ref_to->logical_bytenr,
			       l->block_ref_to->dev_state->name,
			       l->block_ref_to->dev_bytenr,
			       l->block_ref_to->mirror_num);
			ret = -1;
		} else if (l->block_ref_to->iodone_w_error) {
			printk(KERN_INFO "btrfs: attempt to write superblock"
			       " which references block %c @%llu (%s/%llu/%d)"
			       " which has write error!\n",
			       btrfsic_get_block_type(state, l->block_ref_to),
			       l->block_ref_to->logical_bytenr,
			       l->block_ref_to->dev_state->name,
			       l->block_ref_to->dev_bytenr,
			       l->block_ref_to->mirror_num);
			ret = -1;
		} else if (l->parent_generation !=
			   l->block_ref_to->generation &&
			   BTRFSIC_GENERATION_UNKNOWN !=
			   l->parent_generation &&
			   BTRFSIC_GENERATION_UNKNOWN !=
			   l->block_ref_to->generation) {
			printk(KERN_INFO "btrfs: attempt to write superblock"
			       " which references block %c @%llu (%s/%llu/%d)"
			       " with generation %llu !="
			       " parent generation %llu!\n",
			       btrfsic_get_block_type(state, l->block_ref_to),
			       l->block_ref_to->logical_bytenr,
			       l->block_ref_to->dev_state->name,
			       l->block_ref_to->dev_bytenr,
			       l->block_ref_to->mirror_num,
			       l->block_ref_to->generation,
			       l->parent_generation);
			ret = -1;
		} else if (l->block_ref_to->flush_gen >
			   l->block_ref_to->dev_state->last_flush_gen) {
			printk(KERN_INFO "btrfs: attempt to write superblock"
			       " which references block %c @%llu (%s/%llu/%d)"
			       " which is not flushed out of disk's write cache"
			       " (block flush_gen=%llu,"
			       " dev->flush_gen=%llu)!\n",
			       btrfsic_get_block_type(state, l->block_ref_to),
			       l->block_ref_to->logical_bytenr,
			       l->block_ref_to->dev_state->name,
			       l->block_ref_to->dev_bytenr,
			       l->block_ref_to->mirror_num,
			       l->block_ref_to->flush_gen,
			       l->block_ref_to->dev_state->last_flush_gen);
			ret = -1;
		} else if (-1 == btrfsic_check_all_ref_blocks(state,
							      l->block_ref_to,
							      recursion_level +
							      1)) {
			ret = -1;
		}
	}

	return ret;
}
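/*
 * Summary note: the checks above implement rule 2a (every referenced block
 * must have been written, be iodone without error and be flushed out of the
 * disk's write cache) and rule 2b (the child's generation must match the
 * generation recorded in the parent) from the header comment, recursing
 * through the reference graph up to 3 + BTRFS_MAX_LEVEL levels deep.
 */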
static int btrfsic_is_block_ref_by_superblock(
		const struct btrfsic_state *state,
		const struct btrfsic_block *block,
		int recursion_level)
{
	const struct btrfsic_block_link *l;

	if (recursion_level >= 3 + BTRFS_MAX_LEVEL) {
		/* refer to comment at "abort cyclic linkage (case 1)" */
		if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
			printk(KERN_INFO
			       "btrfsic: abort cyclic linkage (case 2).\n");

		return 0;
	}

	/*
	 * This algorithm is recursive because the amount of used stack space
	 * is very small and the max recursion depth is limited.
	 */
	list_for_each_entry(l, &block->ref_from_list, node_ref_from) {
		if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
			printk(KERN_INFO
			       "rl=%d, %c @%llu (%s/%llu/%d)"
			       " is ref %u* from %c @%llu (%s/%llu/%d)\n",
			       recursion_level,
			       btrfsic_get_block_type(state, block),
			       block->logical_bytenr, block->dev_state->name,
			       block->dev_bytenr, block->mirror_num,
			       l->ref_cnt,
			       btrfsic_get_block_type(state, l->block_ref_from),
			       l->block_ref_from->logical_bytenr,
			       l->block_ref_from->dev_state->name,
			       l->block_ref_from->dev_bytenr,
			       l->block_ref_from->mirror_num);
		if (l->block_ref_from->is_superblock &&
		    state->latest_superblock->dev_bytenr ==
		    l->block_ref_from->dev_bytenr &&
		    state->latest_superblock->dev_state->bdev ==
		    l->block_ref_from->dev_state->bdev)
			return 1;
		else if (btrfsic_is_block_ref_by_superblock(state,
							    l->block_ref_from,
							    recursion_level +
							    1))
			return 1;
	}

	return 0;
}
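/*
 * Summary note: this predicate backs rule 1 from the header comment. A write
 * is flagged in btrfsic_process_written_block() when the target block is
 * still reachable, directly or indirectly, from the most recently written
 * superblock.
 */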
static void btrfsic_print_add_link(const struct btrfsic_state *state,
				   const struct btrfsic_block_link *l)
{
	printk(KERN_INFO
	       "Add %u* link from %c @%llu (%s/%llu/%d)"
	       " to %c @%llu (%s/%llu/%d).\n",
	       l->ref_cnt,
	       btrfsic_get_block_type(state, l->block_ref_from),
	       l->block_ref_from->logical_bytenr,
	       l->block_ref_from->dev_state->name,
	       l->block_ref_from->dev_bytenr, l->block_ref_from->mirror_num,
	       btrfsic_get_block_type(state, l->block_ref_to),
	       l->block_ref_to->logical_bytenr,
	       l->block_ref_to->dev_state->name, l->block_ref_to->dev_bytenr,
	       l->block_ref_to->mirror_num);
}
static void btrfsic_print_rem_link(const struct btrfsic_state *state,
				   const struct btrfsic_block_link *l)
{
	printk(KERN_INFO
	       "Rem %u* link from %c @%llu (%s/%llu/%d)"
	       " to %c @%llu (%s/%llu/%d).\n",
	       l->ref_cnt,
	       btrfsic_get_block_type(state, l->block_ref_from),
	       l->block_ref_from->logical_bytenr,
	       l->block_ref_from->dev_state->name,
	       l->block_ref_from->dev_bytenr, l->block_ref_from->mirror_num,
	       btrfsic_get_block_type(state, l->block_ref_to),
	       l->block_ref_to->logical_bytenr,
	       l->block_ref_to->dev_state->name, l->block_ref_to->dev_bytenr,
	       l->block_ref_to->mirror_num);
}
static char btrfsic_get_block_type(const struct btrfsic_state *state,
				   const struct btrfsic_block *block)
{
	if (block->is_superblock &&
	    state->latest_superblock->dev_bytenr == block->dev_bytenr &&
	    state->latest_superblock->dev_state->bdev == block->dev_state->bdev)
		return 'S';
	else if (block->is_superblock)
		return 's';
	else if (block->is_metadata)
		return 'M';
	else
		return 'D';
}
static void btrfsic_dump_tree(const struct btrfsic_state *state)
{
	btrfsic_dump_tree_sub(state, state->latest_superblock, 0);
}
static void btrfsic_dump_tree_sub(const struct btrfsic_state *state,
				  const struct btrfsic_block *block,
				  int indent_level)
{
	const struct btrfsic_block_link *l;
	int indent_add;
	static char buf[80];
	int cursor_position;

	/*
	 * It would be better to fill an on-stack buffer with a complete line
	 * and dump it at once when it is time to print a newline character.
	 */

	/*
	 * This algorithm is recursive because the amount of used stack space
	 * is very small and the max recursion depth is limited.
	 */
	indent_add = sprintf(buf, "%c-%llu(%s/%llu/%d)",
			     btrfsic_get_block_type(state, block),
			     block->logical_bytenr, block->dev_state->name,
			     block->dev_bytenr, block->mirror_num);
	if (indent_level + indent_add > BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL) {
		printk("[...]\n");
		return;
	}
	printk(buf);
	indent_level += indent_add;
	if (list_empty(&block->ref_to_list)) {
		printk("\n");
		return;
	}
	if (block->mirror_num > 1 &&
	    !(state->print_mask & BTRFSIC_PRINT_MASK_TREE_WITH_ALL_MIRRORS)) {
		printk(" [...]\n");
		return;
	}

	cursor_position = indent_level;
	list_for_each_entry(l, &block->ref_to_list, node_ref_to) {
		while (cursor_position < indent_level) {
			printk(" ");
			cursor_position++;
		}
		if (l->ref_cnt > 1)
			indent_add = sprintf(buf, " %d*--> ", l->ref_cnt);
		else
			indent_add = sprintf(buf, " --> ");
		if (indent_level + indent_add >
		    BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL) {
			printk("[...]\n");
			cursor_position = 0;
			continue;
		}

		printk(buf);

		btrfsic_dump_tree_sub(state, l->block_ref_to,
				      indent_level + indent_add);
		cursor_position = 0;
	}
}
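/*
 * Hypothetical example of the dump format produced above (device name and
 * numbers are made up):
 *
 *   S-65536(sdb1/65536/1) --> M-30408704(sdb1/30408704/1) --> ...
 *
 * Each node is printed as <type>-<logical bytenr>(<dev name>/<dev bytenr>/
 * <mirror>); links are rendered as " --> " or " <n>*--> " for multiply
 * referenced targets, and over-long branches are cut off with "[...]".
 */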
static struct btrfsic_block_link *btrfsic_block_link_lookup_or_add(
		struct btrfsic_state *state,
		struct btrfsic_block_data_ctx *next_block_ctx,
		struct btrfsic_block *next_block,
		struct btrfsic_block *from_block,
		u64 parent_generation)
{
	struct btrfsic_block_link *l;

	l = btrfsic_block_link_hashtable_lookup(next_block_ctx->dev->bdev,
						next_block_ctx->dev_bytenr,
						from_block->dev_state->bdev,
						from_block->dev_bytenr,
						&state->block_link_hashtable);
	if (NULL == l) {
		l = btrfsic_block_link_alloc();
		if (NULL == l) {
			printk(KERN_INFO
			       "btrfsic: error, kmalloc failed!\n");
			return NULL;
		}

		l->block_ref_to = next_block;
		l->block_ref_from = from_block;
		l->ref_cnt = 1;
		l->parent_generation = parent_generation;

		if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
			btrfsic_print_add_link(state, l);

		list_add(&l->node_ref_to, &from_block->ref_to_list);
		list_add(&l->node_ref_from, &next_block->ref_from_list);

		btrfsic_block_link_hashtable_add(l,
						 &state->block_link_hashtable);
	} else {
		l->ref_cnt++;
		l->parent_generation = parent_generation;
		if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
			btrfsic_print_add_link(state, l);
	}

	return l;
}
static struct btrfsic_block *btrfsic_block_lookup_or_add(
		struct btrfsic_state *state,
		struct btrfsic_block_data_ctx *block_ctx,
		const char *additional_string,
		int is_metadata,
		int is_iodone,
		int never_written,
		int mirror_num,
		int *was_created)
{
	struct btrfsic_block *block;

	block = btrfsic_block_hashtable_lookup(block_ctx->dev->bdev,
					       block_ctx->dev_bytenr,
					       &state->block_hashtable);
	if (NULL == block) {
		struct btrfsic_dev_state *dev_state;

		block = btrfsic_block_alloc();
		if (NULL == block) {
			printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
			return NULL;
		}
		dev_state = btrfsic_dev_state_lookup(block_ctx->dev->bdev);
		if (NULL == dev_state) {
			printk(KERN_INFO
			       "btrfsic: error, lookup dev_state failed!\n");
			btrfsic_block_free(block);
			return NULL;
		}
		block->dev_state = dev_state;
		block->dev_bytenr = block_ctx->dev_bytenr;
		block->logical_bytenr = block_ctx->start;
		block->is_metadata = is_metadata;
		block->is_iodone = is_iodone;
		block->never_written = never_written;
		block->mirror_num = mirror_num;
		if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
			printk(KERN_INFO
			       "New %s%c-block @%llu (%s/%llu/%d)\n",
			       additional_string,
			       btrfsic_get_block_type(state, block),
			       block->logical_bytenr, dev_state->name,
			       block->dev_bytenr, mirror_num);
		list_add(&block->all_blocks_node, &state->all_blocks_list);
		btrfsic_block_hashtable_add(block, &state->block_hashtable);
		if (NULL != was_created)
			*was_created = 1;
	} else {
		if (NULL != was_created)
			*was_created = 0;
	}

	return block;
}
static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state,
					   u64 bytenr,
					   struct btrfsic_dev_state *dev_state,
					   u64 dev_bytenr)
{
	int num_copies;
	int mirror_num;
	int ret;
	struct btrfsic_block_data_ctx block_ctx;
	int match = 0;

	num_copies = btrfs_num_copies(state->root->fs_info,
				      bytenr, state->metablock_size);

	for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
		ret = btrfsic_map_block(state, bytenr, state->metablock_size,
					&block_ctx, mirror_num);
		if (ret) {
			printk(KERN_INFO "btrfsic:"
			       " btrfsic_map_block(logical @%llu,"
			       " mirror %d) failed!\n",
			       bytenr, mirror_num);
			continue;
		}

		if (dev_state->bdev == block_ctx.dev->bdev &&
		    dev_bytenr == block_ctx.dev_bytenr) {
			match++;
			btrfsic_release_block_ctx(&block_ctx);
			break;
		}
		btrfsic_release_block_ctx(&block_ctx);
	}

	if (WARN_ON(!match)) {
		printk(KERN_INFO "btrfs: attempt to write M-block which contains logical bytenr that doesn't map to dev+physical bytenr of submit_bio,"
		       " buffer->log_bytenr=%llu, submit_bio(bdev=%s,"
		       " phys_bytenr=%llu)!\n",
		       bytenr, dev_state->name, dev_bytenr);
		for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
			ret = btrfsic_map_block(state, bytenr,
						state->metablock_size,
						&block_ctx, mirror_num);
			if (ret)
				continue;

			printk(KERN_INFO "Read logical bytenr @%llu maps to"
			       " (%s/%llu/%d)\n",
			       bytenr, block_ctx.dev->name,
			       block_ctx.dev_bytenr, mirror_num);
		}
	}
}
static struct btrfsic_dev_state *btrfsic_dev_state_lookup(
		struct block_device *bdev)
{
	struct btrfsic_dev_state *ds;

	ds = btrfsic_dev_state_hashtable_lookup(bdev,
						&btrfsic_dev_state_hashtable);

	return ds;
}
int btrfsic_submit_bh(int rw, struct buffer_head *bh)
{
	struct btrfsic_dev_state *dev_state;

	if (!btrfsic_is_initialized)
		return submit_bh(rw, bh);

	mutex_lock(&btrfsic_mutex);
	/* since btrfsic_submit_bh() might also be called before
	 * btrfsic_mount(), this might return NULL */
	dev_state = btrfsic_dev_state_lookup(bh->b_bdev);

	/* Only called to write the superblock (incl. FLUSH/FUA) */
	if (NULL != dev_state &&
	    (rw & WRITE) && bh->b_size > 0) {
		u64 dev_bytenr;

		dev_bytenr = 4096 * bh->b_blocknr;
		if (dev_state->state->print_mask &
		    BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
			printk(KERN_INFO
			       "submit_bh(rw=0x%x, blocknr=%llu (bytenr %llu),"
			       " size=%zu, data=%p, bdev=%p)\n",
			       rw, (unsigned long long)bh->b_blocknr,
			       dev_bytenr, bh->b_size, bh->b_data, bh->b_bdev);
		btrfsic_process_written_block(dev_state, dev_bytenr,
					      &bh->b_data, 1, NULL,
					      NULL, bh, rw);
	} else if (NULL != dev_state && (rw & REQ_FLUSH)) {
		if (dev_state->state->print_mask &
		    BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
			printk(KERN_INFO
			       "submit_bh(rw=0x%x FLUSH, bdev=%p)\n",
			       rw, bh->b_bdev);
		if (!dev_state->dummy_block_for_bio_bh_flush.is_iodone) {
			if ((dev_state->state->print_mask &
			     (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH |
			      BTRFSIC_PRINT_MASK_VERBOSE)))
				printk(KERN_INFO
				       "btrfsic_submit_bh(%s) with FLUSH"
				       " but dummy block already in use"
				       " (ignored)!\n",
				       dev_state->name);
		} else {
			struct btrfsic_block *const block =
				&dev_state->dummy_block_for_bio_bh_flush;

			block->is_iodone = 0;
			block->never_written = 0;
			block->iodone_w_error = 0;
			block->flush_gen = dev_state->last_flush_gen + 1;
			block->submit_bio_bh_rw = rw;
			block->orig_bio_bh_private = bh->b_private;
			block->orig_bio_bh_end_io.bh = bh->b_end_io;
			block->next_in_same_bio = NULL;
			bh->b_private = block;
			bh->b_end_io = btrfsic_bh_end_io;
		}
	}
	mutex_unlock(&btrfsic_mutex);
	return submit_bh(rw, bh);
}
static void __btrfsic_submit_bio(int rw, struct bio *bio)
{
	struct btrfsic_dev_state *dev_state;

	if (!btrfsic_is_initialized)
		return;

	mutex_lock(&btrfsic_mutex);
	/* since btrfsic_submit_bio() is also called before
	 * btrfsic_mount(), this might return NULL */
	dev_state = btrfsic_dev_state_lookup(bio->bi_bdev);
	if (NULL != dev_state &&
	    (rw & WRITE) && NULL != bio->bi_io_vec) {
		unsigned int i;
		u64 dev_bytenr;
		u64 cur_bytenr;
		int bio_is_patched;
		char **mapped_datav;

		dev_bytenr = 512 * bio->bi_iter.bi_sector;
		bio_is_patched = 0;
		if (dev_state->state->print_mask &
		    BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
			printk(KERN_INFO
			       "submit_bio(rw=0x%x, bi_vcnt=%u,"
			       " bi_sector=%llu (bytenr %llu), bi_bdev=%p)\n",
			       rw, bio->bi_vcnt,
			       (unsigned long long)bio->bi_iter.bi_sector,
			       dev_bytenr, bio->bi_bdev);

		mapped_datav = kmalloc_array(bio->bi_vcnt,
					     sizeof(*mapped_datav), GFP_NOFS);
		if (!mapped_datav)
			goto leave;
		cur_bytenr = dev_bytenr;
		for (i = 0; i < bio->bi_vcnt; i++) {
			BUG_ON(bio->bi_io_vec[i].bv_len != PAGE_CACHE_SIZE);
			mapped_datav[i] = kmap(bio->bi_io_vec[i].bv_page);
			if (!mapped_datav[i]) {
				while (i > 0) {
					i--;
					kunmap(bio->bi_io_vec[i].bv_page);
				}
				kfree(mapped_datav);
				goto leave;
			}
			if (dev_state->state->print_mask &
			    BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH_VERBOSE)
				printk(KERN_INFO
				       "#%u: bytenr=%llu, len=%u, offset=%u\n",
				       i, cur_bytenr, bio->bi_io_vec[i].bv_len,
				       bio->bi_io_vec[i].bv_offset);
			cur_bytenr += bio->bi_io_vec[i].bv_len;
		}
		btrfsic_process_written_block(dev_state, dev_bytenr,
					      mapped_datav, bio->bi_vcnt,
					      bio, &bio_is_patched,
					      NULL, rw);
		while (i > 0) {
			i--;
			kunmap(bio->bi_io_vec[i].bv_page);
		}
		kfree(mapped_datav);
	} else if (NULL != dev_state && (rw & REQ_FLUSH)) {
		if (dev_state->state->print_mask &
		    BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
			printk(KERN_INFO
			       "submit_bio(rw=0x%x FLUSH, bdev=%p)\n",
			       rw, bio->bi_bdev);
		if (!dev_state->dummy_block_for_bio_bh_flush.is_iodone) {
			if ((dev_state->state->print_mask &
			     (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH |
			      BTRFSIC_PRINT_MASK_VERBOSE)))
				printk(KERN_INFO
				       "btrfsic_submit_bio(%s) with FLUSH"
				       " but dummy block already in use"
				       " (ignored)!\n",
				       dev_state->name);
		} else {
			struct btrfsic_block *const block =
				&dev_state->dummy_block_for_bio_bh_flush;

			block->is_iodone = 0;
			block->never_written = 0;
			block->iodone_w_error = 0;
			block->flush_gen = dev_state->last_flush_gen + 1;
			block->submit_bio_bh_rw = rw;
			block->orig_bio_bh_private = bio->bi_private;
			block->orig_bio_bh_end_io.bio = bio->bi_end_io;
			block->next_in_same_bio = NULL;
			bio->bi_private = block;
			bio->bi_end_io = btrfsic_bio_end_io;
		}
	}
leave:
	mutex_unlock(&btrfsic_mutex);
}
void btrfsic_submit_bio(int rw, struct bio *bio)
{
	__btrfsic_submit_bio(rw, bio);
	submit_bio(rw, bio);
}

int btrfsic_submit_bio_wait(int rw, struct bio *bio)
{
	__btrfsic_submit_bio(rw, bio);
	return submit_bio_wait(rw, bio);
}
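/*
 * Illustrative call-site sketch (an assumption for clarity, not copied from
 * the btrfs writeback code): writers are expected to go through the wrappers
 * above so the checker sees every write and flush, e.g.
 *
 *	#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
 *		btrfsic_submit_bio(WRITE | REQ_FLUSH | REQ_FUA, bio);
 *	#else
 *		submit_bio(WRITE | REQ_FLUSH | REQ_FUA, bio);
 *	#endif
 *
 * When the checker is not active (btrfsic_is_initialized == 0), the wrappers
 * fall through to submit_bio()/submit_bh() immediately.
 */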
int btrfsic_mount(struct btrfs_root *root,
		  struct btrfs_fs_devices *fs_devices,
		  int including_extent_data, u32 print_mask)
{
	int ret;
	struct btrfsic_state *state;
	struct list_head *dev_head = &fs_devices->devices;
	struct btrfs_device *device;

	if (root->nodesize & ((u64)PAGE_CACHE_SIZE - 1)) {
		printk(KERN_INFO
		       "btrfsic: cannot handle nodesize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n",
		       root->nodesize, PAGE_CACHE_SIZE);
		return -1;
	}
	if (root->sectorsize & ((u64)PAGE_CACHE_SIZE - 1)) {
		printk(KERN_INFO
		       "btrfsic: cannot handle sectorsize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n",
		       root->sectorsize, PAGE_CACHE_SIZE);
		return -1;
	}
	state = kzalloc(sizeof(*state), GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
	if (!state) {
		state = vzalloc(sizeof(*state));
		if (!state) {
			printk(KERN_INFO "btrfs check-integrity: vzalloc() failed!\n");
			return -1;
		}
	}

	if (!btrfsic_is_initialized) {
		mutex_init(&btrfsic_mutex);
		btrfsic_dev_state_hashtable_init(&btrfsic_dev_state_hashtable);
		btrfsic_is_initialized = 1;
	}
	mutex_lock(&btrfsic_mutex);
	state->root = root;
	state->print_mask = print_mask;
	state->include_extent_data = including_extent_data;
	state->csum_size = 0;
	state->metablock_size = root->nodesize;
	state->datablock_size = root->sectorsize;
	INIT_LIST_HEAD(&state->all_blocks_list);
	btrfsic_block_hashtable_init(&state->block_hashtable);
	btrfsic_block_link_hashtable_init(&state->block_link_hashtable);
	state->max_superblock_generation = 0;
	state->latest_superblock = NULL;

	list_for_each_entry(device, dev_head, dev_list) {
		struct btrfsic_dev_state *ds;
		char *p;

		if (!device->bdev || !device->name)
			continue;

		ds = btrfsic_dev_state_alloc();
		if (NULL == ds) {
			printk(KERN_INFO
			       "btrfs check-integrity: kmalloc() failed!\n");
			mutex_unlock(&btrfsic_mutex);
			return -1;
		}
		ds->bdev = device->bdev;
		ds->state = state;
		bdevname(ds->bdev, ds->name);
		ds->name[BDEVNAME_SIZE - 1] = '\0';
		for (p = ds->name; *p != '\0'; p++);
		while (p > ds->name && *p != '/')
			p--;
		if (*p == '/')
			p++;
		strlcpy(ds->name, p, sizeof(ds->name));
		btrfsic_dev_state_hashtable_add(ds,
						&btrfsic_dev_state_hashtable);
	}

	ret = btrfsic_process_superblock(state, fs_devices);
	if (0 != ret) {
		mutex_unlock(&btrfsic_mutex);
		btrfsic_unmount(root, fs_devices);
		return ret;
	}

	if (state->print_mask & BTRFSIC_PRINT_MASK_INITIAL_DATABASE)
		btrfsic_dump_database(state);
	if (state->print_mask & BTRFSIC_PRINT_MASK_INITIAL_TREE)
		btrfsic_dump_tree(state);

	mutex_unlock(&btrfsic_mutex);
	return 0;
}
void btrfsic_unmount(struct btrfs_root *root,
		     struct btrfs_fs_devices *fs_devices)
{
	struct btrfsic_block *b_all, *tmp_all;
	struct btrfsic_state *state;
	struct list_head *dev_head = &fs_devices->devices;
	struct btrfs_device *device;

	if (!btrfsic_is_initialized)
		return;

	mutex_lock(&btrfsic_mutex);

	state = NULL;
	list_for_each_entry(device, dev_head, dev_list) {
		struct btrfsic_dev_state *ds;

		if (!device->bdev || !device->name)
			continue;

		ds = btrfsic_dev_state_hashtable_lookup(
				device->bdev,
				&btrfsic_dev_state_hashtable);
		if (NULL != ds) {
			state = ds->state;
			btrfsic_dev_state_hashtable_remove(ds);
			btrfsic_dev_state_free(ds);
		}
	}

	if (NULL == state) {
		printk(KERN_INFO
		       "btrfsic: error, cannot find state information"
		       " on umount!\n");
		mutex_unlock(&btrfsic_mutex);
		return;
	}

	/*
	 * Don't care about keeping the lists' state up to date,
	 * just free all memory that was allocated dynamically.
	 * Free the blocks and the block_links.
	 */
	list_for_each_entry_safe(b_all, tmp_all, &state->all_blocks_list,
				 all_blocks_node) {
		struct btrfsic_block_link *l, *tmp;

		list_for_each_entry_safe(l, tmp, &b_all->ref_to_list,
					 node_ref_to) {
			if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
				btrfsic_print_rem_link(state, l);

			l->ref_cnt--;
			if (0 == l->ref_cnt)
				btrfsic_block_link_free(l);
		}

		if (b_all->is_iodone || b_all->never_written)
			btrfsic_block_free(b_all);
		else
			printk(KERN_INFO "btrfs: attempt to free %c-block"
			       " @%llu (%s/%llu/%d) on umount which is"
			       " not yet iodone!\n",
			       btrfsic_get_block_type(state, b_all),
			       b_all->logical_bytenr, b_all->dev_state->name,
			       b_all->dev_bytenr, b_all->mirror_num);
	}

	mutex_unlock(&btrfsic_mutex);
}