| 1 | #ifndef __EXTENTIO__ |
| 2 | #define __EXTENTIO__ |
| 3 | |
| 4 | #include <linux/rbtree.h> |
| 5 | |
| 6 | /* bits for the extent state */ |
| 7 | #define EXTENT_DIRTY (1U << 0) /* every EXTENT_* value in this list is a single-bit mask in an unsigned word */ |
| 8 | #define EXTENT_WRITEBACK (1U << 1) |
| 9 | #define EXTENT_UPTODATE (1U << 2) |
| 10 | #define EXTENT_LOCKED (1U << 3) |
| 11 | #define EXTENT_NEW (1U << 4) |
| 12 | #define EXTENT_DELALLOC (1U << 5) |
| 13 | #define EXTENT_DEFRAG (1U << 6) |
| 14 | #define EXTENT_BOUNDARY (1U << 9) /* bits 7 and 8 are not assigned in this list */ |
| 15 | #define EXTENT_NODATASUM (1U << 10) |
| 16 | #define EXTENT_DO_ACCOUNTING (1U << 11) |
| 17 | #define EXTENT_FIRST_DELALLOC (1U << 12) |
| 18 | #define EXTENT_NEED_WAIT (1U << 13) |
| 19 | #define EXTENT_DAMAGED (1U << 14) |
| 20 | #define EXTENT_NORESERVE (1U << 15) |
| 21 | #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) /* composite mask of the LOCKED and WRITEBACK bits */ |
| 22 | #define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC) /* composite mask of the DO_ACCOUNTING and FIRST_DELALLOC bits */ |
| 23 | |
| 24 | /* |
| 25 | * flags for bio submission. The high bits indicate the compression |
| 26 | * type for this bio |
| 27 | */ |
| 28 | #define EXTENT_BIO_COMPRESSED 1 /* the three flags below are distinct single-bit values (1, 2, 4) */ |
| 29 | #define EXTENT_BIO_TREE_LOG 2 |
| 30 | #define EXTENT_BIO_PARENT_LOCKED 4 |
| 31 | #define EXTENT_BIO_FLAG_SHIFT 16 /* the compression type occupies this bit and above; see extent_set_compress_type() and extent_compress_type() */ |
| 32 | |
| 33 | /* these are bit numbers for test/set bit */ |
| 34 | #define EXTENT_BUFFER_UPTODATE 0 /* values in this list are bit positions, not masks */ |
| 35 | #define EXTENT_BUFFER_DIRTY 2 /* bit 1 is not assigned in this list */ |
| 36 | #define EXTENT_BUFFER_CORRUPT 3 |
| 37 | #define EXTENT_BUFFER_READAHEAD 4 /* this got triggered by readahead */ |
| 38 | #define EXTENT_BUFFER_TREE_REF 5 |
| 39 | #define EXTENT_BUFFER_STALE 6 |
| 40 | #define EXTENT_BUFFER_WRITEBACK 7 |
| 41 | #define EXTENT_BUFFER_READ_ERR 8 /* read IO error */ |
| 42 | #define EXTENT_BUFFER_DUMMY 9 |
| 43 | #define EXTENT_BUFFER_IN_TREE 10 |
| 44 | #define EXTENT_BUFFER_WRITE_ERR 11 /* write IO error */ |
| 45 | |
| 46 | /* these are flags for extent_clear_unlock_delalloc */ |
| 47 | #define PAGE_UNLOCK (1 << 0) /* single-bit masks; NOTE(review): presumably OR-ed together into the page_ops argument of extent_clear_unlock_delalloc() -- confirm against callers */ |
| 48 | #define PAGE_CLEAR_DIRTY (1 << 1) |
| 49 | #define PAGE_SET_WRITEBACK (1 << 2) |
| 50 | #define PAGE_END_WRITEBACK (1 << 3) |
| 51 | #define PAGE_SET_PRIVATE2 (1 << 4) |
| 52 | #define PAGE_SET_ERROR (1 << 5) |
| 53 | |
| 54 | /* |
| 55 | * page->private values. Every page that is controlled by the extent |
| 56 | * map has page->private set to one. |
| 57 | */ |
| 58 | #define EXTENT_PAGE_PRIVATE 1 |
| 59 | |
| 60 | struct extent_state; |
| 61 | struct btrfs_root; |
| 62 | struct btrfs_io_bio; |
| 63 | |
| 64 | typedef int (extent_submit_bio_hook_t)(struct inode *inode, int rw, |
| 65 | struct bio *bio, int mirror_num, |
| 66 | unsigned long bio_flags, u64 bio_offset); |
| 67 | struct extent_io_ops { /* table of callback hooks; struct extent_io_tree refers to one through its const 'ops' pointer */ |
| 68 | int (*fill_delalloc)(struct inode *inode, struct page *locked_page, |
| 69 | u64 start, u64 end, int *page_started, |
| 70 | unsigned long *nr_written); |
| 71 | int (*writepage_start_hook)(struct page *page, u64 start, u64 end); |
| 72 | int (*writepage_io_hook)(struct page *page, u64 start, u64 end); |
| 73 | extent_submit_bio_hook_t *submit_bio_hook; /* pointer to the extent_submit_bio_hook_t function type */ |
| 74 | int (*merge_bio_hook)(int rw, struct page *page, unsigned long offset, |
| 75 | size_t size, struct bio *bio, |
| 76 | unsigned long bio_flags); |
| 77 | int (*readpage_io_failed_hook)(struct page *page, int failed_mirror); |
| 78 | int (*readpage_end_io_hook)(struct btrfs_io_bio *io_bio, u64 phy_offset, |
| 79 | struct page *page, u64 start, u64 end, |
| 80 | int mirror); |
| 81 | int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end, |
| 82 | struct extent_state *state, int uptodate); |
| 83 | void (*set_bit_hook)(struct inode *inode, struct extent_state *state, |
| 84 | unsigned *bits); /* NOTE(review): bits is passed by pointer, presumably so the hook can inspect or adjust it -- confirm in the implementation */ |
| 85 | void (*clear_bit_hook)(struct inode *inode, struct extent_state *state, |
| 86 | unsigned *bits); /* same signature as set_bit_hook */ |
| 87 | void (*merge_extent_hook)(struct inode *inode, |
| 88 | struct extent_state *new, |
| 89 | struct extent_state *other); |
| 90 | void (*split_extent_hook)(struct inode *inode, |
| 91 | struct extent_state *orig, u64 split); |
| 92 | }; |
| 93 | |
| 94 | struct extent_io_tree { /* one extent IO tree: an rb-tree root plus its bookkeeping, lock and hook table */ |
| 95 | struct rb_root state; /* NOTE(review): presumably the root holding struct extent_state nodes (they embed an rb_node) -- confirm in extent_io.c */ |
| 96 | struct address_space *mapping; /* same type as the mapping argument of extent_io_tree_init() */ |
| 97 | u64 dirty_bytes; |
| 98 | int track_uptodate; |
| 99 | spinlock_t lock; /* NOTE(review): presumably serializes access to this tree -- confirm in extent_io.c */ |
| 100 | const struct extent_io_ops *ops; /* callback table, see struct extent_io_ops */ |
| 101 | }; |
| 102 | |
| 103 | struct extent_state { /* one byte range [start, end] together with its state word */ |
| 104 | u64 start; |
| 105 | u64 end; /* inclusive */ |
| 106 | struct rb_node rb_node; /* rb-tree linkage for this record */ |
| 107 | |
| 108 | /* ADD NEW ELEMENTS AFTER THIS */ |
| 109 | wait_queue_head_t wq; |
| 110 | atomic_t refs; /* NOTE(review): presumably a reference count -- confirm against free_extent_state() */ |
| 111 | unsigned state; /* NOTE(review): presumably a mask of the EXTENT_* "bits for the extent state" -- confirm */ |
| 112 | |
| 113 | /* for use by the FS */ |
| 114 | u64 private; |
| 115 | |
| 116 | #ifdef CONFIG_BTRFS_DEBUG |
| 117 | struct list_head leak_list; /* member exists only when CONFIG_BTRFS_DEBUG is defined */ |
| 118 | #endif |
| 119 | }; |
| 120 | |
| 121 | #define INLINE_EXTENT_BUFFER_PAGES 16 /* length of the pages[] array embedded in struct extent_buffer */ |
| 122 | #define MAX_INLINE_EXTENT_BUFFER_SIZE (INLINE_EXTENT_BUFFER_PAGES * PAGE_CACHE_SIZE) /* bytes spanned by that many pages */ |
| 123 | struct extent_buffer { /* a buffer starting at 'start' with length 'len', backed by the pages[] array */ |
| 124 | u64 start; |
| 125 | unsigned long len; |
| 126 | unsigned long bflags; /* NOTE(review): presumably holds the EXTENT_BUFFER_* bit numbers via test/set bit -- confirm */ |
| 127 | struct btrfs_fs_info *fs_info; |
| 128 | spinlock_t refs_lock; |
| 129 | atomic_t refs; /* incremented by extent_buffer_get() */ |
| 130 | atomic_t io_pages; |
| 131 | int read_mirror; |
| 132 | struct rcu_head rcu_head; |
| 133 | pid_t lock_owner; |
| 134 | |
| 135 | /* atomic counters for the extent buffer's read/write locking */ |
| 136 | atomic_t write_locks; |
| 137 | atomic_t read_locks; |
| 138 | atomic_t blocking_writers; |
| 139 | atomic_t blocking_readers; |
| 140 | atomic_t spinning_readers; |
| 141 | atomic_t spinning_writers; |
| 142 | short lock_nested; |
| 143 | /* >= 0 if eb belongs to a log tree, -1 otherwise */ |
| 144 | short log_index; |
| 145 | |
| 146 | /* protects write locks */ |
| 147 | rwlock_t lock; |
| 148 | |
| 149 | /* readers use write_lock_wq while they wait for the write |
| 150 | * lock holders to unlock |
| 151 | */ |
| 152 | wait_queue_head_t write_lock_wq; |
| 153 | |
| 154 | /* writers use read_lock_wq while they wait for readers |
| 155 | * to unlock |
| 156 | */ |
| 157 | wait_queue_head_t read_lock_wq; |
| 158 | struct page *pages[INLINE_EXTENT_BUFFER_PAGES]; /* fixed-size inline array of page pointers */ |
| 159 | #ifdef CONFIG_BTRFS_DEBUG |
| 160 | struct list_head leak_list; /* member exists only when CONFIG_BTRFS_DEBUG is defined */ |
| 161 | #endif |
| 162 | }; |
| 163 | |
| 164 | static inline void extent_set_compress_type(unsigned long *bio_flags, |
| 165 | int compress_type) |
| 166 | { |
| 167 | *bio_flags |= compress_type << EXTENT_BIO_FLAG_SHIFT; |
| 168 | } |
| 169 | |
| 170 | static inline int extent_compress_type(unsigned long bio_flags) |
| 171 | { |
| 172 | return bio_flags >> EXTENT_BIO_FLAG_SHIFT; |
| 173 | } |
| 174 | |
| 175 | struct extent_map_tree; |
| 176 | |
| 177 | typedef struct extent_map *(get_extent_t)(struct inode *inode, |
| 178 | struct page *page, |
| 179 | size_t pg_offset, |
| 180 | u64 start, u64 len, |
| 181 | int create); |
| 182 | |
| 183 | void extent_io_tree_init(struct extent_io_tree *tree, |
| 184 | struct address_space *mapping); |
| 185 | int try_release_extent_mapping(struct extent_map_tree *map, |
| 186 | struct extent_io_tree *tree, struct page *page, |
| 187 | gfp_t mask); |
| 188 | int try_release_extent_buffer(struct page *page); |
| 189 | int lock_extent(struct extent_io_tree *tree, u64 start, u64 end); |
| 190 | int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, |
| 191 | unsigned bits, struct extent_state **cached); |
| 192 | int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end); |
| 193 | int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end, |
| 194 | struct extent_state **cached, gfp_t mask); |
| 195 | int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end); |
| 196 | int extent_read_full_page(struct extent_io_tree *tree, struct page *page, |
| 197 | get_extent_t *get_extent, int mirror_num); |
| 198 | int extent_read_full_page_nolock(struct extent_io_tree *tree, struct page *page, |
| 199 | get_extent_t *get_extent, int mirror_num); |
| 200 | int __init extent_io_init(void); |
| 201 | void extent_io_exit(void); |
| 202 | |
| 203 | u64 count_range_bits(struct extent_io_tree *tree, |
| 204 | u64 *start, u64 search_end, |
| 205 | u64 max_bytes, unsigned bits, int contig); |
| 206 | |
| 207 | void free_extent_state(struct extent_state *state); |
| 208 | int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, |
| 209 | unsigned bits, int filled, |
| 210 | struct extent_state *cached_state); |
| 211 | int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, |
| 212 | unsigned bits, gfp_t mask); |
| 213 | int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, |
| 214 | unsigned bits, int wake, int delete, |
| 215 | struct extent_state **cached, gfp_t mask); |
| 216 | int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, |
| 217 | unsigned bits, gfp_t mask); |
| 218 | int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, |
| 219 | unsigned bits, u64 *failed_start, |
| 220 | struct extent_state **cached_state, gfp_t mask); |
| 221 | int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, |
| 222 | struct extent_state **cached_state, gfp_t mask); |
| 223 | int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, |
| 224 | struct extent_state **cached_state, gfp_t mask); |
| 225 | int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, |
| 226 | gfp_t mask); |
| 227 | int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, |
| 228 | gfp_t mask); |
| 229 | int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, |
| 230 | gfp_t mask); |
| 231 | int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, |
| 232 | unsigned bits, unsigned clear_bits, |
| 233 | struct extent_state **cached_state, gfp_t mask); |
| 234 | int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end, |
| 235 | struct extent_state **cached_state, gfp_t mask); |
| 236 | int set_extent_defrag(struct extent_io_tree *tree, u64 start, u64 end, |
| 237 | struct extent_state **cached_state, gfp_t mask); |
| 238 | int find_first_extent_bit(struct extent_io_tree *tree, u64 start, |
| 239 | u64 *start_ret, u64 *end_ret, unsigned bits, |
| 240 | struct extent_state **cached_state); |
| 241 | int extent_invalidatepage(struct extent_io_tree *tree, |
| 242 | struct page *page, unsigned long offset); |
| 243 | int extent_write_full_page(struct extent_io_tree *tree, struct page *page, |
| 244 | get_extent_t *get_extent, |
| 245 | struct writeback_control *wbc); |
| 246 | int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode, |
| 247 | u64 start, u64 end, get_extent_t *get_extent, |
| 248 | int mode); |
| 249 | int extent_writepages(struct extent_io_tree *tree, |
| 250 | struct address_space *mapping, |
| 251 | get_extent_t *get_extent, |
| 252 | struct writeback_control *wbc); |
| 253 | int btree_write_cache_pages(struct address_space *mapping, |
| 254 | struct writeback_control *wbc); |
| 255 | int extent_readpages(struct extent_io_tree *tree, |
| 256 | struct address_space *mapping, |
| 257 | struct list_head *pages, unsigned nr_pages, |
| 258 | get_extent_t get_extent); |
| 259 | int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, |
| 260 | __u64 start, __u64 len, get_extent_t *get_extent); |
| 261 | int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private); |
| 262 | void set_page_extent_mapped(struct page *page); |
| 263 | |
| 264 | struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info, |
| 265 | u64 start); |
| 266 | struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info, |
| 267 | u64 start); |
| 268 | struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src); |
| 269 | struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info, |
| 270 | u64 start); |
| 271 | void free_extent_buffer(struct extent_buffer *eb); |
| 272 | void free_extent_buffer_stale(struct extent_buffer *eb); |
| 273 | #define WAIT_NONE 0 /* NOTE(review): presumably the values accepted by the 'wait' argument of read_extent_buffer_pages() -- confirm against callers */ |
| 274 | #define WAIT_COMPLETE 1 |
| 275 | #define WAIT_PAGE_LOCK 2 |
| 276 | int read_extent_buffer_pages(struct extent_io_tree *tree, |
| 277 | struct extent_buffer *eb, u64 start, int wait, |
| 278 | get_extent_t *get_extent, int mirror_num); |
| 279 | void wait_on_extent_buffer_writeback(struct extent_buffer *eb); |
| 280 | |
| 281 | static inline unsigned long num_extent_pages(u64 start, u64 len) |
| 282 | { |
| 283 | return ((start + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) - |
| 284 | (start >> PAGE_CACHE_SHIFT); |
| 285 | } |
| 286 | |
| 287 | static inline void extent_buffer_get(struct extent_buffer *eb) |
| 288 | { |
| 289 | atomic_inc(&eb->refs); |
| 290 | } |
| 291 | |
| 292 | int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv, |
| 293 | unsigned long start, |
| 294 | unsigned long len); |
| 295 | void read_extent_buffer(struct extent_buffer *eb, void *dst, |
| 296 | unsigned long start, |
| 297 | unsigned long len); |
| 298 | int read_extent_buffer_to_user(struct extent_buffer *eb, void __user *dst, |
| 299 | unsigned long start, |
| 300 | unsigned long len); |
| 301 | void write_extent_buffer(struct extent_buffer *eb, const void *src, |
| 302 | unsigned long start, unsigned long len); |
| 303 | void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src, |
| 304 | unsigned long dst_offset, unsigned long src_offset, |
| 305 | unsigned long len); |
| 306 | void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, |
| 307 | unsigned long src_offset, unsigned long len); |
| 308 | void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, |
| 309 | unsigned long src_offset, unsigned long len); |
| 310 | void memset_extent_buffer(struct extent_buffer *eb, char c, |
| 311 | unsigned long start, unsigned long len); |
| 312 | int extent_buffer_test_bit(struct extent_buffer *eb, unsigned long start, |
| 313 | unsigned long pos); |
| 314 | void extent_buffer_bitmap_set(struct extent_buffer *eb, unsigned long start, |
| 315 | unsigned long pos, unsigned long len); |
| 316 | void extent_buffer_bitmap_clear(struct extent_buffer *eb, unsigned long start, |
| 317 | unsigned long pos, unsigned long len); |
| 318 | void clear_extent_buffer_dirty(struct extent_buffer *eb); |
| 319 | int set_extent_buffer_dirty(struct extent_buffer *eb); |
| 320 | int set_extent_buffer_uptodate(struct extent_buffer *eb); |
| 321 | int clear_extent_buffer_uptodate(struct extent_buffer *eb); |
| 322 | int extent_buffer_uptodate(struct extent_buffer *eb); |
| 323 | int extent_buffer_under_io(struct extent_buffer *eb); |
| 324 | int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset, |
| 325 | unsigned long min_len, char **map, |
| 326 | unsigned long *map_start, |
| 327 | unsigned long *map_len); |
| 328 | int extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end); |
| 329 | int extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end); |
| 330 | int extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end, |
| 331 | struct page *locked_page, |
| 332 | unsigned bits_to_clear, |
| 333 | unsigned long page_ops); |
| 334 | struct bio * |
| 335 | btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, |
| 336 | gfp_t gfp_flags); |
| 337 | struct bio *btrfs_io_bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs); |
| 338 | struct bio *btrfs_bio_clone(struct bio *bio, gfp_t gfp_mask); |
| 339 | |
| 340 | struct btrfs_fs_info; |
| 341 | |
| 342 | int repair_io_failure(struct inode *inode, u64 start, u64 length, u64 logical, |
| 343 | struct page *page, unsigned int pg_offset, |
| 344 | int mirror_num); |
| 345 | int clean_io_failure(struct inode *inode, u64 start, struct page *page, |
| 346 | unsigned int pg_offset); |
| 347 | int end_extent_writepage(struct page *page, int err, u64 start, u64 end); |
| 348 | int repair_eb_io_failure(struct btrfs_root *root, struct extent_buffer *eb, |
| 349 | int mirror_num); |
| 350 | |
| 351 | /* |
| 352 | * When IO fails, either with EIO or because csum verification fails, we |
| 353 | * try other mirrors that might have a good copy of the data. This |
| 354 | * io_failure_record is used to record state as we go through all the |
| 355 | * mirrors. If another mirror has good data, the page is set up to date |
| 356 | * and things continue. If a good mirror can't be found, the original |
| 357 | * bio end_io callback is called to indicate things have failed. |
| 358 | */ |
| 359 | struct io_failure_record { /* state recorded while the other mirrors are tried after a failed IO */ |
| 360 | struct page *page; |
| 361 | u64 start; |
| 362 | u64 len; |
| 363 | u64 logical; |
| 364 | unsigned long bio_flags; |
| 365 | int this_mirror; /* NOTE(review): presumably the mirror currently being tried -- confirm in extent_io.c */ |
| 366 | int failed_mirror; /* NOTE(review): presumably the mirror whose IO failed -- confirm in extent_io.c */ |
| 367 | int in_validation; |
| 368 | }; |
| 369 | |
| 370 | void btrfs_free_io_failure_record(struct inode *inode, u64 start, u64 end); |
| 371 | int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end, |
| 372 | struct io_failure_record **failrec_ret); |
| 373 | int btrfs_check_repairable(struct inode *inode, struct bio *failed_bio, |
| 374 | struct io_failure_record *failrec, int fail_mirror); |
| 375 | struct bio *btrfs_create_repair_bio(struct inode *inode, struct bio *failed_bio, |
| 376 | struct io_failure_record *failrec, |
| 377 | struct page *page, int pg_offset, int icsum, |
| 378 | bio_end_io_t *endio_func, void *data); |
| 379 | int free_io_failure(struct inode *inode, struct io_failure_record *rec); |
| 380 | #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS |
| 381 | noinline u64 find_lock_delalloc_range(struct inode *inode, |
| 382 | struct extent_io_tree *tree, |
| 383 | struct page *locked_page, u64 *start, |
| 384 | u64 *end, u64 max_bytes); |
| 385 | #endif |
| 386 | struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info, |
| 387 | u64 start); |
| 388 | #endif |