Btrfs: allocator improvements, inode block groups
[deliverable/linux.git] / fs / btrfs / super.c
1 #include <linux/module.h>
2 #include <linux/buffer_head.h>
3 #include <linux/fs.h>
4 #include <linux/pagemap.h>
5 #include <linux/highmem.h>
6 #include <linux/time.h>
7 #include <linux/init.h>
8 #include <linux/string.h>
9 #include <linux/smp_lock.h>
10 #include <linux/backing-dev.h>
11 #include <linux/mpage.h>
12 #include <linux/swap.h>
13 #include <linux/writeback.h>
14 #include <linux/statfs.h>
15 #include "ctree.h"
16 #include "disk-io.h"
17 #include "transaction.h"
18 #include "btrfs_inode.h"
19 #include "ioctl.h"
20
21 void btrfs_fsinfo_release(struct kobject *obj)
22 {
23 struct btrfs_fs_info *fsinfo = container_of(obj,
24 struct btrfs_fs_info, kobj);
25 kfree(fsinfo);
26 }
27
28 struct kobj_type btrfs_fsinfo_ktype = {
29 .release = btrfs_fsinfo_release,
30 };
31
32 struct btrfs_iget_args {
33 u64 ino;
34 struct btrfs_root *root;
35 };
36
37 decl_subsys(btrfs, &btrfs_fsinfo_ktype, NULL);
38
39 #define BTRFS_SUPER_MAGIC 0x9123682E
40
41 static struct inode_operations btrfs_dir_inode_operations;
42 static struct inode_operations btrfs_dir_ro_inode_operations;
43 static struct super_operations btrfs_super_ops;
44 static struct file_operations btrfs_dir_file_operations;
45 static struct inode_operations btrfs_file_inode_operations;
46 static struct address_space_operations btrfs_aops;
47 static struct file_operations btrfs_file_operations;
48
49 static void btrfs_read_locked_inode(struct inode *inode)
50 {
51 struct btrfs_path *path;
52 struct btrfs_inode_item *inode_item;
53 struct btrfs_root *root = BTRFS_I(inode)->root;
54 struct btrfs_key location;
55 struct btrfs_block_group_cache *alloc_group;
56 u64 alloc_group_block;
57 int ret;
58
59 path = btrfs_alloc_path();
60 BUG_ON(!path);
61 btrfs_init_path(path);
62 mutex_lock(&root->fs_info->fs_mutex);
63
64 memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));
65 ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
66 if (ret) {
67 btrfs_free_path(path);
68 goto make_bad;
69 }
70 inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
71 path->slots[0],
72 struct btrfs_inode_item);
73
74 inode->i_mode = btrfs_inode_mode(inode_item);
75 inode->i_nlink = btrfs_inode_nlink(inode_item);
76 inode->i_uid = btrfs_inode_uid(inode_item);
77 inode->i_gid = btrfs_inode_gid(inode_item);
78 inode->i_size = btrfs_inode_size(inode_item);
79 inode->i_atime.tv_sec = btrfs_timespec_sec(&inode_item->atime);
80 inode->i_atime.tv_nsec = btrfs_timespec_nsec(&inode_item->atime);
81 inode->i_mtime.tv_sec = btrfs_timespec_sec(&inode_item->mtime);
82 inode->i_mtime.tv_nsec = btrfs_timespec_nsec(&inode_item->mtime);
83 inode->i_ctime.tv_sec = btrfs_timespec_sec(&inode_item->ctime);
84 inode->i_ctime.tv_nsec = btrfs_timespec_nsec(&inode_item->ctime);
85 inode->i_blocks = btrfs_inode_nblocks(inode_item);
86 inode->i_generation = btrfs_inode_generation(inode_item);
87 alloc_group_block = btrfs_inode_block_group(inode_item);
88 ret = radix_tree_gang_lookup(&root->fs_info->block_group_radix,
89 (void **)&alloc_group,
90 alloc_group_block, 1);
91 BUG_ON(!ret);
92 BTRFS_I(inode)->block_group = alloc_group;
93
94 btrfs_free_path(path);
95 inode_item = NULL;
96
97 mutex_unlock(&root->fs_info->fs_mutex);
98
99 switch (inode->i_mode & S_IFMT) {
100 #if 0
101 default:
102 init_special_inode(inode, inode->i_mode,
103 btrfs_inode_rdev(inode_item));
104 break;
105 #endif
106 case S_IFREG:
107 inode->i_mapping->a_ops = &btrfs_aops;
108 inode->i_fop = &btrfs_file_operations;
109 inode->i_op = &btrfs_file_inode_operations;
110 break;
111 case S_IFDIR:
112 inode->i_fop = &btrfs_dir_file_operations;
113 if (root == root->fs_info->tree_root)
114 inode->i_op = &btrfs_dir_ro_inode_operations;
115 else
116 inode->i_op = &btrfs_dir_inode_operations;
117 break;
118 case S_IFLNK:
119 // inode->i_op = &page_symlink_inode_operations;
120 break;
121 }
122 return;
123
124 make_bad:
125 btrfs_release_path(root, path);
126 btrfs_free_path(path);
127 mutex_unlock(&root->fs_info->fs_mutex);
128 make_bad_inode(inode);
129 }
130
131 static void fill_inode_item(struct btrfs_inode_item *item,
132 struct inode *inode)
133 {
134 btrfs_set_inode_uid(item, inode->i_uid);
135 btrfs_set_inode_gid(item, inode->i_gid);
136 btrfs_set_inode_size(item, inode->i_size);
137 btrfs_set_inode_mode(item, inode->i_mode);
138 btrfs_set_inode_nlink(item, inode->i_nlink);
139 btrfs_set_timespec_sec(&item->atime, inode->i_atime.tv_sec);
140 btrfs_set_timespec_nsec(&item->atime, inode->i_atime.tv_nsec);
141 btrfs_set_timespec_sec(&item->mtime, inode->i_mtime.tv_sec);
142 btrfs_set_timespec_nsec(&item->mtime, inode->i_mtime.tv_nsec);
143 btrfs_set_timespec_sec(&item->ctime, inode->i_ctime.tv_sec);
144 btrfs_set_timespec_nsec(&item->ctime, inode->i_ctime.tv_nsec);
145 btrfs_set_inode_nblocks(item, inode->i_blocks);
146 btrfs_set_inode_generation(item, inode->i_generation);
147 btrfs_set_inode_block_group(item,
148 BTRFS_I(inode)->block_group->key.objectid);
149 }
150
151
152 static int btrfs_update_inode(struct btrfs_trans_handle *trans,
153 struct btrfs_root *root,
154 struct inode *inode)
155 {
156 struct btrfs_inode_item *inode_item;
157 struct btrfs_path *path;
158 int ret;
159
160 path = btrfs_alloc_path();
161 BUG_ON(!path);
162 btrfs_init_path(path);
163 ret = btrfs_lookup_inode(trans, root, path,
164 &BTRFS_I(inode)->location, 1);
165 if (ret) {
166 if (ret > 0)
167 ret = -ENOENT;
168 goto failed;
169 }
170
171 inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
172 path->slots[0],
173 struct btrfs_inode_item);
174
175 fill_inode_item(inode_item, inode);
176 btrfs_mark_buffer_dirty(path->nodes[0]);
177 ret = 0;
178 failed:
179 btrfs_release_path(root, path);
180 btrfs_free_path(path);
181 return ret;
182 }
183
184
185 static int btrfs_unlink_trans(struct btrfs_trans_handle *trans,
186 struct btrfs_root *root,
187 struct inode *dir,
188 struct dentry *dentry)
189 {
190 struct btrfs_path *path;
191 const char *name = dentry->d_name.name;
192 int name_len = dentry->d_name.len;
193 int ret = 0;
194 u64 objectid;
195 struct btrfs_dir_item *di;
196
197 path = btrfs_alloc_path();
198 BUG_ON(!path);
199 btrfs_init_path(path);
200 di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino,
201 name, name_len, -1);
202 if (IS_ERR(di)) {
203 ret = PTR_ERR(di);
204 goto err;
205 }
206 if (!di) {
207 ret = -ENOENT;
208 goto err;
209 }
210 objectid = btrfs_disk_key_objectid(&di->location);
211 ret = btrfs_delete_one_dir_name(trans, root, path, di);
212 BUG_ON(ret);
213 btrfs_release_path(root, path);
214
215 di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino,
216 objectid, name, name_len, -1);
217 if (IS_ERR(di)) {
218 ret = PTR_ERR(di);
219 goto err;
220 }
221 if (!di) {
222 ret = -ENOENT;
223 goto err;
224 }
225 ret = btrfs_delete_one_dir_name(trans, root, path, di);
226 BUG_ON(ret);
227
228 dentry->d_inode->i_ctime = dir->i_ctime;
229 err:
230 btrfs_free_path(path);
231 if (!ret) {
232 dir->i_size -= name_len * 2;
233 btrfs_update_inode(trans, root, dir);
234 drop_nlink(dentry->d_inode);
235 btrfs_update_inode(trans, root, dentry->d_inode);
236 dir->i_sb->s_dirt = 1;
237 }
238 return ret;
239 }
240
241 static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
242 {
243 struct btrfs_root *root;
244 struct btrfs_trans_handle *trans;
245 int ret;
246
247 root = BTRFS_I(dir)->root;
248 mutex_lock(&root->fs_info->fs_mutex);
249 trans = btrfs_start_transaction(root, 1);
250 btrfs_set_trans_block_group(trans, dir);
251 ret = btrfs_unlink_trans(trans, root, dir, dentry);
252 btrfs_end_transaction(trans, root);
253 mutex_unlock(&root->fs_info->fs_mutex);
254 return ret;
255 }
256
257 static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
258 {
259 struct inode *inode = dentry->d_inode;
260 int err;
261 int ret;
262 struct btrfs_root *root = BTRFS_I(dir)->root;
263 struct btrfs_path *path;
264 struct btrfs_key key;
265 struct btrfs_trans_handle *trans;
266 struct btrfs_key found_key;
267 int found_type;
268 struct btrfs_leaf *leaf;
269 char *goodnames = "..";
270
271 path = btrfs_alloc_path();
272 BUG_ON(!path);
273 btrfs_init_path(path);
274 mutex_lock(&root->fs_info->fs_mutex);
275 trans = btrfs_start_transaction(root, 1);
276 btrfs_set_trans_block_group(trans, dir);
277 key.objectid = inode->i_ino;
278 key.offset = (u64)-1;
279 key.flags = (u32)-1;
280 while(1) {
281 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
282 if (ret < 0) {
283 err = ret;
284 goto out;
285 }
286 BUG_ON(ret == 0);
287 if (path->slots[0] == 0) {
288 err = -ENOENT;
289 goto out;
290 }
291 path->slots[0]--;
292 leaf = btrfs_buffer_leaf(path->nodes[0]);
293 btrfs_disk_key_to_cpu(&found_key,
294 &leaf->items[path->slots[0]].key);
295 found_type = btrfs_key_type(&found_key);
296 if (found_key.objectid != inode->i_ino) {
297 err = -ENOENT;
298 goto out;
299 }
300 if ((found_type != BTRFS_DIR_ITEM_KEY &&
301 found_type != BTRFS_DIR_INDEX_KEY) ||
302 (!btrfs_match_dir_item_name(root, path, goodnames, 2) &&
303 !btrfs_match_dir_item_name(root, path, goodnames, 1))) {
304 err = -ENOTEMPTY;
305 goto out;
306 }
307 ret = btrfs_del_item(trans, root, path);
308 BUG_ON(ret);
309
310 if (found_type == BTRFS_DIR_ITEM_KEY && found_key.offset == 1)
311 break;
312 btrfs_release_path(root, path);
313 }
314 ret = 0;
315 btrfs_release_path(root, path);
316
317 /* now the directory is empty */
318 err = btrfs_unlink_trans(trans, root, dir, dentry);
319 if (!err) {
320 inode->i_size = 0;
321 }
322 out:
323 btrfs_release_path(root, path);
324 btrfs_free_path(path);
325 mutex_unlock(&root->fs_info->fs_mutex);
326 ret = btrfs_end_transaction(trans, root);
327 if (ret && !err)
328 err = ret;
329 return err;
330 }
331
332 static int btrfs_free_inode(struct btrfs_trans_handle *trans,
333 struct btrfs_root *root,
334 struct inode *inode)
335 {
336 struct btrfs_path *path;
337 int ret;
338
339 clear_inode(inode);
340
341 path = btrfs_alloc_path();
342 BUG_ON(!path);
343 btrfs_init_path(path);
344 ret = btrfs_lookup_inode(trans, root, path,
345 &BTRFS_I(inode)->location, -1);
346 BUG_ON(ret);
347 ret = btrfs_del_item(trans, root, path);
348 BUG_ON(ret);
349 btrfs_free_path(path);
350 return ret;
351 }
352
353 static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans,
354 struct btrfs_root *root,
355 struct inode *inode)
356 {
357 int ret;
358 struct btrfs_path *path;
359 struct btrfs_key key;
360 struct btrfs_disk_key *found_key;
361 struct btrfs_leaf *leaf;
362 struct btrfs_file_extent_item *fi = NULL;
363 u64 extent_start = 0;
364 u64 extent_num_blocks = 0;
365 int found_extent;
366
367 path = btrfs_alloc_path();
368 BUG_ON(!path);
369 /* FIXME, add redo link to tree so we don't leak on crash */
370 key.objectid = inode->i_ino;
371 key.offset = (u64)-1;
372 key.flags = 0;
373 /*
374 * use BTRFS_CSUM_ITEM_KEY because it is larger than inline keys
375 * or extent data
376 */
377 btrfs_set_key_type(&key, BTRFS_CSUM_ITEM_KEY);
378 while(1) {
379 btrfs_init_path(path);
380 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
381 if (ret < 0) {
382 goto error;
383 }
384 if (ret > 0) {
385 BUG_ON(path->slots[0] == 0);
386 path->slots[0]--;
387 }
388 leaf = btrfs_buffer_leaf(path->nodes[0]);
389 found_key = &leaf->items[path->slots[0]].key;
390 if (btrfs_disk_key_objectid(found_key) != inode->i_ino)
391 break;
392 if (btrfs_disk_key_type(found_key) != BTRFS_CSUM_ITEM_KEY &&
393 btrfs_disk_key_type(found_key) != BTRFS_EXTENT_DATA_KEY)
394 break;
395 if (btrfs_disk_key_offset(found_key) < inode->i_size)
396 break;
397 found_extent = 0;
398 if (btrfs_disk_key_type(found_key) == BTRFS_EXTENT_DATA_KEY) {
399 fi = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
400 path->slots[0],
401 struct btrfs_file_extent_item);
402 if (btrfs_file_extent_type(fi) !=
403 BTRFS_FILE_EXTENT_INLINE) {
404 extent_start =
405 btrfs_file_extent_disk_blocknr(fi);
406 extent_num_blocks =
407 btrfs_file_extent_disk_num_blocks(fi);
408 /* FIXME blocksize != 4096 */
409 inode->i_blocks -=
410 btrfs_file_extent_num_blocks(fi) << 3;
411 found_extent = 1;
412 }
413 }
414 ret = btrfs_del_item(trans, root, path);
415 BUG_ON(ret);
416 btrfs_release_path(root, path);
417 if (found_extent) {
418 ret = btrfs_free_extent(trans, root, extent_start,
419 extent_num_blocks, 0);
420 BUG_ON(ret);
421 }
422 }
423 ret = 0;
424 error:
425 btrfs_release_path(root, path);
426 btrfs_free_path(path);
427 inode->i_sb->s_dirt = 1;
428 return ret;
429 }
430
431 static void btrfs_delete_inode(struct inode *inode)
432 {
433 struct btrfs_trans_handle *trans;
434 struct btrfs_root *root = BTRFS_I(inode)->root;
435 int ret;
436
437 truncate_inode_pages(&inode->i_data, 0);
438 if (is_bad_inode(inode)) {
439 goto no_delete;
440 }
441 inode->i_size = 0;
442 mutex_lock(&root->fs_info->fs_mutex);
443 trans = btrfs_start_transaction(root, 1);
444 btrfs_set_trans_block_group(trans, inode);
445 if (S_ISREG(inode->i_mode)) {
446 ret = btrfs_truncate_in_trans(trans, root, inode);
447 BUG_ON(ret);
448 }
449 btrfs_free_inode(trans, root, inode);
450 btrfs_end_transaction(trans, root);
451 mutex_unlock(&root->fs_info->fs_mutex);
452 return;
453 no_delete:
454 clear_inode(inode);
455 }
456
457 static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
458 struct btrfs_key *location)
459 {
460 const char *name = dentry->d_name.name;
461 int namelen = dentry->d_name.len;
462 struct btrfs_dir_item *di;
463 struct btrfs_path *path;
464 struct btrfs_root *root = BTRFS_I(dir)->root;
465 int ret;
466
467 path = btrfs_alloc_path();
468 BUG_ON(!path);
469 btrfs_init_path(path);
470 di = btrfs_lookup_dir_item(NULL, root, path, dir->i_ino, name,
471 namelen, 0);
472 if (!di || IS_ERR(di)) {
473 location->objectid = 0;
474 ret = 0;
475 goto out;
476 }
477 btrfs_disk_key_to_cpu(location, &di->location);
478 out:
479 btrfs_release_path(root, path);
480 btrfs_free_path(path);
481 return ret;
482 }
483
484 int fixup_tree_root_location(struct btrfs_root *root,
485 struct btrfs_key *location,
486 struct btrfs_root **sub_root)
487 {
488 struct btrfs_path *path;
489 struct btrfs_root_item *ri;
490
491 if (btrfs_key_type(location) != BTRFS_ROOT_ITEM_KEY)
492 return 0;
493 if (location->objectid == BTRFS_ROOT_TREE_OBJECTID)
494 return 0;
495
496 path = btrfs_alloc_path();
497 BUG_ON(!path);
498 mutex_lock(&root->fs_info->fs_mutex);
499
500 *sub_root = btrfs_read_fs_root(root->fs_info, location);
501 if (IS_ERR(*sub_root))
502 return PTR_ERR(*sub_root);
503
504 ri = &(*sub_root)->root_item;
505 location->objectid = btrfs_root_dirid(ri);
506 location->flags = 0;
507 btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
508 location->offset = 0;
509
510 btrfs_free_path(path);
511 mutex_unlock(&root->fs_info->fs_mutex);
512 return 0;
513 }
514
515 int btrfs_init_locked_inode(struct inode *inode, void *p)
516 {
517 struct btrfs_iget_args *args = p;
518 inode->i_ino = args->ino;
519 BTRFS_I(inode)->root = args->root;
520 return 0;
521 }
522
523 int btrfs_find_actor(struct inode *inode, void *opaque)
524 {
525 struct btrfs_iget_args *args = opaque;
526 return (args->ino == inode->i_ino &&
527 args->root == BTRFS_I(inode)->root);
528 }
529
530 struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid,
531 struct btrfs_root *root)
532 {
533 struct inode *inode;
534 struct btrfs_iget_args args;
535 args.ino = objectid;
536 args.root = root;
537
538 inode = iget5_locked(s, objectid, btrfs_find_actor,
539 btrfs_init_locked_inode,
540 (void *)&args);
541 return inode;
542 }
543
544 static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
545 struct nameidata *nd)
546 {
547 struct inode * inode;
548 struct btrfs_inode *bi = BTRFS_I(dir);
549 struct btrfs_root *root = bi->root;
550 struct btrfs_root *sub_root = root;
551 struct btrfs_key location;
552 int ret;
553
554 if (dentry->d_name.len > BTRFS_NAME_LEN)
555 return ERR_PTR(-ENAMETOOLONG);
556 mutex_lock(&root->fs_info->fs_mutex);
557 ret = btrfs_inode_by_name(dir, dentry, &location);
558 mutex_unlock(&root->fs_info->fs_mutex);
559 if (ret < 0)
560 return ERR_PTR(ret);
561 inode = NULL;
562 if (location.objectid) {
563 ret = fixup_tree_root_location(root, &location, &sub_root);
564 if (ret < 0)
565 return ERR_PTR(ret);
566 if (ret > 0)
567 return ERR_PTR(-ENOENT);
568 inode = btrfs_iget_locked(dir->i_sb, location.objectid,
569 sub_root);
570 if (!inode)
571 return ERR_PTR(-EACCES);
572 if (inode->i_state & I_NEW) {
573 if (sub_root != root) {
574 printk("adding new root for inode %lu root %p (found %p)\n", inode->i_ino, sub_root, BTRFS_I(inode)->root);
575 igrab(inode);
576 sub_root->inode = inode;
577 }
578 BTRFS_I(inode)->root = sub_root;
579 memcpy(&BTRFS_I(inode)->location, &location,
580 sizeof(location));
581 btrfs_read_locked_inode(inode);
582 unlock_new_inode(inode);
583 }
584 }
585 return d_splice_alias(inode, dentry);
586 }
587
588 static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
589 {
590 struct inode *inode = filp->f_path.dentry->d_inode;
591 struct btrfs_root *root = BTRFS_I(inode)->root;
592 struct btrfs_item *item;
593 struct btrfs_dir_item *di;
594 struct btrfs_key key;
595 struct btrfs_path *path;
596 int ret;
597 u32 nritems;
598 struct btrfs_leaf *leaf;
599 int slot;
600 int advance;
601 unsigned char d_type = DT_UNKNOWN;
602 int over = 0;
603 u32 di_cur;
604 u32 di_total;
605 u32 di_len;
606 int key_type = BTRFS_DIR_INDEX_KEY;
607
608 /* FIXME, use a real flag for deciding about the key type */
609 if (root->fs_info->tree_root == root)
610 key_type = BTRFS_DIR_ITEM_KEY;
611 mutex_lock(&root->fs_info->fs_mutex);
612 key.objectid = inode->i_ino;
613 key.flags = 0;
614 btrfs_set_key_type(&key, key_type);
615 key.offset = filp->f_pos;
616 path = btrfs_alloc_path();
617 btrfs_init_path(path);
618 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
619 if (ret < 0)
620 goto err;
621 advance = 0;
622 while(1) {
623 leaf = btrfs_buffer_leaf(path->nodes[0]);
624 nritems = btrfs_header_nritems(&leaf->header);
625 slot = path->slots[0];
626 if (advance || slot >= nritems) {
627 if (slot >= nritems -1) {
628 ret = btrfs_next_leaf(root, path);
629 if (ret)
630 break;
631 leaf = btrfs_buffer_leaf(path->nodes[0]);
632 nritems = btrfs_header_nritems(&leaf->header);
633 slot = path->slots[0];
634 } else {
635 slot++;
636 path->slots[0]++;
637 }
638 }
639 advance = 1;
640 item = leaf->items + slot;
641 if (btrfs_disk_key_objectid(&item->key) != key.objectid)
642 break;
643 if (btrfs_disk_key_type(&item->key) != key_type)
644 break;
645 if (btrfs_disk_key_offset(&item->key) < filp->f_pos)
646 continue;
647 filp->f_pos = btrfs_disk_key_offset(&item->key);
648 advance = 1;
649 di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
650 di_cur = 0;
651 di_total = btrfs_item_size(leaf->items + slot);
652 while(di_cur < di_total) {
653 over = filldir(dirent, (const char *)(di + 1),
654 btrfs_dir_name_len(di),
655 btrfs_disk_key_offset(&item->key),
656 btrfs_disk_key_objectid(&di->location),
657 d_type);
658 if (over)
659 goto nopos;
660 di_len = btrfs_dir_name_len(di) + sizeof(*di);
661 di_cur += di_len;
662 di = (struct btrfs_dir_item *)((char *)di + di_len);
663 }
664 }
665 filp->f_pos++;
666 nopos:
667 ret = 0;
668 err:
669 btrfs_release_path(root, path);
670 btrfs_free_path(path);
671 mutex_unlock(&root->fs_info->fs_mutex);
672 return ret;
673 }
674
675 static void btrfs_put_super (struct super_block * sb)
676 {
677 struct btrfs_root *root = btrfs_sb(sb);
678 int ret;
679
680 ret = close_ctree(root);
681 if (ret) {
682 printk("close ctree returns %d\n", ret);
683 }
684 sb->s_fs_info = NULL;
685 }
686
687 static int btrfs_fill_super(struct super_block * sb, void * data, int silent)
688 {
689 struct inode * inode;
690 struct dentry * root_dentry;
691 struct btrfs_super_block *disk_super;
692 struct btrfs_root *tree_root;
693 struct btrfs_inode *bi;
694
695 sb->s_maxbytes = MAX_LFS_FILESIZE;
696 sb->s_magic = BTRFS_SUPER_MAGIC;
697 sb->s_op = &btrfs_super_ops;
698 sb->s_time_gran = 1;
699
700 tree_root = open_ctree(sb);
701
702 if (!tree_root) {
703 printk("btrfs: open_ctree failed\n");
704 return -EIO;
705 }
706 sb->s_fs_info = tree_root;
707 disk_super = tree_root->fs_info->disk_super;
708 printk("read in super total blocks %Lu root %Lu\n",
709 btrfs_super_total_blocks(disk_super),
710 btrfs_super_root_dir(disk_super));
711
712 inode = btrfs_iget_locked(sb, btrfs_super_root_dir(disk_super),
713 tree_root);
714 bi = BTRFS_I(inode);
715 bi->location.objectid = inode->i_ino;
716 bi->location.offset = 0;
717 bi->location.flags = 0;
718 bi->root = tree_root;
719 btrfs_set_key_type(&bi->location, BTRFS_INODE_ITEM_KEY);
720
721 if (!inode)
722 return -ENOMEM;
723 if (inode->i_state & I_NEW) {
724 btrfs_read_locked_inode(inode);
725 unlock_new_inode(inode);
726 }
727
728 root_dentry = d_alloc_root(inode);
729 if (!root_dentry) {
730 iput(inode);
731 return -ENOMEM;
732 }
733 sb->s_root = root_dentry;
734
735 return 0;
736 }
737
738 static int btrfs_write_inode(struct inode *inode, int wait)
739 {
740 struct btrfs_root *root = BTRFS_I(inode)->root;
741 struct btrfs_trans_handle *trans;
742 int ret = 0;
743
744 if (wait) {
745 mutex_lock(&root->fs_info->fs_mutex);
746 trans = btrfs_start_transaction(root, 1);
747 btrfs_set_trans_block_group(trans, inode);
748 ret = btrfs_commit_transaction(trans, root);
749 mutex_unlock(&root->fs_info->fs_mutex);
750 }
751 return ret;
752 }
753
754 static void btrfs_dirty_inode(struct inode *inode)
755 {
756 struct btrfs_root *root = BTRFS_I(inode)->root;
757 struct btrfs_trans_handle *trans;
758
759 mutex_lock(&root->fs_info->fs_mutex);
760 trans = btrfs_start_transaction(root, 1);
761 btrfs_set_trans_block_group(trans, inode);
762 btrfs_update_inode(trans, root, inode);
763 btrfs_end_transaction(trans, root);
764 mutex_unlock(&root->fs_info->fs_mutex);
765 }
766
767 static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
768 struct btrfs_root *root,
769 u64 objectid,
770 struct btrfs_block_group_cache *group,
771 int mode)
772 {
773 struct inode *inode;
774 struct btrfs_inode_item inode_item;
775 struct btrfs_key *location;
776 int ret;
777
778 inode = new_inode(root->fs_info->sb);
779 if (!inode)
780 return ERR_PTR(-ENOMEM);
781
782 BTRFS_I(inode)->root = root;
783 group = btrfs_find_block_group(root, group, 0);
784 BTRFS_I(inode)->block_group = group;
785
786 inode->i_uid = current->fsuid;
787 inode->i_gid = current->fsgid;
788 inode->i_mode = mode;
789 inode->i_ino = objectid;
790 inode->i_blocks = 0;
791 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
792 fill_inode_item(&inode_item, inode);
793 location = &BTRFS_I(inode)->location;
794 location->objectid = objectid;
795 location->flags = 0;
796 location->offset = 0;
797 btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
798
799 ret = btrfs_insert_inode(trans, root, objectid, &inode_item);
800 BUG_ON(ret);
801
802 insert_inode_hash(inode);
803 return inode;
804 }
805
806 static int btrfs_add_link(struct btrfs_trans_handle *trans,
807 struct dentry *dentry, struct inode *inode)
808 {
809 int ret;
810 struct btrfs_key key;
811 struct btrfs_root *root = BTRFS_I(dentry->d_parent->d_inode)->root;
812 key.objectid = inode->i_ino;
813 key.flags = 0;
814 btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
815 key.offset = 0;
816
817 ret = btrfs_insert_dir_item(trans, root,
818 dentry->d_name.name, dentry->d_name.len,
819 dentry->d_parent->d_inode->i_ino,
820 &key, 0);
821 if (ret == 0) {
822 dentry->d_parent->d_inode->i_size += dentry->d_name.len * 2;
823 ret = btrfs_update_inode(trans, root,
824 dentry->d_parent->d_inode);
825 }
826 return ret;
827 }
828
829 static int btrfs_add_nondir(struct btrfs_trans_handle *trans,
830 struct dentry *dentry, struct inode *inode)
831 {
832 int err = btrfs_add_link(trans, dentry, inode);
833 if (!err) {
834 d_instantiate(dentry, inode);
835 return 0;
836 }
837 if (err > 0)
838 err = -EEXIST;
839 return err;
840 }
841
842 static int btrfs_create(struct inode *dir, struct dentry *dentry,
843 int mode, struct nameidata *nd)
844 {
845 struct btrfs_trans_handle *trans;
846 struct btrfs_root *root = BTRFS_I(dir)->root;
847 struct inode *inode;
848 int err;
849 int drop_inode = 0;
850 u64 objectid;
851
852 mutex_lock(&root->fs_info->fs_mutex);
853 trans = btrfs_start_transaction(root, 1);
854 btrfs_set_trans_block_group(trans, dir);
855
856 err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
857 if (err) {
858 err = -ENOSPC;
859 goto out_unlock;
860 }
861
862 inode = btrfs_new_inode(trans, root, objectid,
863 BTRFS_I(dir)->block_group, mode);
864 err = PTR_ERR(inode);
865 if (IS_ERR(inode))
866 goto out_unlock;
867
868 btrfs_set_trans_block_group(trans, inode);
869 err = btrfs_add_nondir(trans, dentry, inode);
870 if (err)
871 drop_inode = 1;
872 else {
873 inode->i_mapping->a_ops = &btrfs_aops;
874 inode->i_fop = &btrfs_file_operations;
875 inode->i_op = &btrfs_file_inode_operations;
876 }
877 dir->i_sb->s_dirt = 1;
878 btrfs_update_inode_block_group(trans, inode);
879 btrfs_update_inode_block_group(trans, dir);
880 out_unlock:
881 btrfs_end_transaction(trans, root);
882 mutex_unlock(&root->fs_info->fs_mutex);
883
884 if (drop_inode) {
885 inode_dec_link_count(inode);
886 iput(inode);
887 }
888 return err;
889 }
890
891 static int btrfs_make_empty_dir(struct btrfs_trans_handle *trans,
892 struct btrfs_root *root,
893 u64 objectid, u64 dirid)
894 {
895 int ret;
896 char buf[2];
897 struct btrfs_key key;
898
899 buf[0] = '.';
900 buf[1] = '.';
901
902 key.objectid = objectid;
903 key.offset = 0;
904 key.flags = 0;
905 btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
906
907 ret = btrfs_insert_dir_item(trans, root, buf, 1, objectid,
908 &key, 1);
909 if (ret)
910 goto error;
911 key.objectid = dirid;
912 ret = btrfs_insert_dir_item(trans, root, buf, 2, objectid,
913 &key, 1);
914 if (ret)
915 goto error;
916 error:
917 return ret;
918 }
919
920 static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
921 {
922 struct inode *inode;
923 struct btrfs_trans_handle *trans;
924 struct btrfs_root *root = BTRFS_I(dir)->root;
925 int err = 0;
926 int drop_on_err = 0;
927 u64 objectid;
928
929 mutex_lock(&root->fs_info->fs_mutex);
930 trans = btrfs_start_transaction(root, 1);
931 btrfs_set_trans_block_group(trans, dir);
932 if (IS_ERR(trans)) {
933 err = PTR_ERR(trans);
934 goto out_unlock;
935 }
936
937 err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
938 if (err) {
939 err = -ENOSPC;
940 goto out_unlock;
941 }
942
943 inode = btrfs_new_inode(trans, root, objectid,
944 BTRFS_I(dir)->block_group, S_IFDIR | mode);
945 if (IS_ERR(inode)) {
946 err = PTR_ERR(inode);
947 goto out_fail;
948 }
949 drop_on_err = 1;
950 inode->i_op = &btrfs_dir_inode_operations;
951 inode->i_fop = &btrfs_dir_file_operations;
952 btrfs_set_trans_block_group(trans, inode);
953
954 err = btrfs_make_empty_dir(trans, root, inode->i_ino, dir->i_ino);
955 if (err)
956 goto out_fail;
957
958 inode->i_size = 6;
959 err = btrfs_update_inode(trans, root, inode);
960 if (err)
961 goto out_fail;
962 err = btrfs_add_link(trans, dentry, inode);
963 if (err)
964 goto out_fail;
965 d_instantiate(dentry, inode);
966 drop_on_err = 0;
967 dir->i_sb->s_dirt = 1;
968 btrfs_update_inode_block_group(trans, inode);
969 btrfs_update_inode_block_group(trans, dir);
970
971 out_fail:
972 btrfs_end_transaction(trans, root);
973 out_unlock:
974 mutex_unlock(&root->fs_info->fs_mutex);
975 if (drop_on_err)
976 iput(inode);
977 return err;
978 }
979
980 static int btrfs_sync_file(struct file *file,
981 struct dentry *dentry, int datasync)
982 {
983 struct inode *inode = dentry->d_inode;
984 struct btrfs_root *root = BTRFS_I(inode)->root;
985 int ret;
986 struct btrfs_trans_handle *trans;
987
988 mutex_lock(&root->fs_info->fs_mutex);
989 trans = btrfs_start_transaction(root, 1);
990 if (!trans) {
991 ret = -ENOMEM;
992 goto out;
993 }
994 ret = btrfs_commit_transaction(trans, root);
995 mutex_unlock(&root->fs_info->fs_mutex);
996 out:
997 return ret > 0 ? EIO : ret;
998 }
999
1000 static int btrfs_sync_fs(struct super_block *sb, int wait)
1001 {
1002 struct btrfs_trans_handle *trans;
1003 struct btrfs_root *root;
1004 int ret;
1005 root = btrfs_sb(sb);
1006
1007 sb->s_dirt = 0;
1008 if (!wait) {
1009 filemap_flush(root->fs_info->btree_inode->i_mapping);
1010 return 0;
1011 }
1012 mutex_lock(&root->fs_info->fs_mutex);
1013 trans = btrfs_start_transaction(root, 1);
1014 ret = btrfs_commit_transaction(trans, root);
1015 sb->s_dirt = 0;
1016 BUG_ON(ret);
1017 printk("btrfs sync_fs\n");
1018 mutex_unlock(&root->fs_info->fs_mutex);
1019 return 0;
1020 }
1021
1022 static int btrfs_get_block_lock(struct inode *inode, sector_t iblock,
1023 struct buffer_head *result, int create)
1024 {
1025 int ret;
1026 int err = 0;
1027 u64 blocknr;
1028 u64 extent_start = 0;
1029 u64 extent_end = 0;
1030 u64 objectid = inode->i_ino;
1031 u32 found_type;
1032 struct btrfs_path *path;
1033 struct btrfs_root *root = BTRFS_I(inode)->root;
1034 struct btrfs_file_extent_item *item;
1035 struct btrfs_leaf *leaf;
1036 struct btrfs_disk_key *found_key;
1037
1038 path = btrfs_alloc_path();
1039 BUG_ON(!path);
1040 btrfs_init_path(path);
1041 if (create) {
1042 WARN_ON(1);
1043 }
1044
1045 ret = btrfs_lookup_file_extent(NULL, root, path,
1046 inode->i_ino,
1047 iblock << inode->i_blkbits, 0);
1048 if (ret < 0) {
1049 err = ret;
1050 goto out;
1051 }
1052
1053 if (ret != 0) {
1054 if (path->slots[0] == 0) {
1055 btrfs_release_path(root, path);
1056 goto out;
1057 }
1058 path->slots[0]--;
1059 }
1060
1061 item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0],
1062 struct btrfs_file_extent_item);
1063 leaf = btrfs_buffer_leaf(path->nodes[0]);
1064 blocknr = btrfs_file_extent_disk_blocknr(item);
1065 blocknr += btrfs_file_extent_offset(item);
1066
1067 /* are we inside the extent that was found? */
1068 found_key = &leaf->items[path->slots[0]].key;
1069 found_type = btrfs_disk_key_type(found_key);
1070 if (btrfs_disk_key_objectid(found_key) != objectid ||
1071 found_type != BTRFS_EXTENT_DATA_KEY) {
1072 extent_end = 0;
1073 extent_start = 0;
1074 btrfs_release_path(root, path);
1075 goto out;
1076 }
1077 found_type = btrfs_file_extent_type(item);
1078 extent_start = btrfs_disk_key_offset(&leaf->items[path->slots[0]].key);
1079 if (found_type == BTRFS_FILE_EXTENT_REG) {
1080 extent_start = extent_start >> inode->i_blkbits;
1081 extent_end = extent_start + btrfs_file_extent_num_blocks(item);
1082 if (iblock >= extent_start && iblock < extent_end) {
1083 err = 0;
1084 btrfs_map_bh_to_logical(root, result, blocknr +
1085 iblock - extent_start);
1086 goto out;
1087 }
1088 } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
1089 char *ptr;
1090 char *map;
1091 u32 size;
1092 size = btrfs_file_extent_inline_len(leaf->items +
1093 path->slots[0]);
1094 extent_end = (extent_start + size) >> inode->i_blkbits;
1095 extent_start >>= inode->i_blkbits;
1096 if (iblock < extent_start || iblock > extent_end) {
1097 goto out;
1098 }
1099 ptr = btrfs_file_extent_inline_start(item);
1100 map = kmap(result->b_page);
1101 memcpy(map, ptr, size);
1102 memset(map + size, 0, PAGE_CACHE_SIZE - size);
1103 flush_dcache_page(result->b_page);
1104 kunmap(result->b_page);
1105 set_buffer_uptodate(result);
1106 SetPageChecked(result->b_page);
1107 btrfs_map_bh_to_logical(root, result, 0);
1108 }
1109 out:
1110 btrfs_release_path(root, path);
1111 btrfs_free_path(path);
1112 return err;
1113 }
1114
1115 static int btrfs_get_block(struct inode *inode, sector_t iblock,
1116 struct buffer_head *result, int create)
1117 {
1118 int err;
1119 struct btrfs_root *root = BTRFS_I(inode)->root;
1120 mutex_lock(&root->fs_info->fs_mutex);
1121 err = btrfs_get_block_lock(inode, iblock, result, create);
1122 mutex_unlock(&root->fs_info->fs_mutex);
1123 return err;
1124 }
1125
1126 static int btrfs_prepare_write(struct file *file, struct page *page,
1127 unsigned from, unsigned to)
1128 {
1129 return nobh_prepare_write(page, from, to, btrfs_get_block);
1130 }
1131
/*
 * ->write_super: forward to btrfs_sync_fs() with its second argument set
 * to 1 (presumably the VFS "wait" flag -- confirm against btrfs_sync_fs).
 * The result of btrfs_sync_fs() is not examined.
 */
static void btrfs_write_super(struct super_block *sb)
{
	const int sync_arg = 1;

	btrfs_sync_fs(sb, sync_arg);
}
1136
1137 static int btrfs_readpage(struct file *file, struct page *page)
1138 {
1139 return mpage_readpage(page, btrfs_get_block);
1140 }
1141
1142 /*
1143 * While block_write_full_page is writing back the dirty buffers under
1144 * the page lock, whoever dirtied the buffers may decide to clean them
1145 * again at any time. We handle that by only looking at the buffer
1146 * state inside lock_buffer().
1147 *
1148 * If block_write_full_page() is called for regular writeback
1149 * (wbc->sync_mode == WB_SYNC_NONE) then it will redirty a page which has a
1150 * locked buffer. This only can happen if someone has written the buffer
1151 * directly, with submit_bh(). At the address_space level PageWriteback
1152 * prevents this contention from occurring.
1153 */
1154 static int __btrfs_write_full_page(struct inode *inode, struct page *page,
1155 struct writeback_control *wbc)
1156 {
1157 int err;
1158 sector_t block;
1159 sector_t last_block;
1160 struct buffer_head *bh, *head;
1161 const unsigned blocksize = 1 << inode->i_blkbits;
1162 int nr_underway = 0;
1163
1164 BUG_ON(!PageLocked(page));
1165
1166 last_block = (i_size_read(inode) - 1) >> inode->i_blkbits;
1167
1168 if (!page_has_buffers(page)) {
1169 create_empty_buffers(page, blocksize,
1170 (1 << BH_Dirty)|(1 << BH_Uptodate));
1171 }
1172
1173 /*
1174 * Be very careful. We have no exclusion from __set_page_dirty_buffers
1175 * here, and the (potentially unmapped) buffers may become dirty at
1176 * any time. If a buffer becomes dirty here after we've inspected it
1177 * then we just miss that fact, and the page stays dirty.
1178 *
1179 * Buffers outside i_size may be dirtied by __set_page_dirty_buffers;
1180 * handle that here by just cleaning them.
1181 */
1182
1183 block = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
1184 head = page_buffers(page);
1185 bh = head;
1186
1187 /*
1188 * Get all the dirty buffers mapped to disk addresses and
1189 * handle any aliases from the underlying blockdev's mapping.
1190 */
1191 do {
1192 if (block > last_block) {
1193 /*
1194 * mapped buffers outside i_size will occur, because
1195 * this page can be outside i_size when there is a
1196 * truncate in progress.
1197 */
1198 /*
1199 * The buffer was zeroed by block_write_full_page()
1200 */
1201 clear_buffer_dirty(bh);
1202 set_buffer_uptodate(bh);
1203 } else if (!buffer_mapped(bh) && buffer_dirty(bh)) {
1204 WARN_ON(bh->b_size != blocksize);
1205 err = btrfs_get_block(inode, block, bh, 0);
1206 if (err)
1207 goto recover;
1208 if (buffer_new(bh)) {
1209 /* blockdev mappings never come here */
1210 clear_buffer_new(bh);
1211 unmap_underlying_metadata(bh->b_bdev,
1212 bh->b_blocknr);
1213 }
1214 }
1215 bh = bh->b_this_page;
1216 block++;
1217 } while (bh != head);
1218
1219 do {
1220 if (!buffer_mapped(bh))
1221 continue;
1222 /*
1223 * If it's a fully non-blocking write attempt and we cannot
1224 * lock the buffer then redirty the page. Note that this can
1225 * potentially cause a busy-wait loop from pdflush and kswapd
1226 * activity, but those code paths have their own higher-level
1227 * throttling.
1228 */
1229 if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) {
1230 lock_buffer(bh);
1231 } else if (test_set_buffer_locked(bh)) {
1232 redirty_page_for_writepage(wbc, page);
1233 continue;
1234 }
1235 if (test_clear_buffer_dirty(bh) && bh->b_blocknr != 0) {
1236 mark_buffer_async_write(bh);
1237 } else {
1238 unlock_buffer(bh);
1239 }
1240 } while ((bh = bh->b_this_page) != head);
1241
1242 /*
1243 * The page and its buffers are protected by PageWriteback(), so we can
1244 * drop the bh refcounts early.
1245 */
1246 BUG_ON(PageWriteback(page));
1247 set_page_writeback(page);
1248
1249 do {
1250 struct buffer_head *next = bh->b_this_page;
1251 if (buffer_async_write(bh)) {
1252 submit_bh(WRITE, bh);
1253 nr_underway++;
1254 }
1255 bh = next;
1256 } while (bh != head);
1257 unlock_page(page);
1258
1259 err = 0;
1260 done:
1261 if (nr_underway == 0) {
1262 /*
1263 * The page was marked dirty, but the buffers were
1264 * clean. Someone wrote them back by hand with
1265 * ll_rw_block/submit_bh. A rare case.
1266 */
1267 int uptodate = 1;
1268 do {
1269 if (!buffer_uptodate(bh)) {
1270 uptodate = 0;
1271 break;
1272 }
1273 bh = bh->b_this_page;
1274 } while (bh != head);
1275 if (uptodate)
1276 SetPageUptodate(page);
1277 end_page_writeback(page);
1278 /*
1279 * The page and buffer_heads can be released at any time from
1280 * here on.
1281 */
1282 wbc->pages_skipped++; /* We didn't write this page */
1283 }
1284 return err;
1285
1286 recover:
1287 /*
1288 * ENOSPC, or some other error. We may already have added some
1289 * blocks to the file, so we need to write these out to avoid
1290 * exposing stale data.
1291 * The page is currently locked and not marked for writeback
1292 */
1293 bh = head;
1294 /* Recovery: lock and submit the mapped buffers */
1295 do {
1296 if (buffer_mapped(bh) && buffer_dirty(bh)) {
1297 lock_buffer(bh);
1298 mark_buffer_async_write(bh);
1299 } else {
1300 /*
1301 * The buffer may have been set dirty during
1302 * attachment to a dirty page.
1303 */
1304 clear_buffer_dirty(bh);
1305 }
1306 } while ((bh = bh->b_this_page) != head);
1307 SetPageError(page);
1308 BUG_ON(PageWriteback(page));
1309 set_page_writeback(page);
1310 do {
1311 struct buffer_head *next = bh->b_this_page;
1312 if (buffer_async_write(bh)) {
1313 clear_buffer_dirty(bh);
1314 submit_bh(WRITE, bh);
1315 nr_underway++;
1316 }
1317 bh = next;
1318 } while (bh != head);
1319 unlock_page(page);
1320 goto done;
1321 }
1322
1323 /*
1324 * The generic ->writepage function for buffer-backed address_spaces
1325 */
1326 static int btrfs_writepage(struct page *page, struct writeback_control *wbc)
1327 {
1328 struct inode * const inode = page->mapping->host;
1329 loff_t i_size = i_size_read(inode);
1330 const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
1331 unsigned offset;
1332 void *kaddr;
1333
1334 /* Is the page fully inside i_size? */
1335 if (page->index < end_index)
1336 return __btrfs_write_full_page(inode, page, wbc);
1337
1338 /* Is the page fully outside i_size? (truncate in progress) */
1339 offset = i_size & (PAGE_CACHE_SIZE-1);
1340 if (page->index >= end_index+1 || !offset) {
1341 /*
1342 * The page may have dirty, unmapped buffers. For example,
1343 * they may have been added in ext3_writepage(). Make them
1344 * freeable here, so the page does not leak.
1345 */
1346 block_invalidatepage(page, 0);
1347 unlock_page(page);
1348 return 0; /* don't care */
1349 }
1350
1351 /*
1352 * The page straddles i_size. It must be zeroed out on each and every
1353 * writepage invokation because it may be mmapped. "A file is mapped
1354 * in multiples of the page size. For a file that is not a multiple of
1355 * the page size, the remaining memory is zeroed when mapped, and
1356 * writes to that region are not written out to the file."
1357 */
1358 kaddr = kmap_atomic(page, KM_USER0);
1359 memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset);
1360 flush_dcache_page(page);
1361 kunmap_atomic(kaddr, KM_USER0);
1362 return __btrfs_write_full_page(inode, page, wbc);
1363 }
1364
1365 static void btrfs_truncate(struct inode *inode)
1366 {
1367 struct btrfs_root *root = BTRFS_I(inode)->root;
1368 int ret;
1369 struct btrfs_trans_handle *trans;
1370
1371 if (!S_ISREG(inode->i_mode))
1372 return;
1373 if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
1374 return;
1375
1376 nobh_truncate_page(inode->i_mapping, inode->i_size);
1377
1378 /* FIXME, add redo link to tree so we don't leak on crash */
1379 mutex_lock(&root->fs_info->fs_mutex);
1380 trans = btrfs_start_transaction(root, 1);
1381 btrfs_set_trans_block_group(trans, inode);
1382 ret = btrfs_truncate_in_trans(trans, root, inode);
1383 BUG_ON(ret);
1384 ret = btrfs_end_transaction(trans, root);
1385 BUG_ON(ret);
1386 mutex_unlock(&root->fs_info->fs_mutex);
1387 mark_inode_dirty(inode);
1388 }
1389
1390 /*
1391 * Make sure any changes to nobh_commit_write() are reflected in
1392 * nobh_truncate_page(), since it doesn't call commit_write().
1393 */
1394 static int btrfs_commit_write(struct file *file, struct page *page,
1395 unsigned from, unsigned to)
1396 {
1397 struct inode *inode = page->mapping->host;
1398 struct buffer_head *bh;
1399 loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
1400
1401 SetPageUptodate(page);
1402 bh = page_buffers(page);
1403 if (buffer_mapped(bh) && bh->b_blocknr != 0) {
1404 set_page_dirty(page);
1405 }
1406 if (pos > inode->i_size) {
1407 i_size_write(inode, pos);
1408 mark_inode_dirty(inode);
1409 }
1410 return 0;
1411 }
1412
1413 static int btrfs_copy_from_user(loff_t pos, int num_pages, int write_bytes,
1414 struct page **prepared_pages,
1415 const char __user * buf)
1416 {
1417 long page_fault = 0;
1418 int i;
1419 int offset = pos & (PAGE_CACHE_SIZE - 1);
1420
1421 for (i = 0; i < num_pages && write_bytes > 0; i++, offset = 0) {
1422 size_t count = min_t(size_t,
1423 PAGE_CACHE_SIZE - offset, write_bytes);
1424 struct page *page = prepared_pages[i];
1425 fault_in_pages_readable(buf, count);
1426
1427 /* Copy data from userspace to the current page */
1428 kmap(page);
1429 page_fault = __copy_from_user(page_address(page) + offset,
1430 buf, count);
1431 /* Flush processor's dcache for this page */
1432 flush_dcache_page(page);
1433 kunmap(page);
1434 buf += count;
1435 write_bytes -= count;
1436
1437 if (page_fault)
1438 break;
1439 }
1440 return page_fault ? -EFAULT : 0;
1441 }
1442
1443 static void btrfs_drop_pages(struct page **pages, size_t num_pages)
1444 {
1445 size_t i;
1446 for (i = 0; i < num_pages; i++) {
1447 if (!pages[i])
1448 break;
1449 unlock_page(pages[i]);
1450 mark_page_accessed(pages[i]);
1451 page_cache_release(pages[i]);
1452 }
1453 }
1454 static int dirty_and_release_pages(struct btrfs_trans_handle *trans,
1455 struct btrfs_root *root,
1456 struct file *file,
1457 struct page **pages,
1458 size_t num_pages,
1459 loff_t pos,
1460 size_t write_bytes)
1461 {
1462 int i;
1463 int offset;
1464 int err = 0;
1465 int ret;
1466 int this_write;
1467 struct inode *inode = file->f_path.dentry->d_inode;
1468 struct buffer_head *bh;
1469 struct btrfs_file_extent_item *ei;
1470
1471 for (i = 0; i < num_pages; i++) {
1472 offset = pos & (PAGE_CACHE_SIZE -1);
1473 this_write = min(PAGE_CACHE_SIZE - offset, write_bytes);
1474 /* FIXME, one block at a time */
1475
1476 mutex_lock(&root->fs_info->fs_mutex);
1477 trans = btrfs_start_transaction(root, 1);
1478 btrfs_set_trans_block_group(trans, inode);
1479
1480 bh = page_buffers(pages[i]);
1481 if (buffer_mapped(bh) && bh->b_blocknr == 0) {
1482 struct btrfs_key key;
1483 struct btrfs_path *path;
1484 char *ptr;
1485 u32 datasize;
1486
1487 path = btrfs_alloc_path();
1488 BUG_ON(!path);
1489 key.objectid = inode->i_ino;
1490 key.offset = pages[i]->index << PAGE_CACHE_SHIFT;
1491 key.flags = 0;
1492 btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
1493 BUG_ON(write_bytes >= PAGE_CACHE_SIZE);
1494 datasize = offset +
1495 btrfs_file_extent_calc_inline_size(write_bytes);
1496 ret = btrfs_insert_empty_item(trans, root, path, &key,
1497 datasize);
1498 BUG_ON(ret);
1499 ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
1500 path->slots[0], struct btrfs_file_extent_item);
1501 btrfs_set_file_extent_generation(ei, trans->transid);
1502 btrfs_set_file_extent_type(ei,
1503 BTRFS_FILE_EXTENT_INLINE);
1504 ptr = btrfs_file_extent_inline_start(ei);
1505 memcpy(ptr, bh->b_data, offset + write_bytes);
1506 mark_buffer_dirty(path->nodes[0]);
1507 btrfs_free_path(path);
1508 } else {
1509 btrfs_csum_file_block(trans, root, inode->i_ino,
1510 pages[i]->index << PAGE_CACHE_SHIFT,
1511 kmap(pages[i]), PAGE_CACHE_SIZE);
1512 kunmap(pages[i]);
1513 }
1514 SetPageChecked(pages[i]);
1515 btrfs_update_inode_block_group(trans, inode);
1516 ret = btrfs_end_transaction(trans, root);
1517 BUG_ON(ret);
1518 mutex_unlock(&root->fs_info->fs_mutex);
1519
1520 ret = btrfs_commit_write(file, pages[i], offset,
1521 offset + this_write);
1522 pos += this_write;
1523 if (ret) {
1524 err = ret;
1525 goto failed;
1526 }
1527 WARN_ON(this_write > write_bytes);
1528 write_bytes -= this_write;
1529 }
1530 failed:
1531 return err;
1532 }
1533
1534 static int drop_extents(struct btrfs_trans_handle *trans,
1535 struct btrfs_root *root,
1536 struct inode *inode,
1537 u64 start, u64 end)
1538 {
1539 int ret;
1540 struct btrfs_key key;
1541 struct btrfs_leaf *leaf;
1542 int slot;
1543 struct btrfs_file_extent_item *extent;
1544 u64 extent_end = 0;
1545 int keep;
1546 struct btrfs_file_extent_item old;
1547 struct btrfs_path *path;
1548 u64 search_start = start;
1549 int bookend;
1550 int found_type;
1551 int found_extent;
1552 int found_inline;
1553
1554 path = btrfs_alloc_path();
1555 if (!path)
1556 return -ENOMEM;
1557 while(1) {
1558 btrfs_release_path(root, path);
1559 ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino,
1560 search_start, -1);
1561 if (ret < 0)
1562 goto out;
1563 if (ret > 0) {
1564 if (path->slots[0] == 0) {
1565 ret = 0;
1566 goto out;
1567 }
1568 path->slots[0]--;
1569 }
1570 keep = 0;
1571 bookend = 0;
1572 found_extent = 0;
1573 found_inline = 0;
1574 extent = NULL;
1575 leaf = btrfs_buffer_leaf(path->nodes[0]);
1576 slot = path->slots[0];
1577 btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key);
1578 if (key.offset >= end || key.objectid != inode->i_ino) {
1579 ret = 0;
1580 goto out;
1581 }
1582 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) {
1583 ret = 0;
1584 goto out;
1585 }
1586 extent = btrfs_item_ptr(leaf, slot,
1587 struct btrfs_file_extent_item);
1588 found_type = btrfs_file_extent_type(extent);
1589 if (found_type == BTRFS_FILE_EXTENT_REG) {
1590 extent_end = key.offset +
1591 (btrfs_file_extent_num_blocks(extent) <<
1592 inode->i_blkbits);
1593 found_extent = 1;
1594 } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
1595 found_inline = 1;
1596 extent_end = key.offset +
1597 btrfs_file_extent_inline_len(leaf->items + slot);
1598 }
1599
1600 if (!found_extent && !found_inline) {
1601 ret = 0;
1602 goto out;
1603 }
1604
1605 if (search_start >= extent_end) {
1606 ret = 0;
1607 goto out;
1608 }
1609
1610 search_start = extent_end;
1611
1612 if (end < extent_end && end >= key.offset) {
1613 if (found_extent) {
1614 memcpy(&old, extent, sizeof(old));
1615 ret = btrfs_inc_extent_ref(trans, root,
1616 btrfs_file_extent_disk_blocknr(&old),
1617 btrfs_file_extent_disk_num_blocks(&old));
1618 BUG_ON(ret);
1619 }
1620 WARN_ON(found_inline);
1621 bookend = 1;
1622 }
1623
1624 if (start > key.offset) {
1625 u64 new_num;
1626 u64 old_num;
1627 /* truncate existing extent */
1628 keep = 1;
1629 WARN_ON(start & (root->blocksize - 1));
1630 if (found_extent) {
1631 new_num = (start - key.offset) >>
1632 inode->i_blkbits;
1633 old_num = btrfs_file_extent_num_blocks(extent);
1634 inode->i_blocks -= (old_num - new_num) << 3;
1635 btrfs_set_file_extent_num_blocks(extent,
1636 new_num);
1637 mark_buffer_dirty(path->nodes[0]);
1638 } else {
1639 WARN_ON(1);
1640 /*
1641 ret = btrfs_truncate_item(trans, root, path,
1642 start - key.offset);
1643 BUG_ON(ret);
1644 */
1645 }
1646 }
1647 if (!keep) {
1648 u64 disk_blocknr = 0;
1649 u64 disk_num_blocks = 0;
1650 u64 extent_num_blocks = 0;
1651 if (found_extent) {
1652 disk_blocknr =
1653 btrfs_file_extent_disk_blocknr(extent);
1654 disk_num_blocks =
1655 btrfs_file_extent_disk_num_blocks(extent);
1656 extent_num_blocks =
1657 btrfs_file_extent_num_blocks(extent);
1658 }
1659 ret = btrfs_del_item(trans, root, path);
1660 BUG_ON(ret);
1661 btrfs_release_path(root, path);
1662 if (found_extent) {
1663 inode->i_blocks -=
1664 btrfs_file_extent_num_blocks(extent) << 3;
1665 ret = btrfs_free_extent(trans, root,
1666 disk_blocknr,
1667 disk_num_blocks, 0);
1668 }
1669
1670 BUG_ON(ret);
1671 if (!bookend && search_start >= end) {
1672 ret = 0;
1673 goto out;
1674 }
1675 if (!bookend)
1676 continue;
1677 }
1678 if (bookend && found_extent) {
1679 /* create bookend */
1680 struct btrfs_key ins;
1681 ins.objectid = inode->i_ino;
1682 ins.offset = end;
1683 ins.flags = 0;
1684 btrfs_set_key_type(&ins, BTRFS_EXTENT_DATA_KEY);
1685
1686 btrfs_release_path(root, path);
1687 ret = btrfs_insert_empty_item(trans, root, path, &ins,
1688 sizeof(*extent));
1689 BUG_ON(ret);
1690 extent = btrfs_item_ptr(
1691 btrfs_buffer_leaf(path->nodes[0]),
1692 path->slots[0],
1693 struct btrfs_file_extent_item);
1694 btrfs_set_file_extent_disk_blocknr(extent,
1695 btrfs_file_extent_disk_blocknr(&old));
1696 btrfs_set_file_extent_disk_num_blocks(extent,
1697 btrfs_file_extent_disk_num_blocks(&old));
1698
1699 btrfs_set_file_extent_offset(extent,
1700 btrfs_file_extent_offset(&old) +
1701 ((end - key.offset) >> inode->i_blkbits));
1702 WARN_ON(btrfs_file_extent_num_blocks(&old) <
1703 (end - key.offset) >> inode->i_blkbits);
1704 btrfs_set_file_extent_num_blocks(extent,
1705 btrfs_file_extent_num_blocks(&old) -
1706 ((end - key.offset) >> inode->i_blkbits));
1707
1708 btrfs_set_file_extent_type(extent,
1709 BTRFS_FILE_EXTENT_REG);
1710 btrfs_set_file_extent_generation(extent,
1711 btrfs_file_extent_generation(&old));
1712 btrfs_mark_buffer_dirty(path->nodes[0]);
1713 inode->i_blocks +=
1714 btrfs_file_extent_num_blocks(extent) << 3;
1715 ret = 0;
1716 goto out;
1717 }
1718 }
1719 out:
1720 btrfs_free_path(path);
1721 return ret;
1722 }
1723
1724 static int prepare_pages(struct btrfs_root *root,
1725 struct file *file,
1726 struct page **pages,
1727 size_t num_pages,
1728 loff_t pos,
1729 unsigned long first_index,
1730 unsigned long last_index,
1731 size_t write_bytes,
1732 u64 alloc_extent_start)
1733 {
1734 int i;
1735 unsigned long index = pos >> PAGE_CACHE_SHIFT;
1736 struct inode *inode = file->f_path.dentry->d_inode;
1737 int offset;
1738 int err = 0;
1739 int this_write;
1740 struct buffer_head *bh;
1741 struct buffer_head *head;
1742 loff_t isize = i_size_read(inode);
1743
1744 memset(pages, 0, num_pages * sizeof(struct page *));
1745
1746 for (i = 0; i < num_pages; i++) {
1747 pages[i] = grab_cache_page(inode->i_mapping, index + i);
1748 if (!pages[i]) {
1749 err = -ENOMEM;
1750 goto failed_release;
1751 }
1752 offset = pos & (PAGE_CACHE_SIZE -1);
1753 this_write = min(PAGE_CACHE_SIZE - offset, write_bytes);
1754 create_empty_buffers(pages[i], root->fs_info->sb->s_blocksize,
1755 (1 << BH_Uptodate));
1756 head = page_buffers(pages[i]);
1757 bh = head;
1758 do {
1759 err = btrfs_map_bh_to_logical(root, bh,
1760 alloc_extent_start);
1761 BUG_ON(err);
1762 if (err)
1763 goto failed_truncate;
1764 bh = bh->b_this_page;
1765 if (alloc_extent_start)
1766 alloc_extent_start++;
1767 } while (bh != head);
1768 pos += this_write;
1769 WARN_ON(this_write > write_bytes);
1770 write_bytes -= this_write;
1771 }
1772 return 0;
1773
1774 failed_release:
1775 btrfs_drop_pages(pages, num_pages);
1776 return err;
1777
1778 failed_truncate:
1779 btrfs_drop_pages(pages, num_pages);
1780 if (pos > isize)
1781 vmtruncate(inode, isize);
1782 return err;
1783 }
1784
1785 static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
1786 size_t count, loff_t *ppos)
1787 {
1788 loff_t pos;
1789 size_t num_written = 0;
1790 int err = 0;
1791 int ret = 0;
1792 struct inode *inode = file->f_path.dentry->d_inode;
1793 struct btrfs_root *root = BTRFS_I(inode)->root;
1794 struct page *pages[8];
1795 struct page *pinned[2] = { NULL, NULL };
1796 unsigned long first_index;
1797 unsigned long last_index;
1798 u64 start_pos;
1799 u64 num_blocks;
1800 u64 alloc_extent_start;
1801 struct btrfs_trans_handle *trans;
1802 struct btrfs_key ins;
1803
1804 if (file->f_flags & O_DIRECT)
1805 return -EINVAL;
1806 pos = *ppos;
1807 vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
1808 current->backing_dev_info = inode->i_mapping->backing_dev_info;
1809 err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
1810 if (err)
1811 goto out;
1812 if (count == 0)
1813 goto out;
1814 err = remove_suid(file->f_path.dentry);
1815 if (err)
1816 goto out;
1817 file_update_time(file);
1818
1819 start_pos = pos & ~((u64)PAGE_CACHE_SIZE - 1);
1820 num_blocks = (count + pos - start_pos + root->blocksize - 1) >>
1821 inode->i_blkbits;
1822
1823 mutex_lock(&inode->i_mutex);
1824 first_index = pos >> PAGE_CACHE_SHIFT;
1825 last_index = (pos + count) >> PAGE_CACHE_SHIFT;
1826
1827 if ((first_index << PAGE_CACHE_SHIFT) < inode->i_size &&
1828 (pos & (PAGE_CACHE_SIZE - 1))) {
1829 pinned[0] = grab_cache_page(inode->i_mapping, first_index);
1830 if (!PageUptodate(pinned[0])) {
1831 ret = mpage_readpage(pinned[0], btrfs_get_block);
1832 BUG_ON(ret);
1833 } else {
1834 unlock_page(pinned[0]);
1835 }
1836 }
1837 if (first_index != last_index &&
1838 (last_index << PAGE_CACHE_SHIFT) < inode->i_size &&
1839 (count & (PAGE_CACHE_SIZE - 1))) {
1840 pinned[1] = grab_cache_page(inode->i_mapping, last_index);
1841 if (!PageUptodate(pinned[1])) {
1842 ret = mpage_readpage(pinned[1], btrfs_get_block);
1843 BUG_ON(ret);
1844 } else {
1845 unlock_page(pinned[1]);
1846 }
1847 }
1848
1849 mutex_lock(&root->fs_info->fs_mutex);
1850 trans = btrfs_start_transaction(root, 1);
1851 if (!trans) {
1852 err = -ENOMEM;
1853 mutex_unlock(&root->fs_info->fs_mutex);
1854 goto out_unlock;
1855 }
1856 btrfs_set_trans_block_group(trans, inode);
1857 /* FIXME blocksize != 4096 */
1858 inode->i_blocks += num_blocks << 3;
1859 if (start_pos < inode->i_size) {
1860 /* FIXME blocksize != pagesize */
1861 ret = drop_extents(trans, root, inode,
1862 start_pos,
1863 (pos + count + root->blocksize -1) &
1864 ~((u64)root->blocksize - 1));
1865 BUG_ON(ret);
1866 }
1867 if (inode->i_size >= PAGE_CACHE_SIZE || pos + count < inode->i_size ||
1868 pos + count - start_pos > BTRFS_MAX_INLINE_DATA_SIZE(root)) {
1869 ret = btrfs_alloc_extent(trans, root, inode->i_ino,
1870 num_blocks, 1, (u64)-1, &ins);
1871 BUG_ON(ret);
1872 ret = btrfs_insert_file_extent(trans, root, inode->i_ino,
1873 start_pos, ins.objectid, ins.offset);
1874 BUG_ON(ret);
1875 } else {
1876 ins.offset = 0;
1877 ins.objectid = 0;
1878 }
1879 BUG_ON(ret);
1880 alloc_extent_start = ins.objectid;
1881 btrfs_update_inode_block_group(trans, inode);
1882 ret = btrfs_end_transaction(trans, root);
1883 mutex_unlock(&root->fs_info->fs_mutex);
1884
1885 while(count > 0) {
1886 size_t offset = pos & (PAGE_CACHE_SIZE - 1);
1887 size_t write_bytes = min(count, PAGE_CACHE_SIZE - offset);
1888 size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >>
1889 PAGE_CACHE_SHIFT;
1890
1891 memset(pages, 0, sizeof(pages));
1892 ret = prepare_pages(root, file, pages, num_pages,
1893 pos, first_index, last_index,
1894 write_bytes, alloc_extent_start);
1895 BUG_ON(ret);
1896
1897 /* FIXME blocks != pagesize */
1898 if (alloc_extent_start)
1899 alloc_extent_start += num_pages;
1900 ret = btrfs_copy_from_user(pos, num_pages,
1901 write_bytes, pages, buf);
1902 BUG_ON(ret);
1903
1904 ret = dirty_and_release_pages(NULL, root, file, pages,
1905 num_pages, pos, write_bytes);
1906 BUG_ON(ret);
1907 btrfs_drop_pages(pages, num_pages);
1908
1909 buf += write_bytes;
1910 count -= write_bytes;
1911 pos += write_bytes;
1912 num_written += write_bytes;
1913
1914 balance_dirty_pages_ratelimited(inode->i_mapping);
1915 cond_resched();
1916 }
1917 out_unlock:
1918 mutex_unlock(&inode->i_mutex);
1919 out:
1920 if (pinned[0])
1921 page_cache_release(pinned[0]);
1922 if (pinned[1])
1923 page_cache_release(pinned[1]);
1924 *ppos = pos;
1925 current->backing_dev_info = NULL;
1926 mark_inode_dirty(inode);
1927 return num_written ? num_written : err;
1928 }
1929
1930 static int btrfs_read_actor(read_descriptor_t *desc, struct page *page,
1931 unsigned long offset, unsigned long size)
1932 {
1933 char *kaddr;
1934 unsigned long left, count = desc->count;
1935 struct inode *inode = page->mapping->host;
1936
1937 if (size > count)
1938 size = count;
1939
1940 if (!PageChecked(page)) {
1941 /* FIXME, do it per block */
1942 struct btrfs_root *root = BTRFS_I(inode)->root;
1943
1944 int ret = btrfs_csum_verify_file_block(root,
1945 page->mapping->host->i_ino,
1946 page->index << PAGE_CACHE_SHIFT,
1947 kmap(page), PAGE_CACHE_SIZE);
1948 if (ret) {
1949 printk("failed to verify ino %lu page %lu\n",
1950 page->mapping->host->i_ino,
1951 page->index);
1952 memset(page_address(page), 0, PAGE_CACHE_SIZE);
1953 }
1954 SetPageChecked(page);
1955 kunmap(page);
1956 }
1957 /*
1958 * Faults on the destination of a read are common, so do it before
1959 * taking the kmap.
1960 */
1961 if (!fault_in_pages_writeable(desc->arg.buf, size)) {
1962 kaddr = kmap_atomic(page, KM_USER0);
1963 left = __copy_to_user_inatomic(desc->arg.buf,
1964 kaddr + offset, size);
1965 kunmap_atomic(kaddr, KM_USER0);
1966 if (left == 0)
1967 goto success;
1968 }
1969
1970 /* Do it the slow way */
1971 kaddr = kmap(page);
1972 left = __copy_to_user(desc->arg.buf, kaddr + offset, size);
1973 kunmap(page);
1974
1975 if (left) {
1976 size -= left;
1977 desc->error = -EFAULT;
1978 }
1979 success:
1980 desc->count = count - size;
1981 desc->written += size;
1982 desc->arg.buf += size;
1983 return size;
1984 }
1985
1986 /**
1987 * btrfs_file_aio_read - filesystem read routine
1988 * @iocb: kernel I/O control block
1989 * @iov: io vector request
1990 * @nr_segs: number of segments in the iovec
1991 * @pos: current file position
1992 */
1993 static ssize_t btrfs_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
1994 unsigned long nr_segs, loff_t pos)
1995 {
1996 struct file *filp = iocb->ki_filp;
1997 ssize_t retval;
1998 unsigned long seg;
1999 size_t count;
2000 loff_t *ppos = &iocb->ki_pos;
2001
2002 count = 0;
2003 for (seg = 0; seg < nr_segs; seg++) {
2004 const struct iovec *iv = &iov[seg];
2005
2006 /*
2007 * If any segment has a negative length, or the cumulative
2008 * length ever wraps negative then return -EINVAL.
2009 */
2010 count += iv->iov_len;
2011 if (unlikely((ssize_t)(count|iv->iov_len) < 0))
2012 return -EINVAL;
2013 if (access_ok(VERIFY_WRITE, iv->iov_base, iv->iov_len))
2014 continue;
2015 if (seg == 0)
2016 return -EFAULT;
2017 nr_segs = seg;
2018 count -= iv->iov_len; /* This segment is no good */
2019 break;
2020 }
2021 retval = 0;
2022 if (count) {
2023 for (seg = 0; seg < nr_segs; seg++) {
2024 read_descriptor_t desc;
2025
2026 desc.written = 0;
2027 desc.arg.buf = iov[seg].iov_base;
2028 desc.count = iov[seg].iov_len;
2029 if (desc.count == 0)
2030 continue;
2031 desc.error = 0;
2032 do_generic_file_read(filp, ppos, &desc,
2033 btrfs_read_actor);
2034 retval += desc.written;
2035 if (desc.error) {
2036 retval = retval ?: desc.error;
2037 break;
2038 }
2039 }
2040 }
2041 return retval;
2042 }
2043
2044 static int create_subvol(struct btrfs_root *root, char *name, int namelen)
2045 {
2046 struct btrfs_trans_handle *trans;
2047 struct btrfs_key key;
2048 struct btrfs_root_item root_item;
2049 struct btrfs_inode_item *inode_item;
2050 struct buffer_head *subvol;
2051 struct btrfs_leaf *leaf;
2052 struct btrfs_root *new_root;
2053 struct inode *inode;
2054 struct inode *dir;
2055 int ret;
2056 u64 objectid;
2057 u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID;
2058
2059 mutex_lock(&root->fs_info->fs_mutex);
2060 trans = btrfs_start_transaction(root, 1);
2061 BUG_ON(!trans);
2062
2063 subvol = btrfs_alloc_free_block(trans, root, 0);
2064 if (subvol == NULL)
2065 return -ENOSPC;
2066 leaf = btrfs_buffer_leaf(subvol);
2067 btrfs_set_header_nritems(&leaf->header, 0);
2068 btrfs_set_header_level(&leaf->header, 0);
2069 btrfs_set_header_blocknr(&leaf->header, bh_blocknr(subvol));
2070 btrfs_set_header_generation(&leaf->header, trans->transid);
2071 btrfs_set_header_owner(&leaf->header, root->root_key.objectid);
2072 memcpy(leaf->header.fsid, root->fs_info->disk_super->fsid,
2073 sizeof(leaf->header.fsid));
2074 mark_buffer_dirty(subvol);
2075
2076 inode_item = &root_item.inode;
2077 memset(inode_item, 0, sizeof(*inode_item));
2078 btrfs_set_inode_generation(inode_item, 1);
2079 btrfs_set_inode_size(inode_item, 3);
2080 btrfs_set_inode_nlink(inode_item, 1);
2081 btrfs_set_inode_nblocks(inode_item, 1);
2082 btrfs_set_inode_mode(inode_item, S_IFDIR | 0755);
2083
2084 btrfs_set_root_blocknr(&root_item, bh_blocknr(subvol));
2085 btrfs_set_root_refs(&root_item, 1);
2086 brelse(subvol);
2087 subvol = NULL;
2088
2089 ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root,
2090 0, &objectid);
2091 BUG_ON(ret);
2092
2093 btrfs_set_root_dirid(&root_item, new_dirid);
2094
2095 key.objectid = objectid;
2096 key.offset = 1;
2097 key.flags = 0;
2098 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
2099 ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
2100 &root_item);
2101 BUG_ON(ret);
2102
2103 /*
2104 * insert the directory item
2105 */
2106 key.offset = (u64)-1;
2107 dir = root->fs_info->sb->s_root->d_inode;
2108 ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root,
2109 name, namelen, dir->i_ino, &key, 0);
2110 BUG_ON(ret);
2111
2112 ret = btrfs_commit_transaction(trans, root);
2113 BUG_ON(ret);
2114
2115 new_root = btrfs_read_fs_root(root->fs_info, &key);
2116 BUG_ON(!new_root);
2117
2118 trans = btrfs_start_transaction(new_root, 1);
2119 BUG_ON(!trans);
2120
2121 inode = btrfs_new_inode(trans, new_root, new_dirid,
2122 BTRFS_I(dir)->block_group, S_IFDIR | 0700);
2123 inode->i_op = &btrfs_dir_inode_operations;
2124 inode->i_fop = &btrfs_dir_file_operations;
2125
2126 ret = btrfs_make_empty_dir(trans, new_root, new_dirid, new_dirid);
2127 BUG_ON(ret);
2128
2129 inode->i_nlink = 1;
2130 inode->i_size = 6;
2131 ret = btrfs_update_inode(trans, new_root, inode);
2132 BUG_ON(ret);
2133
2134 ret = btrfs_commit_transaction(trans, new_root);
2135 BUG_ON(ret);
2136
2137 iput(inode);
2138
2139 mutex_unlock(&root->fs_info->fs_mutex);
2140 return 0;
2141 }
2142
2143 static int create_snapshot(struct btrfs_root *root, char *name, int namelen)
2144 {
2145 struct btrfs_trans_handle *trans;
2146 struct btrfs_key key;
2147 struct btrfs_root_item new_root_item;
2148 int ret;
2149 u64 objectid;
2150
2151 if (!root->ref_cows)
2152 return -EINVAL;
2153
2154 mutex_lock(&root->fs_info->fs_mutex);
2155 trans = btrfs_start_transaction(root, 1);
2156 BUG_ON(!trans);
2157
2158 ret = btrfs_update_inode(trans, root, root->inode);
2159 BUG_ON(ret);
2160
2161 ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root,
2162 0, &objectid);
2163 BUG_ON(ret);
2164
2165 memcpy(&new_root_item, &root->root_item,
2166 sizeof(new_root_item));
2167
2168 key.objectid = objectid;
2169 key.offset = 1;
2170 key.flags = 0;
2171 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
2172 btrfs_set_root_blocknr(&new_root_item, bh_blocknr(root->node));
2173
2174 ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
2175 &new_root_item);
2176 BUG_ON(ret);
2177
2178 /*
2179 * insert the directory item
2180 */
2181 key.offset = (u64)-1;
2182 ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root,
2183 name, namelen,
2184 root->fs_info->sb->s_root->d_inode->i_ino,
2185 &key, 0);
2186
2187 BUG_ON(ret);
2188
2189 ret = btrfs_inc_root_ref(trans, root);
2190 BUG_ON(ret);
2191
2192 ret = btrfs_commit_transaction(trans, root);
2193 BUG_ON(ret);
2194 mutex_unlock(&root->fs_info->fs_mutex);
2195 return 0;
2196 }
2197
2198 static int add_disk(struct btrfs_root *root, char *name, int namelen)
2199 {
2200 struct block_device *bdev;
2201 struct btrfs_path *path;
2202 struct super_block *sb = root->fs_info->sb;
2203 struct btrfs_root *dev_root = root->fs_info->dev_root;
2204 struct btrfs_trans_handle *trans;
2205 struct btrfs_device_item *dev_item;
2206 struct btrfs_key key;
2207 u16 item_size;
2208 u64 num_blocks;
2209 u64 new_blocks;
2210 u64 device_id;
2211 int ret;
2212
2213 printk("adding disk %s\n", name);
2214 path = btrfs_alloc_path();
2215 if (!path)
2216 return -ENOMEM;
2217 num_blocks = btrfs_super_total_blocks(root->fs_info->disk_super);
2218 bdev = open_bdev_excl(name, O_RDWR, sb);
2219 if (IS_ERR(bdev)) {
2220 ret = PTR_ERR(bdev);
2221 printk("open bdev excl failed ret %d\n", ret);
2222 goto out_nolock;
2223 }
2224 set_blocksize(bdev, sb->s_blocksize);
2225 new_blocks = bdev->bd_inode->i_size >> sb->s_blocksize_bits;
2226 key.objectid = num_blocks;
2227 key.offset = new_blocks;
2228 key.flags = 0;
2229 btrfs_set_key_type(&key, BTRFS_DEV_ITEM_KEY);
2230
2231 mutex_lock(&dev_root->fs_info->fs_mutex);
2232 trans = btrfs_start_transaction(dev_root, 1);
2233 item_size = sizeof(*dev_item) + namelen;
2234 printk("insert empty on %Lu %Lu %u size %d\n", num_blocks, new_blocks, key.flags, item_size);
2235 ret = btrfs_insert_empty_item(trans, dev_root, path, &key, item_size);
2236 if (ret) {
2237 printk("insert failed %d\n", ret);
2238 close_bdev_excl(bdev);
2239 if (ret > 0)
2240 ret = -EEXIST;
2241 goto out;
2242 }
2243 dev_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
2244 path->slots[0], struct btrfs_device_item);
2245 btrfs_set_device_pathlen(dev_item, namelen);
2246 memcpy(dev_item + 1, name, namelen);
2247
2248 device_id = btrfs_super_last_device_id(root->fs_info->disk_super) + 1;
2249 btrfs_set_super_last_device_id(root->fs_info->disk_super, device_id);
2250 btrfs_set_device_id(dev_item, device_id);
2251 mark_buffer_dirty(path->nodes[0]);
2252
2253 ret = btrfs_insert_dev_radix(root, bdev, device_id, num_blocks,
2254 new_blocks);
2255
2256 if (!ret) {
2257 btrfs_set_super_total_blocks(root->fs_info->disk_super,
2258 num_blocks + new_blocks);
2259 i_size_write(root->fs_info->btree_inode,
2260 (num_blocks + new_blocks) <<
2261 root->fs_info->btree_inode->i_blkbits);
2262 }
2263
2264 out:
2265 ret = btrfs_commit_transaction(trans, dev_root);
2266 BUG_ON(ret);
2267 mutex_unlock(&root->fs_info->fs_mutex);
2268 out_nolock:
2269 btrfs_free_path(path);
2270
2271 return ret;
2272 }
2273
2274 static int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int
2275 cmd, unsigned long arg)
2276 {
2277 struct btrfs_root *root = BTRFS_I(inode)->root;
2278 struct btrfs_ioctl_vol_args vol_args;
2279 int ret = 0;
2280 struct btrfs_dir_item *di;
2281 int namelen;
2282 struct btrfs_path *path;
2283 u64 root_dirid;
2284
2285 switch (cmd) {
2286 case BTRFS_IOC_SNAP_CREATE:
2287 if (copy_from_user(&vol_args,
2288 (struct btrfs_ioctl_vol_args __user *)arg,
2289 sizeof(vol_args)))
2290 return -EFAULT;
2291 namelen = strlen(vol_args.name);
2292 if (namelen > BTRFS_VOL_NAME_MAX)
2293 return -EINVAL;
2294 path = btrfs_alloc_path();
2295 if (!path)
2296 return -ENOMEM;
2297 root_dirid = root->fs_info->sb->s_root->d_inode->i_ino,
2298 mutex_lock(&root->fs_info->fs_mutex);
2299 di = btrfs_lookup_dir_item(NULL, root->fs_info->tree_root,
2300 path, root_dirid,
2301 vol_args.name, namelen, 0);
2302 mutex_unlock(&root->fs_info->fs_mutex);
2303 btrfs_free_path(path);
2304 if (di && !IS_ERR(di))
2305 return -EEXIST;
2306
2307 if (root == root->fs_info->tree_root)
2308 ret = create_subvol(root, vol_args.name, namelen);
2309 else
2310 ret = create_snapshot(root, vol_args.name, namelen);
2311 WARN_ON(ret);
2312 break;
2313 case BTRFS_IOC_ADD_DISK:
2314 if (copy_from_user(&vol_args,
2315 (struct btrfs_ioctl_vol_args __user *)arg,
2316 sizeof(vol_args)))
2317 return -EFAULT;
2318 namelen = strlen(vol_args.name);
2319 if (namelen > BTRFS_VOL_NAME_MAX)
2320 return -EINVAL;
2321 vol_args.name[namelen] = '\0';
2322 ret = add_disk(root, vol_args.name, namelen);
2323 break;
2324 default:
2325 return -ENOTTY;
2326 }
2327 return ret;
2328 }
2329
/*
 * Slab caches set up by init_inodecache() and torn down by
 * destroy_inodecache().  Only the inode cache is private to this file;
 * the others have external linkage.
 */
static struct kmem_cache *btrfs_inode_cachep;

struct kmem_cache *btrfs_trans_handle_cachep;
struct kmem_cache *btrfs_transaction_cachep;
struct kmem_cache *btrfs_path_cachep;
struct kmem_cache *btrfs_bit_radix_cachep;
2335
2336 /*
2337 * Called inside transaction, so use GFP_NOFS
2338 */
2339 static struct inode *btrfs_alloc_inode(struct super_block *sb)
2340 {
2341 struct btrfs_inode *ei;
2342
2343 ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_NOFS);
2344 if (!ei)
2345 return NULL;
2346 return &ei->vfs_inode;
2347 }
2348
2349 static void btrfs_destroy_inode(struct inode *inode)
2350 {
2351 WARN_ON(!list_empty(&inode->i_dentry));
2352 WARN_ON(inode->i_data.nrpages);
2353
2354 kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
2355 }
2356
2357 static void init_once(void * foo, struct kmem_cache * cachep,
2358 unsigned long flags)
2359 {
2360 struct btrfs_inode *ei = (struct btrfs_inode *) foo;
2361
2362 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
2363 SLAB_CTOR_CONSTRUCTOR) {
2364 inode_init_once(&ei->vfs_inode);
2365 }
2366 }
2367
2368 static int init_inodecache(void)
2369 {
2370 btrfs_inode_cachep = kmem_cache_create("btrfs_inode_cache",
2371 sizeof(struct btrfs_inode),
2372 0, (SLAB_RECLAIM_ACCOUNT|
2373 SLAB_MEM_SPREAD),
2374 init_once, NULL);
2375 btrfs_trans_handle_cachep = kmem_cache_create("btrfs_trans_handle_cache",
2376 sizeof(struct btrfs_trans_handle),
2377 0, (SLAB_RECLAIM_ACCOUNT|
2378 SLAB_MEM_SPREAD),
2379 NULL, NULL);
2380 btrfs_transaction_cachep = kmem_cache_create("btrfs_transaction_cache",
2381 sizeof(struct btrfs_transaction),
2382 0, (SLAB_RECLAIM_ACCOUNT|
2383 SLAB_MEM_SPREAD),
2384 NULL, NULL);
2385 btrfs_path_cachep = kmem_cache_create("btrfs_path_cache",
2386 sizeof(struct btrfs_transaction),
2387 0, (SLAB_RECLAIM_ACCOUNT|
2388 SLAB_MEM_SPREAD),
2389 NULL, NULL);
2390 btrfs_bit_radix_cachep = kmem_cache_create("btrfs_radix",
2391 256,
2392 0, (SLAB_RECLAIM_ACCOUNT|
2393 SLAB_MEM_SPREAD |
2394 SLAB_DESTROY_BY_RCU),
2395 NULL, NULL);
2396 if (btrfs_inode_cachep == NULL || btrfs_trans_handle_cachep == NULL ||
2397 btrfs_transaction_cachep == NULL || btrfs_bit_radix_cachep == NULL)
2398 return -ENOMEM;
2399 return 0;
2400 }
2401
2402 static void destroy_inodecache(void)
2403 {
2404 kmem_cache_destroy(btrfs_inode_cachep);
2405 kmem_cache_destroy(btrfs_trans_handle_cachep);
2406 kmem_cache_destroy(btrfs_transaction_cachep);
2407 kmem_cache_destroy(btrfs_bit_radix_cachep);
2408 kmem_cache_destroy(btrfs_path_cachep);
2409 }
2410
2411 static int btrfs_get_sb(struct file_system_type *fs_type,
2412 int flags, const char *dev_name, void *data, struct vfsmount *mnt)
2413 {
2414 return get_sb_bdev(fs_type, flags, dev_name, data,
2415 btrfs_fill_super, mnt);
2416 }
2417
2418
2419 static int btrfs_getattr(struct vfsmount *mnt,
2420 struct dentry *dentry, struct kstat *stat)
2421 {
2422 struct inode *inode = dentry->d_inode;
2423 generic_fillattr(inode, stat);
2424 stat->blksize = 256 * 1024;
2425 return 0;
2426 }
2427
2428 static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
2429 {
2430 struct btrfs_root *root = btrfs_sb(dentry->d_sb);
2431 struct btrfs_super_block *disk_super = root->fs_info->disk_super;
2432
2433 buf->f_namelen = BTRFS_NAME_LEN;
2434 buf->f_blocks = btrfs_super_total_blocks(disk_super);
2435 buf->f_bfree = buf->f_blocks - btrfs_super_blocks_used(disk_super);
2436 buf->f_bavail = buf->f_bfree;
2437 buf->f_bsize = dentry->d_sb->s_blocksize;
2438 buf->f_type = BTRFS_SUPER_MAGIC;
2439 return 0;
2440 }
2441
2442 static struct file_system_type btrfs_fs_type = {
2443 .owner = THIS_MODULE,
2444 .name = "btrfs",
2445 .get_sb = btrfs_get_sb,
2446 .kill_sb = kill_block_super,
2447 .fs_flags = FS_REQUIRES_DEV,
2448 };
2449
2450 static struct super_operations btrfs_super_ops = {
2451 .delete_inode = btrfs_delete_inode,
2452 .put_super = btrfs_put_super,
2453 .read_inode = btrfs_read_locked_inode,
2454 .write_super = btrfs_write_super,
2455 .sync_fs = btrfs_sync_fs,
2456 .write_inode = btrfs_write_inode,
2457 .dirty_inode = btrfs_dirty_inode,
2458 .alloc_inode = btrfs_alloc_inode,
2459 .destroy_inode = btrfs_destroy_inode,
2460 .statfs = btrfs_statfs,
2461 };
2462
2463 static struct inode_operations btrfs_dir_inode_operations = {
2464 .lookup = btrfs_lookup,
2465 .create = btrfs_create,
2466 .unlink = btrfs_unlink,
2467 .mkdir = btrfs_mkdir,
2468 .rmdir = btrfs_rmdir,
2469 };
2470
2471 static struct inode_operations btrfs_dir_ro_inode_operations = {
2472 .lookup = btrfs_lookup,
2473 };
2474
2475 static struct file_operations btrfs_dir_file_operations = {
2476 .llseek = generic_file_llseek,
2477 .read = generic_read_dir,
2478 .readdir = btrfs_readdir,
2479 .ioctl = btrfs_ioctl,
2480 };
2481
2482 static struct address_space_operations btrfs_aops = {
2483 .readpage = btrfs_readpage,
2484 .writepage = btrfs_writepage,
2485 .sync_page = block_sync_page,
2486 .prepare_write = btrfs_prepare_write,
2487 .commit_write = btrfs_commit_write,
2488 };
2489
2490 static struct inode_operations btrfs_file_inode_operations = {
2491 .truncate = btrfs_truncate,
2492 .getattr = btrfs_getattr,
2493 };
2494
2495 static struct file_operations btrfs_file_operations = {
2496 .llseek = generic_file_llseek,
2497 .read = do_sync_read,
2498 .aio_read = btrfs_file_aio_read,
2499 .write = btrfs_file_write,
2500 .mmap = generic_file_mmap,
2501 .open = generic_file_open,
2502 .ioctl = btrfs_ioctl,
2503 .fsync = btrfs_sync_file,
2504 };
2505
2506 static int __init init_btrfs_fs(void)
2507 {
2508 int err;
2509 printk("btrfs loaded!\n");
2510 err = init_inodecache();
2511 if (err)
2512 return err;
2513 kset_set_kset_s(&btrfs_subsys, fs_subsys);
2514 err = subsystem_register(&btrfs_subsys);
2515 if (err)
2516 goto out;
2517 return register_filesystem(&btrfs_fs_type);
2518 out:
2519 destroy_inodecache();
2520 return err;
2521 }
2522
2523 static void __exit exit_btrfs_fs(void)
2524 {
2525 destroy_inodecache();
2526 unregister_filesystem(&btrfs_fs_type);
2527 subsystem_unregister(&btrfs_subsys);
2528 printk("btrfs unloaded\n");
2529 }
2530
2531 module_init(init_btrfs_fs)
2532 module_exit(exit_btrfs_fs)
2533
2534 MODULE_LICENSE("GPL");
This page took 0.08184 seconds and 5 git commands to generate.