fs/btrfs/super.c

   1 #include <linux/module.h>
   2 #include <linux/buffer_head.h>
   3 #include <linux/fs.h>
   4 #include <linux/pagemap.h>
   5 #include <linux/highmem.h>
   6 #include <linux/time.h>
   7 #include <linux/init.h>
   8 #include <linux/string.h>
   9 #include <linux/smp_lock.h>
  10 #include <linux/backing-dev.h>
  11 #include <linux/mpage.h>
  12 #include <linux/swap.h>
  13 #include <linux/writeback.h>
  14 #include <linux/statfs.h>
  15 #include "ctree.h"
  16 #include "disk-io.h"
  17 #include "transaction.h"
  18 #include "btrfs_inode.h"
  19 #include "ioctl.h"
  20
  21 void btrfs_fsinfo_release(struct kobject *obj)
  22 {
  23         struct btrfs_fs_info *fsinfo = container_of(obj,
  24                                             struct btrfs_fs_info, kobj);
  25         kfree(fsinfo);
  26 }
  27
  28 struct kobj_type btrfs_fsinfo_ktype = {
  29         .release = btrfs_fsinfo_release,
  30 };
  31
  32 struct btrfs_iget_args {
  33         u64 ino;
  34         struct btrfs_root *root;
  35 };
  36
  37 decl_subsys(btrfs, &btrfs_fsinfo_ktype, NULL);
  38
  39 #define BTRFS_SUPER_MAGIC 0x9123682E
  40
  41 static struct inode_operations btrfs_dir_inode_operations;
  42 static struct inode_operations btrfs_dir_ro_inode_operations;
  43 static struct super_operations btrfs_super_ops;
  44 static struct file_operations btrfs_dir_file_operations;
  45 static struct inode_operations btrfs_file_inode_operations;
  46 static struct address_space_operations btrfs_aops;
  47 static struct file_operations btrfs_file_operations;
  48
  49 static void btrfs_read_locked_inode(struct inode *inode)
  50 {
  51         struct btrfs_path *path;
  52         struct btrfs_inode_item *inode_item;
  53         struct btrfs_root *root = BTRFS_I(inode)->root;
  54         struct btrfs_key location;
  55         struct btrfs_block_group_cache *alloc_group;
  56         u64 alloc_group_block;
  57         int ret;
  58
  59         path = btrfs_alloc_path();
  60         BUG_ON(!path);
  61         btrfs_init_path(path);
  62         mutex_lock(&root->fs_info->fs_mutex);
  63
  64         memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));
  65         ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
  66         if (ret) {
  67                 btrfs_free_path(path);
  68                 goto make_bad;
  69         }
  70         inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
  71                                   path->slots[0],
  72                                   struct btrfs_inode_item);
  73
  74         inode->i_mode = btrfs_inode_mode(inode_item);
  75         inode->i_nlink = btrfs_inode_nlink(inode_item);
  76         inode->i_uid = btrfs_inode_uid(inode_item);
  77         inode->i_gid = btrfs_inode_gid(inode_item);
  78         inode->i_size = btrfs_inode_size(inode_item);
  79         inode->i_atime.tv_sec = btrfs_timespec_sec(&inode_item->atime);
  80         inode->i_atime.tv_nsec = btrfs_timespec_nsec(&inode_item->atime);
  81         inode->i_mtime.tv_sec = btrfs_timespec_sec(&inode_item->mtime);
  82         inode->i_mtime.tv_nsec = btrfs_timespec_nsec(&inode_item->mtime);
  83         inode->i_ctime.tv_sec = btrfs_timespec_sec(&inode_item->ctime);
  84         inode->i_ctime.tv_nsec = btrfs_timespec_nsec(&inode_item->ctime);
  85         inode->i_blocks = btrfs_inode_nblocks(inode_item);
  86         inode->i_generation = btrfs_inode_generation(inode_item);
  87         alloc_group_block = btrfs_inode_block_group(inode_item);
  88         ret = radix_tree_gang_lookup(&root->fs_info->block_group_radix,
  89                                      (void **)&alloc_group,
  90                                      alloc_group_block, 1);
  91         BUG_ON(!ret);
  92         BTRFS_I(inode)->block_group = alloc_group;
  93
  94         btrfs_free_path(path);
  95         inode_item = NULL;
  96
  97         mutex_unlock(&root->fs_info->fs_mutex);
  98
  99         switch (inode->i_mode & S_IFMT) {
 100 #if 0
 101         default:
 102                 init_special_inode(inode, inode->i_mode,
 103                                    btrfs_inode_rdev(inode_item));
 104                 break;
 105 #endif
 106         case S_IFREG:
 107                 inode->i_mapping->a_ops = &btrfs_aops;
 108                 inode->i_fop = &btrfs_file_operations;
 109                 inode->i_op = &btrfs_file_inode_operations;
 110                 break;
 111         case S_IFDIR:
 112                 inode->i_fop = &btrfs_dir_file_operations;
 113                 if (root == root->fs_info->tree_root)
 114                         inode->i_op = &btrfs_dir_ro_inode_operations;
 115                 else
 116                         inode->i_op = &btrfs_dir_inode_operations;
 117                 break;
 118         case S_IFLNK:
 119                 // inode->i_op = &page_symlink_inode_operations;
 120                 break;
 121         }
 122         return;
 123
 124 make_bad:
 125         btrfs_release_path(root, path);
 126         btrfs_free_path(path);
 127         mutex_unlock(&root->fs_info->fs_mutex);
 128         make_bad_inode(inode);
 129 }
 130
 131 static void fill_inode_item(struct btrfs_inode_item *item,
 132                             struct inode *inode)
 133 {
 134         btrfs_set_inode_uid(item, inode->i_uid);
 135         btrfs_set_inode_gid(item, inode->i_gid);
 136         btrfs_set_inode_size(item, inode->i_size);
 137         btrfs_set_inode_mode(item, inode->i_mode);
 138         btrfs_set_inode_nlink(item, inode->i_nlink);
 139         btrfs_set_timespec_sec(&item->atime, inode->i_atime.tv_sec);
 140         btrfs_set_timespec_nsec(&item->atime, inode->i_atime.tv_nsec);
 141         btrfs_set_timespec_sec(&item->mtime, inode->i_mtime.tv_sec);
 142         btrfs_set_timespec_nsec(&item->mtime, inode->i_mtime.tv_nsec);
 143         btrfs_set_timespec_sec(&item->ctime, inode->i_ctime.tv_sec);
 144         btrfs_set_timespec_nsec(&item->ctime, inode->i_ctime.tv_nsec);
 145         btrfs_set_inode_nblocks(item, inode->i_blocks);
 146         btrfs_set_inode_generation(item, inode->i_generation);
 147         btrfs_set_inode_block_group(item,
 148                                     BTRFS_I(inode)->block_group->key.objectid);
 149 }
 150
 151
 152 static int btrfs_update_inode(struct btrfs_trans_handle *trans,
 153                               struct btrfs_root *root,
 154                               struct inode *inode)
 155 {
 156         struct btrfs_inode_item *inode_item;
 157         struct btrfs_path *path;
 158         int ret;
 159
 160         path = btrfs_alloc_path();
 161         BUG_ON(!path);
 162         btrfs_init_path(path);
 163         ret = btrfs_lookup_inode(trans, root, path,
 164                                  &BTRFS_I(inode)->location, 1);
 165         if (ret) {
 166                 if (ret > 0)
 167                         ret = -ENOENT;
 168                 goto failed;
 169         }
 170
 171         inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
 172                                   path->slots[0],
 173                                   struct btrfs_inode_item);
 174
 175         fill_inode_item(inode_item, inode);
 176         btrfs_mark_buffer_dirty(path->nodes[0]);
 177         ret = 0;
 178 failed:
 179         btrfs_release_path(root, path);
 180         btrfs_free_path(path);
 181         return ret;
 182 }
 183
 184
 185 static int btrfs_unlink_trans(struct btrfs_trans_handle *trans,
 186                               struct btrfs_root *root,
 187                               struct inode *dir,
 188                               struct dentry *dentry)
 189 {
 190         struct btrfs_path *path;
 191         const char *name = dentry->d_name.name;
 192         int name_len = dentry->d_name.len;
 193         int ret = 0;
 194         u64 objectid;
 195         struct btrfs_dir_item *di;
 196
 197         path = btrfs_alloc_path();
 198         BUG_ON(!path);
 199         btrfs_init_path(path);
 200         di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino,
 201                                     name, name_len, -1);
 202         if (IS_ERR(di)) {
 203                 ret = PTR_ERR(di);
 204                 goto err;
 205         }
 206         if (!di) {
 207                 ret = -ENOENT;
 208                 goto err;
 209         }
 210         objectid = btrfs_disk_key_objectid(&di->location);
 211         ret = btrfs_delete_one_dir_name(trans, root, path, di);
 212         BUG_ON(ret);
 213         btrfs_release_path(root, path);
 214
 215         di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino,
 216                                          objectid, name, name_len, -1);
 217         if (IS_ERR(di)) {
 218                 ret = PTR_ERR(di);
 219                 goto err;
 220         }
 221         if (!di) {
 222                 ret = -ENOENT;
 223                 goto err;
 224         }
 225         ret = btrfs_delete_one_dir_name(trans, root, path, di);
 226         BUG_ON(ret);
 227
 228         dentry->d_inode->i_ctime = dir->i_ctime;
 229 err:
 230         btrfs_free_path(path);
 231         if (!ret) {
 232                 dir->i_size -= name_len * 2;
 233                 btrfs_update_inode(trans, root, dir);
 234                 drop_nlink(dentry->d_inode);
 235                 btrfs_update_inode(trans, root, dentry->d_inode);
 236                 dir->i_sb->s_dirt = 1;
 237         }
 238         return ret;
 239 }
 240
 241 static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
 242 {
 243         struct btrfs_root *root;
 244         struct btrfs_trans_handle *trans;
 245         int ret;
 246
 247         root = BTRFS_I(dir)->root;
 248         mutex_lock(&root->fs_info->fs_mutex);
 249         trans = btrfs_start_transaction(root, 1);
 250         btrfs_set_trans_block_group(trans, dir);
 251         ret = btrfs_unlink_trans(trans, root, dir, dentry);
 252         btrfs_end_transaction(trans, root);
 253         mutex_unlock(&root->fs_info->fs_mutex);
 254         return ret;
 255 }
 256
 257 static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
 258 {
 259         struct inode *inode = dentry->d_inode;
 260         int err;
 261         int ret;
 262         struct btrfs_root *root = BTRFS_I(dir)->root;
 263         struct btrfs_path *path;
 264         struct btrfs_key key;
 265         struct btrfs_trans_handle *trans;
 266         struct btrfs_key found_key;
 267         int found_type;
 268         struct btrfs_leaf *leaf;
 269         char *goodnames = "..";
 270
 271         path = btrfs_alloc_path();
 272         BUG_ON(!path);
 273         btrfs_init_path(path);
 274         mutex_lock(&root->fs_info->fs_mutex);
 275         trans = btrfs_start_transaction(root, 1);
 276         btrfs_set_trans_block_group(trans, dir);
 277         key.objectid = inode->i_ino;
 278         key.offset = (u64)-1;
 279         key.flags = (u32)-1;
 280         while(1) {
 281                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
 282                 if (ret < 0) {
 283                         err = ret;
 284                         goto out;
 285                 }
 286                 BUG_ON(ret == 0);
 287                 if (path->slots[0] == 0) {
 288                         err = -ENOENT;
 289                         goto out;
 290                 }
 291                 path->slots[0]--;
 292                 leaf = btrfs_buffer_leaf(path->nodes[0]);
 293                 btrfs_disk_key_to_cpu(&found_key,
 294                                       &leaf->items[path->slots[0]].key);
 295                 found_type = btrfs_key_type(&found_key);
 296                 if (found_key.objectid != inode->i_ino) {
 297                         err = -ENOENT;
 298                         goto out;
 299                 }
 300                 if ((found_type != BTRFS_DIR_ITEM_KEY &&
 301                      found_type != BTRFS_DIR_INDEX_KEY) ||
 302                     (!btrfs_match_dir_item_name(root, path, goodnames, 2) &&
 303                     !btrfs_match_dir_item_name(root, path, goodnames, 1))) {
 304                         err = -ENOTEMPTY;
 305                         goto out;
 306                 }
 307                 ret = btrfs_del_item(trans, root, path);
 308                 BUG_ON(ret);
 309
 310                 if (found_type == BTRFS_DIR_ITEM_KEY && found_key.offset == 1)
 311                         break;
 312                 btrfs_release_path(root, path);
 313         }
 314         ret = 0;
 315         btrfs_release_path(root, path);
 316
 317         /* now the directory is empty */
 318         err = btrfs_unlink_trans(trans, root, dir, dentry);
 319         if (!err) {
 320                 inode->i_size = 0;
 321         }
 322 out:
 323         btrfs_release_path(root, path);
 324         btrfs_free_path(path);
 325         mutex_unlock(&root->fs_info->fs_mutex);
 326         ret = btrfs_end_transaction(trans, root);
 327         if (ret && !err)
 328                 err = ret;
 329         return err;
 330 }
 331
 332 static int btrfs_free_inode(struct btrfs_trans_handle *trans,
 333                             struct btrfs_root *root,
 334                             struct inode *inode)
 335 {
 336         struct btrfs_path *path;
 337         int ret;
 338
 339         clear_inode(inode);
 340
 341         path = btrfs_alloc_path();
 342         BUG_ON(!path);
 343         btrfs_init_path(path);
 344         ret = btrfs_lookup_inode(trans, root, path,
 345                                  &BTRFS_I(inode)->location, -1);
 346         BUG_ON(ret);
 347         ret = btrfs_del_item(trans, root, path);
 348         BUG_ON(ret);
 349         btrfs_free_path(path);
 350         return ret;
 351 }
 352
 353 static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans,
 354                                    struct btrfs_root *root,
 355                                    struct inode *inode)
 356 {
 357         int ret;
 358         struct btrfs_path *path;
 359         struct btrfs_key key;
 360         struct btrfs_disk_key *found_key;
 361         struct btrfs_leaf *leaf;
 362         struct btrfs_file_extent_item *fi = NULL;
 363         u64 extent_start = 0;
 364         u64 extent_num_blocks = 0;
 365         int found_extent;
 366
 367         path = btrfs_alloc_path();
 368         BUG_ON(!path);
 369         /* FIXME, add redo link to tree so we don't leak on crash */
 370         key.objectid = inode->i_ino;
 371         key.offset = (u64)-1;
 372         key.flags = 0;
 373         /*
 374          * use BTRFS_CSUM_ITEM_KEY because it is larger than inline keys
 375          * or extent data
 376          */
 377         btrfs_set_key_type(&key, BTRFS_CSUM_ITEM_KEY);
 378         while(1) {
 379                 btrfs_init_path(path);
 380                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
 381                 if (ret < 0) {
 382                         goto error;
 383                 }
 384                 if (ret > 0) {
 385                         BUG_ON(path->slots[0] == 0);
 386                         path->slots[0]--;
 387                 }
 388                 leaf = btrfs_buffer_leaf(path->nodes[0]);
 389                 found_key = &leaf->items[path->slots[0]].key;
 390                 if (btrfs_disk_key_objectid(found_key) != inode->i_ino)
 391                         break;
 392                 if (btrfs_disk_key_type(found_key) != BTRFS_CSUM_ITEM_KEY &&
 393                     btrfs_disk_key_type(found_key) != BTRFS_EXTENT_DATA_KEY)
 394                         break;
 395                 if (btrfs_disk_key_offset(found_key) < inode->i_size)
 396                         break;
 397                 found_extent = 0;
 398                 if (btrfs_disk_key_type(found_key) == BTRFS_EXTENT_DATA_KEY) {
 399                         fi = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
 400                                             path->slots[0],
 401                                             struct btrfs_file_extent_item);
 402                         if (btrfs_file_extent_type(fi) !=
 403                             BTRFS_FILE_EXTENT_INLINE) {
 404                                 extent_start =
 405                                         btrfs_file_extent_disk_blocknr(fi);
 406                                 extent_num_blocks =
 407                                         btrfs_file_extent_disk_num_blocks(fi);
 408                                 /* FIXME blocksize != 4096 */
 409                                 inode->i_blocks -=
 410                                         btrfs_file_extent_num_blocks(fi) << 3;
 411                                 found_extent = 1;
 412                         }
 413                 }
 414                 ret = btrfs_del_item(trans, root, path);
 415                 BUG_ON(ret);
 416                 btrfs_release_path(root, path);
 417                 if (found_extent) {
 418                         ret = btrfs_free_extent(trans, root, extent_start,
 419                                                 extent_num_blocks, 0);
 420                         BUG_ON(ret);
 421                 }
 422         }
 423         ret = 0;
 424 error:
 425         btrfs_release_path(root, path);
 426         btrfs_free_path(path);
 427         inode->i_sb->s_dirt = 1;
 428         return ret;
 429 }
 430
 431 static void btrfs_delete_inode(struct inode *inode)
 432 {
 433         struct btrfs_trans_handle *trans;
 434         struct btrfs_root *root = BTRFS_I(inode)->root;
 435         int ret;
 436
 437         truncate_inode_pages(&inode->i_data, 0);
 438         if (is_bad_inode(inode)) {
 439                 goto no_delete;
 440         }
 441         inode->i_size = 0;
 442         mutex_lock(&root->fs_info->fs_mutex);
 443         trans = btrfs_start_transaction(root, 1);
 444         btrfs_set_trans_block_group(trans, inode);
 445         if (S_ISREG(inode->i_mode)) {
 446                 ret = btrfs_truncate_in_trans(trans, root, inode);
 447                 BUG_ON(ret);
 448         }
 449         btrfs_free_inode(trans, root, inode);
 450         btrfs_end_transaction(trans, root);
 451         mutex_unlock(&root->fs_info->fs_mutex);
 452         return;
 453 no_delete:
 454         clear_inode(inode);
 455 }
 456
 457 static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
 458                                struct btrfs_key *location)
 459 {
 460         const char *name = dentry->d_name.name;
 461         int namelen = dentry->d_name.len;
 462         struct btrfs_dir_item *di;
 463         struct btrfs_path *path;
 464         struct btrfs_root *root = BTRFS_I(dir)->root;
 465         int ret;
 466
 467         path = btrfs_alloc_path();
 468         BUG_ON(!path);
 469         btrfs_init_path(path);
 470         di = btrfs_lookup_dir_item(NULL, root, path, dir->i_ino, name,
 471                                     namelen, 0);
 472         if (!di || IS_ERR(di)) {
 473                 location->objectid = 0;
 474                 ret = 0;
 475                 goto out;
 476         }
 477         btrfs_disk_key_to_cpu(location, &di->location);
 478 out:
 479         btrfs_release_path(root, path);
 480         btrfs_free_path(path);
 481         return ret;
 482 }
 483
 484 int fixup_tree_root_location(struct btrfs_root *root,
 485                              struct btrfs_key *location,
 486                              struct btrfs_root **sub_root)
 487 {
 488         struct btrfs_path *path;
 489         struct btrfs_root_item *ri;
 490
 491         if (btrfs_key_type(location) != BTRFS_ROOT_ITEM_KEY)
 492                 return 0;
 493         if (location->objectid == BTRFS_ROOT_TREE_OBJECTID)
 494                 return 0;
 495
 496         path = btrfs_alloc_path();
 497         BUG_ON(!path);
 498         mutex_lock(&root->fs_info->fs_mutex);
 499
 500         *sub_root = btrfs_read_fs_root(root->fs_info, location);
 501         if (IS_ERR(*sub_root))
 502                 return PTR_ERR(*sub_root);
 503
 504         ri = &(*sub_root)->root_item;
 505         location->objectid = btrfs_root_dirid(ri);
 506         location->flags = 0;
 507         btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
 508         location->offset = 0;
 509
 510         btrfs_free_path(path);
 511         mutex_unlock(&root->fs_info->fs_mutex);
 512         return 0;
 513 }
 514
 515 int btrfs_init_locked_inode(struct inode *inode, void *p)
 516 {
 517         struct btrfs_iget_args *args = p;
 518         inode->i_ino = args->ino;
 519         BTRFS_I(inode)->root = args->root;
 520         return 0;
 521 }
 522
 523 int btrfs_find_actor(struct inode *inode, void *opaque)
 524 {
 525         struct btrfs_iget_args *args = opaque;
 526         return (args->ino == inode->i_ino &&
 527                 args->root == BTRFS_I(inode)->root);
 528 }
 529
 530 struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid,
 531                                 struct btrfs_root *root)
 532 {
 533         struct inode *inode;
 534         struct btrfs_iget_args args;
 535         args.ino = objectid;
 536         args.root = root;
 537
 538         inode = iget5_locked(s, objectid, btrfs_find_actor,
 539                              btrfs_init_locked_inode,
 540                              (void *)&args);
 541         return inode;
 542 }
 543
 544 static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
 545                                    struct nameidata *nd)
 546 {
 547         struct inode * inode;
 548         struct btrfs_inode *bi = BTRFS_I(dir);
 549         struct btrfs_root *root = bi->root;
 550         struct btrfs_root *sub_root = root;
 551         struct btrfs_key location;
 552         int ret;
 553
 554         if (dentry->d_name.len > BTRFS_NAME_LEN)
 555                 return ERR_PTR(-ENAMETOOLONG);
 556         mutex_lock(&root->fs_info->fs_mutex);
 557         ret = btrfs_inode_by_name(dir, dentry, &location);
 558         mutex_unlock(&root->fs_info->fs_mutex);
 559         if (ret < 0)
 560                 return ERR_PTR(ret);
 561         inode = NULL;
 562         if (location.objectid) {
 563                 ret = fixup_tree_root_location(root, &location, &sub_root);
 564                 if (ret < 0)
 565                         return ERR_PTR(ret);
 566                 if (ret > 0)
 567                         return ERR_PTR(-ENOENT);
 568                 inode = btrfs_iget_locked(dir->i_sb, location.objectid,
 569                                           sub_root);
 570                 if (!inode)
 571                         return ERR_PTR(-EACCES);
 572                 if (inode->i_state & I_NEW) {
 573                         if (sub_root != root) {
 574 printk("adding new root for inode %lu root %p (found %p)\n", inode->i_ino, sub_root, BTRFS_I(inode)->root);
 575                                 igrab(inode);
 576                                 sub_root->inode = inode;
 577                         }
 578                         BTRFS_I(inode)->root = sub_root;
 579                         memcpy(&BTRFS_I(inode)->location, &location,
 580                                sizeof(location));
 581                         btrfs_read_locked_inode(inode);
 582                         unlock_new_inode(inode);
 583                 }
 584         }
 585         return d_splice_alias(inode, dentry);
 586 }
 587
 588 static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 589 {
 590         struct inode *inode = filp->f_path.dentry->d_inode;
 591         struct btrfs_root *root = BTRFS_I(inode)->root;
 592         struct btrfs_item *item;
 593         struct btrfs_dir_item *di;
 594         struct btrfs_key key;
 595         struct btrfs_path *path;
 596         int ret;
 597         u32 nritems;
 598         struct btrfs_leaf *leaf;
 599         int slot;
 600         int advance;
 601         unsigned char d_type = DT_UNKNOWN;
 602         int over = 0;
 603         u32 di_cur;
 604         u32 di_total;
 605         u32 di_len;
 606         int key_type = BTRFS_DIR_INDEX_KEY;
 607
 608         /* FIXME, use a real flag for deciding about the key type */
 609         if (root->fs_info->tree_root == root)
 610                 key_type = BTRFS_DIR_ITEM_KEY;
 611         mutex_lock(&root->fs_info->fs_mutex);
 612         key.objectid = inode->i_ino;
 613         key.flags = 0;
 614         btrfs_set_key_type(&key, key_type);
 615         key.offset = filp->f_pos;
 616         path = btrfs_alloc_path();
 617         btrfs_init_path(path);
 618         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
 619         if (ret < 0)
 620                 goto err;
 621         advance = 0;
 622         while(1) {
 623                 leaf = btrfs_buffer_leaf(path->nodes[0]);
 624                 nritems = btrfs_header_nritems(&leaf->header);
 625                 slot = path->slots[0];
 626                 if (advance || slot >= nritems) {
 627                         if (slot >= nritems -1) {
 628                                 ret = btrfs_next_leaf(root, path);
 629                                 if (ret)
 630                                         break;
 631                                 leaf = btrfs_buffer_leaf(path->nodes[0]);
 632                                 nritems = btrfs_header_nritems(&leaf->header);
 633                                 slot = path->slots[0];
 634                         } else {
 635                                 slot++;
 636                                 path->slots[0]++;
 637                         }
 638                 }
 639                 advance = 1;
 640                 item = leaf->items + slot;
 641                 if (btrfs_disk_key_objectid(&item->key) != key.objectid)
 642                         break;
 643                 if (btrfs_disk_key_type(&item->key) != key_type)
 644                         break;
 645                 if (btrfs_disk_key_offset(&item->key) < filp->f_pos)
 646                         continue;
 647                 filp->f_pos = btrfs_disk_key_offset(&item->key);
 648                 advance = 1;
 649                 di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
 650                 di_cur = 0;
 651                 di_total = btrfs_item_size(leaf->items + slot);
 652                 while(di_cur < di_total) {
 653                         over = filldir(dirent, (const char *)(di + 1),
 654                                        btrfs_dir_name_len(di),
 655                                        btrfs_disk_key_offset(&item->key),
 656                                        btrfs_disk_key_objectid(&di->location),
 657                                        d_type);
 658                         if (over)
 659                                 goto nopos;
 660                         di_len = btrfs_dir_name_len(di) + sizeof(*di);
 661                         di_cur += di_len;
 662                         di = (struct btrfs_dir_item *)((char *)di + di_len);
 663                 }
 664         }
 665         filp->f_pos++;
 666 nopos:
 667         ret = 0;
 668 err:
 669         btrfs_release_path(root, path);
 670         btrfs_free_path(path);
 671         mutex_unlock(&root->fs_info->fs_mutex);
 672         return ret;
 673 }
 674
 675 static void btrfs_put_super (struct super_block * sb)
 676 {
 677         struct btrfs_root *root = btrfs_sb(sb);
 678         int ret;
 679
 680         ret = close_ctree(root);
 681         if (ret) {
 682                 printk("close ctree returns %d\n", ret);
 683         }
 684         sb->s_fs_info = NULL;
 685 }
 686
 687 static int btrfs_fill_super(struct super_block * sb, void * data, int silent)
 688 {
 689         struct inode * inode;
 690         struct dentry * root_dentry;
 691         struct btrfs_super_block *disk_super;
 692         struct btrfs_root *tree_root;
 693         struct btrfs_inode *bi;
 694
 695         sb->s_maxbytes = MAX_LFS_FILESIZE;
 696         sb->s_magic = BTRFS_SUPER_MAGIC;
 697         sb->s_op = &btrfs_super_ops;
 698         sb->s_time_gran = 1;
 699
 700         tree_root = open_ctree(sb);
 701
 702         if (!tree_root) {
 703                 printk("btrfs: open_ctree failed\n");
 704                 return -EIO;
 705         }
 706         sb->s_fs_info = tree_root;
 707         disk_super = tree_root->fs_info->disk_super;
 708         printk("read in super total blocks %Lu root %Lu\n",
 709                btrfs_super_total_blocks(disk_super),
 710                btrfs_super_root_dir(disk_super));
 711
 712         inode = btrfs_iget_locked(sb, btrfs_super_root_dir(disk_super),
 713                                   tree_root);
 714         bi = BTRFS_I(inode);
 715         bi->location.objectid = inode->i_ino;
 716         bi->location.offset = 0;
 717         bi->location.flags = 0;
 718         bi->root = tree_root;
 719         btrfs_set_key_type(&bi->location, BTRFS_INODE_ITEM_KEY);
 720
 721         if (!inode)
 722                 return -ENOMEM;
 723         if (inode->i_state & I_NEW) {
 724                 btrfs_read_locked_inode(inode);
 725                 unlock_new_inode(inode);
 726         }
 727
 728         root_dentry = d_alloc_root(inode);
 729         if (!root_dentry) {
 730                 iput(inode);
 731                 return -ENOMEM;
 732         }
 733         sb->s_root = root_dentry;
 734
 735         return 0;
 736 }
 737
 738 static int btrfs_write_inode(struct inode *inode, int wait)
 739 {
 740         struct btrfs_root *root = BTRFS_I(inode)->root;
 741         struct btrfs_trans_handle *trans;
 742         int ret = 0;
 743
 744         if (wait) {
 745                 mutex_lock(&root->fs_info->fs_mutex);
 746                 trans = btrfs_start_transaction(root, 1);
 747                 btrfs_set_trans_block_group(trans, inode);
 748                 ret = btrfs_commit_transaction(trans, root);
 749                 mutex_unlock(&root->fs_info->fs_mutex);
 750         }
 751         return ret;
 752 }
 753
 754 static void btrfs_dirty_inode(struct inode *inode)
 755 {
 756         struct btrfs_root *root = BTRFS_I(inode)->root;
 757         struct btrfs_trans_handle *trans;
 758
 759         mutex_lock(&root->fs_info->fs_mutex);
 760         trans = btrfs_start_transaction(root, 1);
 761         btrfs_set_trans_block_group(trans, inode);
 762         btrfs_update_inode(trans, root, inode);
 763         btrfs_end_transaction(trans, root);
 764         mutex_unlock(&root->fs_info->fs_mutex);
 765 }
 766
 767 static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
 768                                      struct btrfs_root *root,
 769                                      u64 objectid,
 770                                      struct btrfs_block_group_cache *group,
 771                                      int mode)
 772 {
 773         struct inode *inode;
 774         struct btrfs_inode_item inode_item;
 775         struct btrfs_key *location;
 776         int ret;
 777
 778         inode = new_inode(root->fs_info->sb);
 779         if (!inode)
 780                 return ERR_PTR(-ENOMEM);
 781
 782         BTRFS_I(inode)->root = root;
 783         group = btrfs_find_block_group(root, group, 0);
 784         BTRFS_I(inode)->block_group = group;
 785
 786         inode->i_uid = current->fsuid;
 787         inode->i_gid = current->fsgid;
 788         inode->i_mode = mode;
 789         inode->i_ino = objectid;
 790         inode->i_blocks = 0;
 791         inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
 792         fill_inode_item(&inode_item, inode);
 793         location = &BTRFS_I(inode)->location;
 794         location->objectid = objectid;
 795         location->flags = 0;
 796         location->offset = 0;
 797         btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
 798
 799         ret = btrfs_insert_inode(trans, root, objectid, &inode_item);
 800         BUG_ON(ret);
 801
 802         insert_inode_hash(inode);
 803         return inode;
 804 }
 805
 806 static int btrfs_add_link(struct btrfs_trans_handle *trans,
 807                             struct dentry *dentry, struct inode *inode)
 808 {
 809         int ret;
 810         struct btrfs_key key;
 811         struct btrfs_root *root = BTRFS_I(dentry->d_parent->d_inode)->root;
 812         key.objectid = inode->i_ino;
 813         key.flags = 0;
 814         btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
 815         key.offset = 0;
 816
 817         ret = btrfs_insert_dir_item(trans, root,
 818                                     dentry->d_name.name, dentry->d_name.len,
 819                                     dentry->d_parent->d_inode->i_ino,
 820                                     &key, 0);
 821         if (ret == 0) {
 822                 dentry->d_parent->d_inode->i_size += dentry->d_name.len * 2;
 823                 ret = btrfs_update_inode(trans, root,
 824                                          dentry->d_parent->d_inode);
 825         }
 826         return ret;
 827 }
 828
 829 static int btrfs_add_nondir(struct btrfs_trans_handle *trans,
 830                             struct dentry *dentry, struct inode *inode)
 831 {
 832         int err = btrfs_add_link(trans, dentry, inode);
 833         if (!err) {
 834                 d_instantiate(dentry, inode);
 835                 return 0;
 836         }
 837         if (err > 0)
 838                 err = -EEXIST;
 839         return err;
 840 }
 841
 842 static int btrfs_create(struct inode *dir, struct dentry *dentry,
 843                         int mode, struct nameidata *nd)
 844 {
 845         struct btrfs_trans_handle *trans;
 846         struct btrfs_root *root = BTRFS_I(dir)->root;
 847         struct inode *inode;
 848         int err;
 849         int drop_inode = 0;
 850         u64 objectid;
 851
 852         mutex_lock(&root->fs_info->fs_mutex);
 853         trans = btrfs_start_transaction(root, 1);
 854         btrfs_set_trans_block_group(trans, dir);
 855
 856         err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
 857         if (err) {
 858                 err = -ENOSPC;
 859                 goto out_unlock;
 860         }
 861
 862         inode = btrfs_new_inode(trans, root, objectid,
 863                                 BTRFS_I(dir)->block_group, mode);
 864         err = PTR_ERR(inode);
 865         if (IS_ERR(inode))
 866                 goto out_unlock;
 867
 868         btrfs_set_trans_block_group(trans, inode);
 869         err = btrfs_add_nondir(trans, dentry, inode);
 870         if (err)
 871                 drop_inode = 1;
 872         else {
 873                 inode->i_mapping->a_ops = &btrfs_aops;
 874                 inode->i_fop = &btrfs_file_operations;
 875                 inode->i_op = &btrfs_file_inode_operations;
 876         }
 877         dir->i_sb->s_dirt = 1;
 878         btrfs_update_inode_block_group(trans, inode);
 879         btrfs_update_inode_block_group(trans, dir);
 880 out_unlock:
 881         btrfs_end_transaction(trans, root);
 882         mutex_unlock(&root->fs_info->fs_mutex);
 883
 884         if (drop_inode) {
 885                 inode_dec_link_count(inode);
 886                 iput(inode);
 887         }
 888         return err;
 889 }
 890
 891 static int btrfs_make_empty_dir(struct btrfs_trans_handle *trans,
 892                                 struct btrfs_root *root,
 893                                 u64 objectid, u64 dirid)
 894 {
 895         int ret;
 896         char buf[2];
 897         struct btrfs_key key;
 898
 899         buf[0] = '.';
 900         buf[1] = '.';
 901
 902         key.objectid = objectid;
 903         key.offset = 0;
 904         key.flags = 0;
 905         btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
 906
 907         ret = btrfs_insert_dir_item(trans, root, buf, 1, objectid,
 908                                     &key, 1);
 909         if (ret)
 910                 goto error;
 911         key.objectid = dirid;
 912         ret = btrfs_insert_dir_item(trans, root, buf, 2, objectid,
 913                                     &key, 1);
 914         if (ret)
 915                 goto error;
 916 error:
 917         return ret;
 918 }
 919
 920 static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 921 {
 922         struct inode *inode;
 923         struct btrfs_trans_handle *trans;
 924         struct btrfs_root *root = BTRFS_I(dir)->root;
 925         int err = 0;
 926         int drop_on_err = 0;
 927         u64 objectid;
 928
 929         mutex_lock(&root->fs_info->fs_mutex);
 930         trans = btrfs_start_transaction(root, 1);
 931         btrfs_set_trans_block_group(trans, dir);
 932         if (IS_ERR(trans)) {
 933                 err = PTR_ERR(trans);
 934                 goto out_unlock;
 935         }
 936
 937         err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
 938         if (err) {
 939                 err = -ENOSPC;
 940                 goto out_unlock;
 941         }
 942
 943         inode = btrfs_new_inode(trans, root, objectid,
 944                                 BTRFS_I(dir)->block_group, S_IFDIR | mode);
 945         if (IS_ERR(inode)) {
 946                 err = PTR_ERR(inode);
 947                 goto out_fail;
 948         }
 949         drop_on_err = 1;
 950         inode->i_op = &btrfs_dir_inode_operations;
 951         inode->i_fop = &btrfs_dir_file_operations;
 952         btrfs_set_trans_block_group(trans, inode);
 953
 954         err = btrfs_make_empty_dir(trans, root, inode->i_ino, dir->i_ino);
 955         if (err)
 956                 goto out_fail;
 957
 958         inode->i_size = 6;
 959         err = btrfs_update_inode(trans, root, inode);
 960         if (err)
 961                 goto out_fail;
 962         err = btrfs_add_link(trans, dentry, inode);
 963         if (err)
 964                 goto out_fail;
 965         d_instantiate(dentry, inode);
 966         drop_on_err = 0;
 967         dir->i_sb->s_dirt = 1;
 968         btrfs_update_inode_block_group(trans, inode);
 969         btrfs_update_inode_block_group(trans, dir);
 970
 971 out_fail:
 972         btrfs_end_transaction(trans, root);
 973 out_unlock:
 974         mutex_unlock(&root->fs_info->fs_mutex);
 975         if (drop_on_err)
 976                 iput(inode);
 977         return err;
 978 }
 979
 980 static int btrfs_sync_file(struct file *file,
 981                            struct dentry *dentry, int datasync)
 982 {
 983         struct inode *inode = dentry->d_inode;
 984         struct btrfs_root *root = BTRFS_I(inode)->root;
 985         int ret;
 986         struct btrfs_trans_handle *trans;
 987
 988         mutex_lock(&root->fs_info->fs_mutex);
 989         trans = btrfs_start_transaction(root, 1);
 990         if (!trans) {
 991                 ret = -ENOMEM;
 992                 goto out;
 993         }
 994         ret = btrfs_commit_transaction(trans, root);
 995         mutex_unlock(&root->fs_info->fs_mutex);
 996 out:
 997         return ret > 0 ? EIO : ret;
 998 }
 999
1000 static int btrfs_sync_fs(struct super_block *sb, int wait)
1001 {
1002         struct btrfs_trans_handle *trans;
1003         struct btrfs_root *root;
1004         int ret;
1005         root = btrfs_sb(sb);
1006
1007         sb->s_dirt = 0;
1008         if (!wait) {
1009                 filemap_flush(root->fs_info->btree_inode->i_mapping);
1010                 return 0;
1011         }
1012         mutex_lock(&root->fs_info->fs_mutex);
1013         trans = btrfs_start_transaction(root, 1);
1014         ret = btrfs_commit_transaction(trans, root);
1015         sb->s_dirt = 0;
1016         BUG_ON(ret);
1017 printk("btrfs sync_fs\n");
1018         mutex_unlock(&root->fs_info->fs_mutex);
1019         return 0;
1020 }
1021
1022 static int btrfs_get_block_lock(struct inode *inode, sector_t iblock,
1023                            struct buffer_head *result, int create)
1024 {
1025         int ret;
1026         int err = 0;
1027         u64 blocknr;
1028         u64 extent_start = 0;
1029         u64 extent_end = 0;
1030         u64 objectid = inode->i_ino;
1031         u32 found_type;
1032         struct btrfs_path *path;
1033         struct btrfs_root *root = BTRFS_I(inode)->root;
1034         struct btrfs_file_extent_item *item;
1035         struct btrfs_leaf *leaf;
1036         struct btrfs_disk_key *found_key;
1037
1038         path = btrfs_alloc_path();
1039         BUG_ON(!path);
1040         btrfs_init_path(path);
1041         if (create) {
1042                 WARN_ON(1);
1043         }
1044
1045         ret = btrfs_lookup_file_extent(NULL, root, path,
1046                                        inode->i_ino,
1047                                        iblock << inode->i_blkbits, 0);
1048         if (ret < 0) {
1049                 err = ret;
1050                 goto out;
1051         }
1052
1053         if (ret != 0) {
1054                 if (path->slots[0] == 0) {
1055                         btrfs_release_path(root, path);
1056                         goto out;
1057                 }
1058                 path->slots[0]--;
1059         }
1060
1061         item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0],
1062                               struct btrfs_file_extent_item);
1063         leaf = btrfs_buffer_leaf(path->nodes[0]);
1064         blocknr = btrfs_file_extent_disk_blocknr(item);
1065         blocknr += btrfs_file_extent_offset(item);
1066
1067         /* are we inside the extent that was found? */
1068         found_key = &leaf->items[path->slots[0]].key;
1069         found_type = btrfs_disk_key_type(found_key);
1070         if (btrfs_disk_key_objectid(found_key) != objectid ||
1071             found_type != BTRFS_EXTENT_DATA_KEY) {
1072                 extent_end = 0;
1073                 extent_start = 0;
1074                 btrfs_release_path(root, path);
1075                 goto out;
1076         }
1077         found_type = btrfs_file_extent_type(item);
1078         extent_start = btrfs_disk_key_offset(&leaf->items[path->slots[0]].key);
1079         if (found_type == BTRFS_FILE_EXTENT_REG) {
1080                 extent_start = extent_start >> inode->i_blkbits;
1081                 extent_end = extent_start + btrfs_file_extent_num_blocks(item);
1082                 if (iblock >= extent_start && iblock < extent_end) {
1083                         err = 0;
1084                         btrfs_map_bh_to_logical(root, result, blocknr +
1085                                                 iblock - extent_start);
1086                         goto out;
1087                 }
1088         } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
1089                 char *ptr;
1090                 char *map;
1091                 u32 size;
1092                 size = btrfs_file_extent_inline_len(leaf->items +
1093                                                     path->slots[0]);
1094                 extent_end = (extent_start + size) >> inode->i_blkbits;
1095                 extent_start >>= inode->i_blkbits;
1096                 if (iblock < extent_start || iblock > extent_end) {
1097                         goto out;
1098                 }
1099                 ptr = btrfs_file_extent_inline_start(item);
1100                 map = kmap(result->b_page);
1101                 memcpy(map, ptr, size);
1102                 memset(map + size, 0, PAGE_CACHE_SIZE - size);
1103                 flush_dcache_page(result->b_page);
1104                 kunmap(result->b_page);
1105                 set_buffer_uptodate(result);
1106                 SetPageChecked(result->b_page);
1107                 btrfs_map_bh_to_logical(root, result, 0);
1108         }
1109 out:
1110         btrfs_release_path(root, path);
1111         btrfs_free_path(path);
1112         return err;
1113 }
1114
1115 static int btrfs_get_block(struct inode *inode, sector_t iblock,
1116                            struct buffer_head *result, int create)
1117 {
1118         int err;
1119         struct btrfs_root *root = BTRFS_I(inode)->root;
1120         mutex_lock(&root->fs_info->fs_mutex);
1121         err = btrfs_get_block_lock(inode, iblock, result, create);
1122         mutex_unlock(&root->fs_info->fs_mutex);
1123         return err;
1124 }
1125
1126 static int btrfs_prepare_write(struct file *file, struct page *page,
1127                                unsigned from, unsigned to)
1128 {
1129         return nobh_prepare_write(page, from, to, btrfs_get_block);
1130 }
1131
/*
 * write_super hook: perform a waiting btrfs_sync_fs() on @sb and
 * discard its return value.
 */
static void btrfs_write_super(struct super_block *sb)
{
        const int wait = 1;

        btrfs_sync_fs(sb, wait);
}
1136
1137 static int btrfs_readpage(struct file *file, struct page *page)
1138 {
1139         return mpage_readpage(page, btrfs_get_block);
1140 }
1141
1142 /*
1143  * While block_write_full_page is writing back the dirty buffers under
1144  * the page lock, whoever dirtied the buffers may decide to clean them
1145  * again at any time.  We handle that by only looking at the buffer
1146  * state inside lock_buffer().
1147  *
1148  * If block_write_full_page() is called for regular writeback
1149  * (wbc->sync_mode == WB_SYNC_NONE) then it will redirty a page which has a
1150  * locked buffer.   This only can happen if someone has written the buffer
1151  * directly, with submit_bh().  At the address_space level PageWriteback
1152  * prevents this contention from occurring.
1153  */
1154 static int __btrfs_write_full_page(struct inode *inode, struct page *page,
1155                                    struct writeback_control *wbc)
1156 {
1157         int err;
1158         sector_t block;
1159         sector_t last_block;
1160         struct buffer_head *bh, *head;
1161         const unsigned blocksize = 1 << inode->i_blkbits;
1162         int nr_underway = 0;
1163
1164         BUG_ON(!PageLocked(page));
1165
1166         last_block = (i_size_read(inode) - 1) >> inode->i_blkbits;
1167
1168         if (!page_has_buffers(page)) {
1169                 create_empty_buffers(page, blocksize,
1170                                         (1 << BH_Dirty)|(1 << BH_Uptodate));
1171         }
1172
1173         /*
1174          * Be very careful.  We have no exclusion from __set_page_dirty_buffers
1175          * here, and the (potentially unmapped) buffers may become dirty at
1176          * any time.  If a buffer becomes dirty here after we've inspected it
1177          * then we just miss that fact, and the page stays dirty.
1178          *
1179          * Buffers outside i_size may be dirtied by __set_page_dirty_buffers;
1180          * handle that here by just cleaning them.
1181          */
1182
1183         block = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
1184         head = page_buffers(page);
1185         bh = head;
1186
1187         /*
1188          * Get all the dirty buffers mapped to disk addresses and
1189          * handle any aliases from the underlying blockdev's mapping.
1190          */
1191         do {
1192                 if (block > last_block) {
1193                         /*
1194                          * mapped buffers outside i_size will occur, because
1195                          * this page can be outside i_size when there is a
1196                          * truncate in progress.
1197                          */
1198                         /*
1199                          * The buffer was zeroed by block_write_full_page()
1200                          */
1201                         clear_buffer_dirty(bh);
1202                         set_buffer_uptodate(bh);
1203                 } else if (!buffer_mapped(bh) && buffer_dirty(bh)) {
1204                         WARN_ON(bh->b_size != blocksize);
1205                         err = btrfs_get_block(inode, block, bh, 0);
1206                         if (err)
1207                                 goto recover;
1208                         if (buffer_new(bh)) {
1209                                 /* blockdev mappings never come here */
1210                                 clear_buffer_new(bh);
1211                                 unmap_underlying_metadata(bh->b_bdev,
1212                                                         bh->b_blocknr);
1213                         }
1214                 }
1215                 bh = bh->b_this_page;
1216                 block++;
1217         } while (bh != head);
1218
1219         do {
1220                 if (!buffer_mapped(bh))
1221                         continue;
1222                 /*
1223                  * If it's a fully non-blocking write attempt and we cannot
1224                  * lock the buffer then redirty the page.  Note that this can
1225                  * potentially cause a busy-wait loop from pdflush and kswapd
1226                  * activity, but those code paths have their own higher-level
1227                  * throttling.
1228                  */
1229                 if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) {
1230                         lock_buffer(bh);
1231                 } else if (test_set_buffer_locked(bh)) {
1232                         redirty_page_for_writepage(wbc, page);
1233                         continue;
1234                 }
1235                 if (test_clear_buffer_dirty(bh) && bh->b_blocknr != 0) {
1236                         mark_buffer_async_write(bh);
1237                 } else {
1238                         unlock_buffer(bh);
1239                 }
1240         } while ((bh = bh->b_this_page) != head);
1241
1242         /*
1243          * The page and its buffers are protected by PageWriteback(), so we can
1244          * drop the bh refcounts early.
1245          */
1246         BUG_ON(PageWriteback(page));
1247         set_page_writeback(page);
1248
1249         do {
1250                 struct buffer_head *next = bh->b_this_page;
1251                 if (buffer_async_write(bh)) {
1252                         submit_bh(WRITE, bh);
1253                         nr_underway++;
1254                 }
1255                 bh = next;
1256         } while (bh != head);
1257         unlock_page(page);
1258
1259         err = 0;
1260 done:
1261         if (nr_underway == 0) {
1262                 /*
1263                  * The page was marked dirty, but the buffers were
1264                  * clean.  Someone wrote them back by hand with
1265                  * ll_rw_block/submit_bh.  A rare case.
1266                  */
1267                 int uptodate = 1;
1268                 do {
1269                         if (!buffer_uptodate(bh)) {
1270                                 uptodate = 0;
1271                                 break;
1272                         }
1273                         bh = bh->b_this_page;
1274                 } while (bh != head);
1275                 if (uptodate)
1276                         SetPageUptodate(page);
1277                 end_page_writeback(page);
1278                 /*
1279                  * The page and buffer_heads can be released at any time from
1280                  * here on.
1281                  */
1282                 wbc->pages_skipped++;   /* We didn't write this page */
1283         }
1284         return err;
1285
1286 recover:
1287         /*
1288          * ENOSPC, or some other error.  We may already have added some
1289          * blocks to the file, so we need to write these out to avoid
1290          * exposing stale data.
1291          * The page is currently locked and not marked for writeback
1292          */
1293         bh = head;
1294         /* Recovery: lock and submit the mapped buffers */
1295         do {
1296                 if (buffer_mapped(bh) && buffer_dirty(bh)) {
1297                         lock_buffer(bh);
1298                         mark_buffer_async_write(bh);
1299                 } else {
1300                         /*
1301                          * The buffer may have been set dirty during
1302                          * attachment to a dirty page.
1303                          */
1304                         clear_buffer_dirty(bh);
1305                 }
1306         } while ((bh = bh->b_this_page) != head);
1307         SetPageError(page);
1308         BUG_ON(PageWriteback(page));
1309         set_page_writeback(page);
1310         do {
1311                 struct buffer_head *next = bh->b_this_page;
1312                 if (buffer_async_write(bh)) {
1313                         clear_buffer_dirty(bh);
1314                         submit_bh(WRITE, bh);
1315                         nr_underway++;
1316                 }
1317                 bh = next;
1318         } while (bh != head);
1319         unlock_page(page);
1320         goto done;
1321 }
1322
1323 /*
1324  * The generic ->writepage function for buffer-backed address_spaces
1325  */
1326 static int btrfs_writepage(struct page *page, struct writeback_control *wbc)
1327 {
1328         struct inode * const inode = page->mapping->host;
1329         loff_t i_size = i_size_read(inode);
1330         const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
1331         unsigned offset;
1332         void *kaddr;
1333
1334         /* Is the page fully inside i_size? */
1335         if (page->index < end_index)
1336                 return __btrfs_write_full_page(inode, page, wbc);
1337
1338         /* Is the page fully outside i_size? (truncate in progress) */
1339         offset = i_size & (PAGE_CACHE_SIZE-1);
1340         if (page->index >= end_index+1 || !offset) {
1341                 /*
1342                  * The page may have dirty, unmapped buffers.  For example,
1343                  * they may have been added in ext3_writepage().  Make them
1344                  * freeable here, so the page does not leak.
1345                  */
1346                 block_invalidatepage(page, 0);
1347                 unlock_page(page);
1348                 return 0; /* don't care */
1349         }
1350
1351         /*
1352          * The page straddles i_size.  It must be zeroed out on each and every
1353          * writepage invokation because it may be mmapped.  "A file is mapped
1354          * in multiples of the page size.  For a file that is not a multiple of
1355          * the  page size, the remaining memory is zeroed when mapped, and
1356          * writes to that region are not written out to the file."
1357          */
1358         kaddr = kmap_atomic(page, KM_USER0);
1359         memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset);
1360         flush_dcache_page(page);
1361         kunmap_atomic(kaddr, KM_USER0);
1362         return __btrfs_write_full_page(inode, page, wbc);
1363 }
1364
1365 static void btrfs_truncate(struct inode *inode)
1366 {
1367         struct btrfs_root *root = BTRFS_I(inode)->root;
1368         int ret;
1369         struct btrfs_trans_handle *trans;
1370
1371         if (!S_ISREG(inode->i_mode))
1372                 return;
1373         if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
1374                 return;
1375
1376         nobh_truncate_page(inode->i_mapping, inode->i_size);
1377
1378         /* FIXME, add redo link to tree so we don't leak on crash */
1379         mutex_lock(&root->fs_info->fs_mutex);
1380         trans = btrfs_start_transaction(root, 1);
1381         btrfs_set_trans_block_group(trans, inode);
1382         ret = btrfs_truncate_in_trans(trans, root, inode);
1383         BUG_ON(ret);
1384         ret = btrfs_end_transaction(trans, root);
1385         BUG_ON(ret);
1386         mutex_unlock(&root->fs_info->fs_mutex);
1387         mark_inode_dirty(inode);
1388 }
1389
1390 /*
1391  * Make sure any changes to nobh_commit_write() are reflected in
1392  * nobh_truncate_page(), since it doesn't call commit_write().
1393  */
1394 static int btrfs_commit_write(struct file *file, struct page *page,
1395                               unsigned from, unsigned to)
1396 {
1397         struct inode *inode = page->mapping->host;
1398         struct buffer_head *bh;
1399         loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
1400
1401         SetPageUptodate(page);
1402         bh = page_buffers(page);
1403         if (buffer_mapped(bh) && bh->b_blocknr != 0) {
1404                 set_page_dirty(page);
1405         }
1406         if (pos > inode->i_size) {
1407                 i_size_write(inode, pos);
1408                 mark_inode_dirty(inode);
1409         }
1410         return 0;
1411 }
1412
1413 static int btrfs_copy_from_user(loff_t pos, int num_pages, int write_bytes,
1414                                 struct page **prepared_pages,
1415                                 const char __user * buf)
1416 {
1417         long page_fault = 0;
1418         int i;
1419         int offset = pos & (PAGE_CACHE_SIZE - 1);
1420
1421         for (i = 0; i < num_pages && write_bytes > 0; i++, offset = 0) {
1422                 size_t count = min_t(size_t,
1423                                      PAGE_CACHE_SIZE - offset, write_bytes);
1424                 struct page *page = prepared_pages[i];
1425                 fault_in_pages_readable(buf, count);
1426
1427                 /* Copy data from userspace to the current page */
1428                 kmap(page);
1429                 page_fault = __copy_from_user(page_address(page) + offset,
1430                                               buf, count);
1431                 /* Flush processor's dcache for this page */
1432                 flush_dcache_page(page);
1433                 kunmap(page);
1434                 buf += count;
1435                 write_bytes -= count;
1436
1437                 if (page_fault)
1438                         break;
1439         }
1440         return page_fault ? -EFAULT : 0;
1441 }
1442
1443 static void btrfs_drop_pages(struct page **pages, size_t num_pages)
1444 {
1445         size_t i;
1446         for (i = 0; i < num_pages; i++) {
1447                 if (!pages[i])
1448                         break;
1449                 unlock_page(pages[i]);
1450                 mark_page_accessed(pages[i]);
1451                 page_cache_release(pages[i]);
1452         }
1453 }
1454 static int dirty_and_release_pages(struct btrfs_trans_handle *trans,
1455                                    struct btrfs_root *root,
1456                                    struct file *file,
1457                                    struct page **pages,
1458                                    size_t num_pages,
1459                                    loff_t pos,
1460                                    size_t write_bytes)
1461 {
1462         int i;
1463         int offset;
1464         int err = 0;
1465         int ret;
1466         int this_write;
1467         struct inode *inode = file->f_path.dentry->d_inode;
1468         struct buffer_head *bh;
1469         struct btrfs_file_extent_item *ei;
1470
1471         for (i = 0; i < num_pages; i++) {
1472                 offset = pos & (PAGE_CACHE_SIZE -1);
1473                 this_write = min(PAGE_CACHE_SIZE - offset, write_bytes);
1474                 /* FIXME, one block at a time */
1475
1476                 mutex_lock(&root->fs_info->fs_mutex);
1477                 trans = btrfs_start_transaction(root, 1);
1478                 btrfs_set_trans_block_group(trans, inode);
1479
1480                 bh = page_buffers(pages[i]);
1481                 if (buffer_mapped(bh) && bh->b_blocknr == 0) {
1482                         struct btrfs_key key;
1483                         struct btrfs_path *path;
1484                         char *ptr;
1485                         u32 datasize;
1486
1487                         path = btrfs_alloc_path();
1488                         BUG_ON(!path);
1489                         key.objectid = inode->i_ino;
1490                         key.offset = pages[i]->index << PAGE_CACHE_SHIFT;
1491                         key.flags = 0;
1492                         btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
1493                         BUG_ON(write_bytes >= PAGE_CACHE_SIZE);
1494                         datasize = offset +
1495                                 btrfs_file_extent_calc_inline_size(write_bytes);
1496                         ret = btrfs_insert_empty_item(trans, root, path, &key,
1497                                                       datasize);
1498                         BUG_ON(ret);
1499                         ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
1500                                path->slots[0], struct btrfs_file_extent_item);
1501                         btrfs_set_file_extent_generation(ei, trans->transid);
1502                         btrfs_set_file_extent_type(ei,
1503                                                    BTRFS_FILE_EXTENT_INLINE);
1504                         ptr = btrfs_file_extent_inline_start(ei);
1505                         memcpy(ptr, bh->b_data, offset + write_bytes);
1506                         mark_buffer_dirty(path->nodes[0]);
1507                         btrfs_free_path(path);
1508                 } else {
1509                         btrfs_csum_file_block(trans, root, inode->i_ino,
1510                                       pages[i]->index << PAGE_CACHE_SHIFT,
1511                                       kmap(pages[i]), PAGE_CACHE_SIZE);
1512                         kunmap(pages[i]);
1513                 }
1514                 SetPageChecked(pages[i]);
1515                 btrfs_update_inode_block_group(trans, inode);
1516                 ret = btrfs_end_transaction(trans, root);
1517                 BUG_ON(ret);
1518                 mutex_unlock(&root->fs_info->fs_mutex);
1519
1520                 ret = btrfs_commit_write(file, pages[i], offset,
1521                                          offset + this_write);
1522                 pos += this_write;
1523                 if (ret) {
1524                         err = ret;
1525                         goto failed;
1526                 }
1527                 WARN_ON(this_write > write_bytes);
1528                 write_bytes -= this_write;
1529         }
1530 failed:
1531         return err;
1532 }
1533
1534 static int drop_extents(struct btrfs_trans_handle *trans,
1535                           struct btrfs_root *root,
1536                           struct inode *inode,
1537                           u64 start, u64 end)
1538 {
1539         int ret;
1540         struct btrfs_key key;
1541         struct btrfs_leaf *leaf;
1542         int slot;
1543         struct btrfs_file_extent_item *extent;
1544         u64 extent_end = 0;
1545         int keep;
1546         struct btrfs_file_extent_item old;
1547         struct btrfs_path *path;
1548         u64 search_start = start;
1549         int bookend;
1550         int found_type;
1551         int found_extent;
1552         int found_inline;
1553
1554         path = btrfs_alloc_path();
1555         if (!path)
1556                 return -ENOMEM;
1557         while(1) {
1558                 btrfs_release_path(root, path);
1559                 ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino,
1560                                                search_start, -1);
1561                 if (ret < 0)
1562                         goto out;
1563                 if (ret > 0) {
1564                         if (path->slots[0] == 0) {
1565                                 ret = 0;
1566                                 goto out;
1567                         }
1568                         path->slots[0]--;
1569                 }
1570                 keep = 0;
1571                 bookend = 0;
1572                 found_extent = 0;
1573                 found_inline = 0;
1574                 extent = NULL;
1575                 leaf = btrfs_buffer_leaf(path->nodes[0]);
1576                 slot = path->slots[0];
1577                 btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key);
1578                 if (key.offset >= end || key.objectid != inode->i_ino) {
1579                         ret = 0;
1580                         goto out;
1581                 }
1582                 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) {
1583                         ret = 0;
1584                         goto out;
1585                 }
1586                 extent = btrfs_item_ptr(leaf, slot,
1587                                         struct btrfs_file_extent_item);
1588                 found_type = btrfs_file_extent_type(extent);
1589                 if (found_type == BTRFS_FILE_EXTENT_REG) {
1590                         extent_end = key.offset +
1591                                 (btrfs_file_extent_num_blocks(extent) <<
1592                                  inode->i_blkbits);
1593                         found_extent = 1;
1594                 } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
1595                         found_inline = 1;
1596                         extent_end = key.offset +
1597                              btrfs_file_extent_inline_len(leaf->items + slot);
1598                 }
1599
1600                 if (!found_extent && !found_inline) {
1601                         ret = 0;
1602                         goto out;
1603                 }
1604
1605                 if (search_start >= extent_end) {
1606                         ret = 0;
1607                         goto out;
1608                 }
1609
1610                 search_start = extent_end;
1611
1612                 if (end < extent_end && end >= key.offset) {
1613                         if (found_extent) {
1614                                 memcpy(&old, extent, sizeof(old));
1615                                 ret = btrfs_inc_extent_ref(trans, root,
1616                                       btrfs_file_extent_disk_blocknr(&old),
1617                                       btrfs_file_extent_disk_num_blocks(&old));
1618                                 BUG_ON(ret);
1619                         }
1620                         WARN_ON(found_inline);
1621                         bookend = 1;
1622                 }
1623
1624                 if (start > key.offset) {
1625                         u64 new_num;
1626                         u64 old_num;
1627                         /* truncate existing extent */
1628                         keep = 1;
1629                         WARN_ON(start & (root->blocksize - 1));
1630                         if (found_extent) {
1631                                 new_num = (start - key.offset) >>
1632                                         inode->i_blkbits;
1633                                 old_num = btrfs_file_extent_num_blocks(extent);
1634                                 inode->i_blocks -= (old_num - new_num) << 3;
1635                                 btrfs_set_file_extent_num_blocks(extent,
1636                                                                  new_num);
1637                                 mark_buffer_dirty(path->nodes[0]);
1638                         } else {
1639                                 WARN_ON(1);
1640                                 /*
1641                                 ret = btrfs_truncate_item(trans, root, path,
1642                                                           start - key.offset);
1643                                 BUG_ON(ret);
1644                                 */
1645                         }
1646                 }
1647                 if (!keep) {
1648                         u64 disk_blocknr = 0;
1649                         u64 disk_num_blocks = 0;
1650                         u64 extent_num_blocks = 0;
1651                         if (found_extent) {
1652                                 disk_blocknr =
1653                                       btrfs_file_extent_disk_blocknr(extent);
1654                                 disk_num_blocks =
1655                                       btrfs_file_extent_disk_num_blocks(extent);
1656                                 extent_num_blocks =
1657                                       btrfs_file_extent_num_blocks(extent);
1658                         }
1659                         ret = btrfs_del_item(trans, root, path);
1660                         BUG_ON(ret);
1661                         btrfs_release_path(root, path);
1662                         if (found_extent) {
1663                                 inode->i_blocks -=
1664                                 btrfs_file_extent_num_blocks(extent) << 3;
1665                                 ret = btrfs_free_extent(trans, root,
1666                                                         disk_blocknr,
1667                                                         disk_num_blocks, 0);
1668                         }
1669
1670                         BUG_ON(ret);
1671                         if (!bookend && search_start >= end) {
1672                                 ret = 0;
1673                                 goto out;
1674                         }
1675                         if (!bookend)
1676                                 continue;
1677                 }
1678                 if (bookend && found_extent) {
1679                         /* create bookend */
1680                         struct btrfs_key ins;
1681                         ins.objectid = inode->i_ino;
1682                         ins.offset = end;
1683                         ins.flags = 0;
1684                         btrfs_set_key_type(&ins, BTRFS_EXTENT_DATA_KEY);
1685
1686                         btrfs_release_path(root, path);
1687                         ret = btrfs_insert_empty_item(trans, root, path, &ins,
1688                                                       sizeof(*extent));
1689                         BUG_ON(ret);
1690                         extent = btrfs_item_ptr(
1691                                     btrfs_buffer_leaf(path->nodes[0]),
1692                                     path->slots[0],
1693                                     struct btrfs_file_extent_item);
1694                         btrfs_set_file_extent_disk_blocknr(extent,
1695                                     btrfs_file_extent_disk_blocknr(&old));
1696                         btrfs_set_file_extent_disk_num_blocks(extent,
1697                                     btrfs_file_extent_disk_num_blocks(&old));
1698
1699                         btrfs_set_file_extent_offset(extent,
1700                                     btrfs_file_extent_offset(&old) +
1701                                     ((end - key.offset) >> inode->i_blkbits));
1702                         WARN_ON(btrfs_file_extent_num_blocks(&old) <
1703                                 (end - key.offset) >> inode->i_blkbits);
1704                         btrfs_set_file_extent_num_blocks(extent,
1705                                     btrfs_file_extent_num_blocks(&old) -
1706                                     ((end - key.offset) >> inode->i_blkbits));
1707
1708                         btrfs_set_file_extent_type(extent,
1709                                                    BTRFS_FILE_EXTENT_REG);
1710                         btrfs_set_file_extent_generation(extent,
1711                                     btrfs_file_extent_generation(&old));
1712                         btrfs_mark_buffer_dirty(path->nodes[0]);
1713                         inode->i_blocks +=
1714                                 btrfs_file_extent_num_blocks(extent) << 3;
1715                         ret = 0;
1716                         goto out;
1717                 }
1718         }
1719 out:
1720         btrfs_free_path(path);
1721         return ret;
1722 }
1723
1724 static int prepare_pages(struct btrfs_root *root,
1725                          struct file *file,
1726                          struct page **pages,
1727                          size_t num_pages,
1728                          loff_t pos,
1729                          unsigned long first_index,
1730                          unsigned long last_index,
1731                          size_t write_bytes,
1732                          u64 alloc_extent_start)
1733 {
1734         int i;
1735         unsigned long index = pos >> PAGE_CACHE_SHIFT;
1736         struct inode *inode = file->f_path.dentry->d_inode;
1737         int offset;
1738         int err = 0;
1739         int this_write;
1740         struct buffer_head *bh;
1741         struct buffer_head *head;
1742         loff_t isize = i_size_read(inode);
1743
1744         memset(pages, 0, num_pages * sizeof(struct page *));
1745
1746         for (i = 0; i < num_pages; i++) {
1747                 pages[i] = grab_cache_page(inode->i_mapping, index + i);
1748                 if (!pages[i]) {
1749                         err = -ENOMEM;
1750                         goto failed_release;
1751                 }
1752                 offset = pos & (PAGE_CACHE_SIZE -1);
1753                 this_write = min(PAGE_CACHE_SIZE - offset, write_bytes);
1754                 create_empty_buffers(pages[i], root->fs_info->sb->s_blocksize,
1755                                      (1 << BH_Uptodate));
1756                 head = page_buffers(pages[i]);
1757                 bh = head;
1758                 do {
1759                         err = btrfs_map_bh_to_logical(root, bh,
1760                                                       alloc_extent_start);
1761                         BUG_ON(err);
1762                         if (err)
1763                                 goto failed_truncate;
1764                         bh = bh->b_this_page;
1765                         if (alloc_extent_start)
1766                                 alloc_extent_start++;
1767                 } while (bh != head);
1768                 pos += this_write;
1769                 WARN_ON(this_write > write_bytes);
1770                 write_bytes -= this_write;
1771         }
1772         return 0;
1773
1774 failed_release:
1775         btrfs_drop_pages(pages, num_pages);
1776         return err;
1777
1778 failed_truncate:
1779         btrfs_drop_pages(pages, num_pages);
1780         if (pos > isize)
1781                 vmtruncate(inode, isize);
1782         return err;
1783 }
1784
1785 static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
1786                                 size_t count, loff_t *ppos)
1787 {
1788         loff_t pos;
1789         size_t num_written = 0;
1790         int err = 0;
1791         int ret = 0;
1792         struct inode *inode = file->f_path.dentry->d_inode;
1793         struct btrfs_root *root = BTRFS_I(inode)->root;
1794         struct page *pages[8];
1795         struct page *pinned[2] = { NULL, NULL };
1796         unsigned long first_index;
1797         unsigned long last_index;
1798         u64 start_pos;
1799         u64 num_blocks;
1800         u64 alloc_extent_start;
1801         struct btrfs_trans_handle *trans;
1802         struct btrfs_key ins;
1803
1804         if (file->f_flags & O_DIRECT)
1805                 return -EINVAL;
1806         pos = *ppos;
1807         vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
1808         current->backing_dev_info = inode->i_mapping->backing_dev_info;
1809         err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
1810         if (err)
1811                 goto out;
1812         if (count == 0)
1813                 goto out;
1814         err = remove_suid(file->f_path.dentry);
1815         if (err)
1816                 goto out;
1817         file_update_time(file);
1818
1819         start_pos = pos & ~((u64)PAGE_CACHE_SIZE - 1);
1820         num_blocks = (count + pos - start_pos + root->blocksize - 1) >>
1821                         inode->i_blkbits;
1822
1823         mutex_lock(&inode->i_mutex);
1824         first_index = pos >> PAGE_CACHE_SHIFT;
1825         last_index = (pos + count) >> PAGE_CACHE_SHIFT;
1826
1827         if ((first_index << PAGE_CACHE_SHIFT) < inode->i_size &&
1828             (pos & (PAGE_CACHE_SIZE - 1))) {
1829                 pinned[0] = grab_cache_page(inode->i_mapping, first_index);
1830                 if (!PageUptodate(pinned[0])) {
1831                         ret = mpage_readpage(pinned[0], btrfs_get_block);
1832                         BUG_ON(ret);
1833                 } else {
1834                         unlock_page(pinned[0]);
1835                 }
1836         }
1837         if (first_index != last_index &&
1838             (last_index << PAGE_CACHE_SHIFT) < inode->i_size &&
1839             (count & (PAGE_CACHE_SIZE - 1))) {
1840                 pinned[1] = grab_cache_page(inode->i_mapping, last_index);
1841                 if (!PageUptodate(pinned[1])) {
1842                         ret = mpage_readpage(pinned[1], btrfs_get_block);
1843                         BUG_ON(ret);
1844                 } else {
1845                         unlock_page(pinned[1]);
1846                 }
1847         }
1848
1849         mutex_lock(&root->fs_info->fs_mutex);
1850         trans = btrfs_start_transaction(root, 1);
1851         if (!trans) {
1852                 err = -ENOMEM;
1853                 mutex_unlock(&root->fs_info->fs_mutex);
1854                 goto out_unlock;
1855         }
1856         btrfs_set_trans_block_group(trans, inode);
1857         /* FIXME blocksize != 4096 */
1858         inode->i_blocks += num_blocks << 3;
1859         if (start_pos < inode->i_size) {
1860                 /* FIXME blocksize != pagesize */
1861                 ret = drop_extents(trans, root, inode,
1862                                    start_pos,
1863                                    (pos + count + root->blocksize -1) &
1864                                    ~((u64)root->blocksize - 1));
1865                 BUG_ON(ret);
1866         }
1867         if (inode->i_size >= PAGE_CACHE_SIZE || pos + count < inode->i_size ||
1868             pos + count - start_pos > BTRFS_MAX_INLINE_DATA_SIZE(root)) {
1869                 ret = btrfs_alloc_extent(trans, root, inode->i_ino,
1870                                          num_blocks, 1, (u64)-1, &ins);
1871                 BUG_ON(ret);
1872                 ret = btrfs_insert_file_extent(trans, root, inode->i_ino,
1873                                        start_pos, ins.objectid, ins.offset);
1874                 BUG_ON(ret);
1875         } else {
1876                 ins.offset = 0;
1877                 ins.objectid = 0;
1878         }
1879         BUG_ON(ret);
1880         alloc_extent_start = ins.objectid;
1881         btrfs_update_inode_block_group(trans, inode);
1882         ret = btrfs_end_transaction(trans, root);
1883         mutex_unlock(&root->fs_info->fs_mutex);
1884
1885         while(count > 0) {
1886                 size_t offset = pos & (PAGE_CACHE_SIZE - 1);
1887                 size_t write_bytes = min(count, PAGE_CACHE_SIZE - offset);
1888                 size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >>
1889                                         PAGE_CACHE_SHIFT;
1890
1891                 memset(pages, 0, sizeof(pages));
1892                 ret = prepare_pages(root, file, pages, num_pages,
1893                                     pos, first_index, last_index,
1894                                     write_bytes, alloc_extent_start);
1895                 BUG_ON(ret);
1896
1897                 /* FIXME blocks != pagesize */
1898                 if (alloc_extent_start)
1899                         alloc_extent_start += num_pages;
1900                 ret = btrfs_copy_from_user(pos, num_pages,
1901                                            write_bytes, pages, buf);
1902                 BUG_ON(ret);
1903
1904                 ret = dirty_and_release_pages(NULL, root, file, pages,
1905                                               num_pages, pos, write_bytes);
1906                 BUG_ON(ret);
1907                 btrfs_drop_pages(pages, num_pages);
1908
1909                 buf += write_bytes;
1910                 count -= write_bytes;
1911                 pos += write_bytes;
1912                 num_written += write_bytes;
1913
1914                 balance_dirty_pages_ratelimited(inode->i_mapping);
1915                 cond_resched();
1916         }
1917 out_unlock:
1918         mutex_unlock(&inode->i_mutex);
1919 out:
1920         if (pinned[0])
1921                 page_cache_release(pinned[0]);
1922         if (pinned[1])
1923                 page_cache_release(pinned[1]);
1924         *ppos = pos;
1925         current->backing_dev_info = NULL;
1926         mark_inode_dirty(inode);
1927         return num_written ? num_written : err;
1928 }
1929
1930 static int btrfs_read_actor(read_descriptor_t *desc, struct page *page,
1931                         unsigned long offset, unsigned long size)
1932 {
1933         char *kaddr;
1934         unsigned long left, count = desc->count;
1935         struct inode *inode = page->mapping->host;
1936
1937         if (size > count)
1938                 size = count;
1939
1940         if (!PageChecked(page)) {
1941                 /* FIXME, do it per block */
1942                 struct btrfs_root *root = BTRFS_I(inode)->root;
1943
1944                 int ret = btrfs_csum_verify_file_block(root,
1945                                   page->mapping->host->i_ino,
1946                                   page->index << PAGE_CACHE_SHIFT,
1947                                   kmap(page), PAGE_CACHE_SIZE);
1948                 if (ret) {
1949                         printk("failed to verify ino %lu page %lu\n",
1950                                page->mapping->host->i_ino,
1951                                page->index);
1952                         memset(page_address(page), 0, PAGE_CACHE_SIZE);
1953                 }
1954                 SetPageChecked(page);
1955                 kunmap(page);
1956         }
1957         /*
1958          * Faults on the destination of a read are common, so do it before
1959          * taking the kmap.
1960          */
1961         if (!fault_in_pages_writeable(desc->arg.buf, size)) {
1962                 kaddr = kmap_atomic(page, KM_USER0);
1963                 left = __copy_to_user_inatomic(desc->arg.buf,
1964                                                 kaddr + offset, size);
1965                 kunmap_atomic(kaddr, KM_USER0);
1966                 if (left == 0)
1967                         goto success;
1968         }
1969
1970         /* Do it the slow way */
1971         kaddr = kmap(page);
1972         left = __copy_to_user(desc->arg.buf, kaddr + offset, size);
1973         kunmap(page);
1974
1975         if (left) {
1976                 size -= left;
1977                 desc->error = -EFAULT;
1978         }
1979 success:
1980         desc->count = count - size;
1981         desc->written += size;
1982         desc->arg.buf += size;
1983         return size;
1984 }
1985
1986 /**
1987  * btrfs_file_aio_read - filesystem read routine
1988  * @iocb:       kernel I/O control block
1989  * @iov:        io vector request
1990  * @nr_segs:    number of segments in the iovec
1991  * @pos:        current file position
1992  */
1993 static ssize_t btrfs_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
1994                                    unsigned long nr_segs, loff_t pos)
1995 {
1996         struct file *filp = iocb->ki_filp;
1997         ssize_t retval;
1998         unsigned long seg;
1999         size_t count;
2000         loff_t *ppos = &iocb->ki_pos;
2001
2002         count = 0;
2003         for (seg = 0; seg < nr_segs; seg++) {
2004                 const struct iovec *iv = &iov[seg];
2005
2006                 /*
2007                  * If any segment has a negative length, or the cumulative
2008                  * length ever wraps negative then return -EINVAL.
2009                  */
2010                 count += iv->iov_len;
2011                 if (unlikely((ssize_t)(count|iv->iov_len) < 0))
2012                         return -EINVAL;
2013                 if (access_ok(VERIFY_WRITE, iv->iov_base, iv->iov_len))
2014                         continue;
2015                 if (seg == 0)
2016                         return -EFAULT;
2017                 nr_segs = seg;
2018                 count -= iv->iov_len;   /* This segment is no good */
2019                 break;
2020         }
2021         retval = 0;
2022         if (count) {
2023                 for (seg = 0; seg < nr_segs; seg++) {
2024                         read_descriptor_t desc;
2025
2026                         desc.written = 0;
2027                         desc.arg.buf = iov[seg].iov_base;
2028                         desc.count = iov[seg].iov_len;
2029                         if (desc.count == 0)
2030                                 continue;
2031                         desc.error = 0;
2032                         do_generic_file_read(filp, ppos, &desc,
2033                                              btrfs_read_actor);
2034                         retval += desc.written;
2035                         if (desc.error) {
2036                                 retval = retval ?: desc.error;
2037                                 break;
2038                         }
2039                 }
2040         }
2041         return retval;
2042 }
2043
2044 static int create_subvol(struct btrfs_root *root, char *name, int namelen)
2045 {
2046         struct btrfs_trans_handle *trans;
2047         struct btrfs_key key;
2048         struct btrfs_root_item root_item;
2049         struct btrfs_inode_item *inode_item;
2050         struct buffer_head *subvol;
2051         struct btrfs_leaf *leaf;
2052         struct btrfs_root *new_root;
2053         struct inode *inode;
2054         struct inode *dir;
2055         int ret;
2056         u64 objectid;
2057         u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID;
2058
2059         mutex_lock(&root->fs_info->fs_mutex);
2060         trans = btrfs_start_transaction(root, 1);
2061         BUG_ON(!trans);
2062
2063         subvol = btrfs_alloc_free_block(trans, root, 0);
2064         if (subvol == NULL)
2065                 return -ENOSPC;
2066         leaf = btrfs_buffer_leaf(subvol);
2067         btrfs_set_header_nritems(&leaf->header, 0);
2068         btrfs_set_header_level(&leaf->header, 0);
2069         btrfs_set_header_blocknr(&leaf->header, bh_blocknr(subvol));
2070         btrfs_set_header_generation(&leaf->header, trans->transid);
2071         btrfs_set_header_owner(&leaf->header, root->root_key.objectid);
2072         memcpy(leaf->header.fsid, root->fs_info->disk_super->fsid,
2073                sizeof(leaf->header.fsid));
2074         mark_buffer_dirty(subvol);
2075
2076         inode_item = &root_item.inode;
2077         memset(inode_item, 0, sizeof(*inode_item));
2078         btrfs_set_inode_generation(inode_item, 1);
2079         btrfs_set_inode_size(inode_item, 3);
2080         btrfs_set_inode_nlink(inode_item, 1);
2081         btrfs_set_inode_nblocks(inode_item, 1);
2082         btrfs_set_inode_mode(inode_item, S_IFDIR | 0755);
2083
2084         btrfs_set_root_blocknr(&root_item, bh_blocknr(subvol));
2085         btrfs_set_root_refs(&root_item, 1);
2086         brelse(subvol);
2087         subvol = NULL;
2088
2089         ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root,
2090                                        0, &objectid);
2091         BUG_ON(ret);
2092
2093         btrfs_set_root_dirid(&root_item, new_dirid);
2094
2095         key.objectid = objectid;
2096         key.offset = 1;
2097         key.flags = 0;
2098         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
2099         ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
2100                                 &root_item);
2101         BUG_ON(ret);
2102
2103         /*
2104          * insert the directory item
2105          */
2106         key.offset = (u64)-1;
2107         dir = root->fs_info->sb->s_root->d_inode;
2108         ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root,
2109                                     name, namelen, dir->i_ino, &key, 0);
2110         BUG_ON(ret);
2111
2112         ret = btrfs_commit_transaction(trans, root);
2113         BUG_ON(ret);
2114
2115         new_root = btrfs_read_fs_root(root->fs_info, &key);
2116         BUG_ON(!new_root);
2117
2118         trans = btrfs_start_transaction(new_root, 1);
2119         BUG_ON(!trans);
2120
2121         inode = btrfs_new_inode(trans, new_root, new_dirid,
2122                                 BTRFS_I(dir)->block_group, S_IFDIR | 0700);
2123         inode->i_op = &btrfs_dir_inode_operations;
2124         inode->i_fop = &btrfs_dir_file_operations;
2125
2126         ret = btrfs_make_empty_dir(trans, new_root, new_dirid, new_dirid);
2127         BUG_ON(ret);
2128
2129         inode->i_nlink = 1;
2130         inode->i_size = 6;
2131         ret = btrfs_update_inode(trans, new_root, inode);
2132         BUG_ON(ret);
2133
2134         ret = btrfs_commit_transaction(trans, new_root);
2135         BUG_ON(ret);
2136
2137         iput(inode);
2138
2139         mutex_unlock(&root->fs_info->fs_mutex);
2140         return 0;
2141 }
2142
2143 static int create_snapshot(struct btrfs_root *root, char *name, int namelen)
2144 {
2145         struct btrfs_trans_handle *trans;
2146         struct btrfs_key key;
2147         struct btrfs_root_item new_root_item;
2148         int ret;
2149         u64 objectid;
2150
2151         if (!root->ref_cows)
2152                 return -EINVAL;
2153
2154         mutex_lock(&root->fs_info->fs_mutex);
2155         trans = btrfs_start_transaction(root, 1);
2156         BUG_ON(!trans);
2157
2158         ret = btrfs_update_inode(trans, root, root->inode);
2159         BUG_ON(ret);
2160
2161         ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root,
2162                                        0, &objectid);
2163         BUG_ON(ret);
2164
2165         memcpy(&new_root_item, &root->root_item,
2166                sizeof(new_root_item));
2167
2168         key.objectid = objectid;
2169         key.offset = 1;
2170         key.flags = 0;
2171         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
2172         btrfs_set_root_blocknr(&new_root_item, bh_blocknr(root->node));
2173
2174         ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
2175                                 &new_root_item);
2176         BUG_ON(ret);
2177
2178         /*
2179          * insert the directory item
2180          */
2181         key.offset = (u64)-1;
2182         ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root,
2183                                     name, namelen,
2184                                     root->fs_info->sb->s_root->d_inode->i_ino,
2185                                     &key, 0);
2186
2187         BUG_ON(ret);
2188
2189         ret = btrfs_inc_root_ref(trans, root);
2190         BUG_ON(ret);
2191
2192         ret = btrfs_commit_transaction(trans, root);
2193         BUG_ON(ret);
2194         mutex_unlock(&root->fs_info->fs_mutex);
2195         return 0;
2196 }
2197
2198 static int add_disk(struct btrfs_root *root, char *name, int namelen)
2199 {
2200         struct block_device *bdev;
2201         struct btrfs_path *path;
2202         struct super_block *sb = root->fs_info->sb;
2203         struct btrfs_root *dev_root = root->fs_info->dev_root;
2204         struct btrfs_trans_handle *trans;
2205         struct btrfs_device_item *dev_item;
2206         struct btrfs_key key;
2207         u16 item_size;
2208         u64 num_blocks;
2209         u64 new_blocks;
2210         u64 device_id;
2211         int ret;
2212
2213 printk("adding disk %s\n", name);
2214         path = btrfs_alloc_path();
2215         if (!path)
2216                 return -ENOMEM;
2217         num_blocks = btrfs_super_total_blocks(root->fs_info->disk_super);
2218         bdev = open_bdev_excl(name, O_RDWR, sb);
2219         if (IS_ERR(bdev)) {
2220                 ret = PTR_ERR(bdev);
2221 printk("open bdev excl failed ret %d\n", ret);
2222                 goto out_nolock;
2223         }
2224         set_blocksize(bdev, sb->s_blocksize);
2225         new_blocks = bdev->bd_inode->i_size >> sb->s_blocksize_bits;
2226         key.objectid = num_blocks;
2227         key.offset = new_blocks;
2228         key.flags = 0;
2229         btrfs_set_key_type(&key, BTRFS_DEV_ITEM_KEY);
2230
2231         mutex_lock(&dev_root->fs_info->fs_mutex);
2232         trans = btrfs_start_transaction(dev_root, 1);
2233         item_size = sizeof(*dev_item) + namelen;
2234 printk("insert empty on %Lu %Lu %u size %d\n", num_blocks, new_blocks, key.flags, item_size);
2235         ret = btrfs_insert_empty_item(trans, dev_root, path, &key, item_size);
2236         if (ret) {
2237 printk("insert failed %d\n", ret);
2238                 close_bdev_excl(bdev);
2239                 if (ret > 0)
2240                         ret = -EEXIST;
2241                 goto out;
2242         }
2243         dev_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
2244                                   path->slots[0], struct btrfs_device_item);
2245         btrfs_set_device_pathlen(dev_item, namelen);
2246         memcpy(dev_item + 1, name, namelen);
2247
2248         device_id = btrfs_super_last_device_id(root->fs_info->disk_super) + 1;
2249         btrfs_set_super_last_device_id(root->fs_info->disk_super, device_id);
2250         btrfs_set_device_id(dev_item, device_id);
2251         mark_buffer_dirty(path->nodes[0]);
2252
2253         ret = btrfs_insert_dev_radix(root, bdev, device_id, num_blocks,
2254                                      new_blocks);
2255
2256         if (!ret) {
2257                 btrfs_set_super_total_blocks(root->fs_info->disk_super,
2258                                              num_blocks + new_blocks);
2259                 i_size_write(root->fs_info->btree_inode,
2260                              (num_blocks + new_blocks) <<
2261                              root->fs_info->btree_inode->i_blkbits);
2262         }
2263
2264 out:
2265         ret = btrfs_commit_transaction(trans, dev_root);
2266         BUG_ON(ret);
2267         mutex_unlock(&root->fs_info->fs_mutex);
2268 out_nolock:
2269         btrfs_free_path(path);
2270
2271         return ret;
2272 }
2273
2274 static int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int
2275                        cmd, unsigned long arg)
2276 {
2277         struct btrfs_root *root = BTRFS_I(inode)->root;
2278         struct btrfs_ioctl_vol_args vol_args;
2279         int ret = 0;
2280         struct btrfs_dir_item *di;
2281         int namelen;
2282         struct btrfs_path *path;
2283         u64 root_dirid;
2284
2285         switch (cmd) {
2286         case BTRFS_IOC_SNAP_CREATE:
2287                 if (copy_from_user(&vol_args,
2288                                    (struct btrfs_ioctl_vol_args __user *)arg,
2289                                    sizeof(vol_args)))
2290                         return -EFAULT;
2291                 namelen = strlen(vol_args.name);
2292                 if (namelen > BTRFS_VOL_NAME_MAX)
2293                         return -EINVAL;
2294                 path = btrfs_alloc_path();
2295                 if (!path)
2296                         return -ENOMEM;
2297                 root_dirid = root->fs_info->sb->s_root->d_inode->i_ino,
2298                 mutex_lock(&root->fs_info->fs_mutex);
2299                 di = btrfs_lookup_dir_item(NULL, root->fs_info->tree_root,
2300                                     path, root_dirid,
2301                                     vol_args.name, namelen, 0);
2302                 mutex_unlock(&root->fs_info->fs_mutex);
2303                 btrfs_free_path(path);
2304                 if (di && !IS_ERR(di))
2305                         return -EEXIST;
2306
2307                 if (root == root->fs_info->tree_root)
2308                         ret = create_subvol(root, vol_args.name, namelen);
2309                 else
2310                         ret = create_snapshot(root, vol_args.name, namelen);
2311                 WARN_ON(ret);
2312                 break;
2313         case BTRFS_IOC_ADD_DISK:
2314                 if (copy_from_user(&vol_args,
2315                                    (struct btrfs_ioctl_vol_args __user *)arg,
2316                                    sizeof(vol_args)))
2317                         return -EFAULT;
2318                 namelen = strlen(vol_args.name);
2319                 if (namelen > BTRFS_VOL_NAME_MAX)
2320                         return -EINVAL;
2321                 vol_args.name[namelen] = '\0';
2322                 ret = add_disk(root, vol_args.name, namelen);
2323                 break;
2324         default:
2325                 return -ENOTTY;
2326         }
2327         return ret;
2328 }
2329
/*
 * Slab caches used by btrfs.  They are created in init_inodecache() and
 * destroyed in destroy_inodecache().  Only the inode cache is private to
 * this file; the others have external linkage.
 */
/* backs btrfs_alloc_inode() / btrfs_destroy_inode() */
static struct kmem_cache *btrfs_inode_cachep;
/* created with objects of sizeof(struct btrfs_trans_handle) */
struct kmem_cache *btrfs_trans_handle_cachep;
/* created with objects of sizeof(struct btrfs_transaction) */
struct kmem_cache *btrfs_transaction_cachep;
/* created as "btrfs_radix": 256-byte objects, SLAB_DESTROY_BY_RCU */
struct kmem_cache *btrfs_bit_radix_cachep;
/* created as "btrfs_path_cache"; its users are outside this section */
struct kmem_cache *btrfs_path_cachep;
2335
2336 /*
2337  * Called inside transaction, so use GFP_NOFS
2338  */
2339 static struct inode *btrfs_alloc_inode(struct super_block *sb)
2340 {
2341         struct btrfs_inode *ei;
2342
2343         ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_NOFS);
2344         if (!ei)
2345                 return NULL;
2346         return &ei->vfs_inode;
2347 }
2348
2349 static void btrfs_destroy_inode(struct inode *inode)
2350 {
2351         WARN_ON(!list_empty(&inode->i_dentry));
2352         WARN_ON(inode->i_data.nrpages);
2353
2354         kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
2355 }
2356
2357 static void init_once(void * foo, struct kmem_cache * cachep,
2358                       unsigned long flags)
2359 {
2360         struct btrfs_inode *ei = (struct btrfs_inode *) foo;
2361
2362         if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
2363             SLAB_CTOR_CONSTRUCTOR) {
2364                 inode_init_once(&ei->vfs_inode);
2365         }
2366 }
2367
2368 static int init_inodecache(void)
2369 {
2370         btrfs_inode_cachep = kmem_cache_create("btrfs_inode_cache",
2371                                              sizeof(struct btrfs_inode),
2372                                              0, (SLAB_RECLAIM_ACCOUNT|
2373                                                 SLAB_MEM_SPREAD),
2374                                              init_once, NULL);
2375         btrfs_trans_handle_cachep = kmem_cache_create("btrfs_trans_handle_cache",
2376                                              sizeof(struct btrfs_trans_handle),
2377                                              0, (SLAB_RECLAIM_ACCOUNT|
2378                                                 SLAB_MEM_SPREAD),
2379                                              NULL, NULL);
2380         btrfs_transaction_cachep = kmem_cache_create("btrfs_transaction_cache",
2381                                              sizeof(struct btrfs_transaction),
2382                                              0, (SLAB_RECLAIM_ACCOUNT|
2383                                                 SLAB_MEM_SPREAD),
2384                                              NULL, NULL);
2385         btrfs_path_cachep = kmem_cache_create("btrfs_path_cache",
2386                                              sizeof(struct btrfs_transaction),
2387                                              0, (SLAB_RECLAIM_ACCOUNT|
2388                                                 SLAB_MEM_SPREAD),
2389                                              NULL, NULL);
2390         btrfs_bit_radix_cachep = kmem_cache_create("btrfs_radix",
2391                                              256,
2392                                              0, (SLAB_RECLAIM_ACCOUNT|
2393                                                 SLAB_MEM_SPREAD |
2394                                                 SLAB_DESTROY_BY_RCU),
2395                                              NULL, NULL);
2396         if (btrfs_inode_cachep == NULL || btrfs_trans_handle_cachep == NULL ||
2397             btrfs_transaction_cachep == NULL || btrfs_bit_radix_cachep == NULL)
2398                 return -ENOMEM;
2399         return 0;
2400 }
2401
2402 static void destroy_inodecache(void)
2403 {
2404         kmem_cache_destroy(btrfs_inode_cachep);
2405         kmem_cache_destroy(btrfs_trans_handle_cachep);
2406         kmem_cache_destroy(btrfs_transaction_cachep);
2407         kmem_cache_destroy(btrfs_bit_radix_cachep);
2408         kmem_cache_destroy(btrfs_path_cachep);
2409 }
2410
2411 static int btrfs_get_sb(struct file_system_type *fs_type,
2412         int flags, const char *dev_name, void *data, struct vfsmount *mnt)
2413 {
2414         return get_sb_bdev(fs_type, flags, dev_name, data,
2415                            btrfs_fill_super, mnt);
2416 }
2417
2418
2419 static int btrfs_getattr(struct vfsmount *mnt,
2420                          struct dentry *dentry, struct kstat *stat)
2421 {
2422         struct inode *inode = dentry->d_inode;
2423         generic_fillattr(inode, stat);
2424         stat->blksize = 256 * 1024;
2425         return 0;
2426 }
2427
2428 static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
2429 {
2430         struct btrfs_root *root = btrfs_sb(dentry->d_sb);
2431         struct btrfs_super_block *disk_super = root->fs_info->disk_super;
2432
2433         buf->f_namelen = BTRFS_NAME_LEN;
2434         buf->f_blocks = btrfs_super_total_blocks(disk_super);
2435         buf->f_bfree = buf->f_blocks - btrfs_super_blocks_used(disk_super);
2436         buf->f_bavail = buf->f_bfree;
2437         buf->f_bsize = dentry->d_sb->s_blocksize;
2438         buf->f_type = BTRFS_SUPER_MAGIC;
2439         return 0;
2440 }
2441
/*
 * Filesystem type descriptor for "btrfs".  It is registered in
 * init_btrfs_fs() and unregistered in exit_btrfs_fs().  Mounting goes
 * through btrfs_get_sb(); FS_REQUIRES_DEV is set in fs_flags.
 */
static struct file_system_type btrfs_fs_type = {
        .owner          = THIS_MODULE,
        .name           = "btrfs",
        .get_sb         = btrfs_get_sb,
        .kill_sb        = kill_block_super,
        .fs_flags       = FS_REQUIRES_DEV,
};
2449
/*
 * Super block operations for btrfs (forward-declared near the top of the
 * file).  Inode allocation, destruction and statfs are implemented in
 * this section; the remaining callbacks are defined elsewhere in the
 * file.
 */
static struct super_operations btrfs_super_ops = {
        .delete_inode   = btrfs_delete_inode,
        .put_super      = btrfs_put_super,
        .read_inode     = btrfs_read_locked_inode,
        .write_super    = btrfs_write_super,
        .sync_fs        = btrfs_sync_fs,
        .write_inode    = btrfs_write_inode,
        .dirty_inode    = btrfs_dirty_inode,
        .alloc_inode    = btrfs_alloc_inode,
        .destroy_inode  = btrfs_destroy_inode,
        .statfs         = btrfs_statfs,
};
2462
/*
 * Inode operations for directories: lookup plus the create, unlink,
 * mkdir and rmdir callbacks.
 */
static struct inode_operations btrfs_dir_inode_operations = {
        .lookup         = btrfs_lookup,
        .create         = btrfs_create,
        .unlink         = btrfs_unlink,
        .mkdir          = btrfs_mkdir,
        .rmdir          = btrfs_rmdir,
};
2470
/*
 * Inode operations for a directory that only supports lookup; none of the
 * modifying callbacks are provided.  Which directories use this table is
 * decided outside this section.
 */
static struct inode_operations btrfs_dir_ro_inode_operations = {
        .lookup         = btrfs_lookup,
};
2474
/*
 * File operations for open directories.  Directory listing is handled by
 * btrfs_readdir, and btrfs_ioctl is reachable through directory file
 * descriptors as well as regular files.
 */
static struct file_operations btrfs_dir_file_operations = {
        .llseek         = generic_file_llseek,
        .read           = generic_read_dir,
        .readdir        = btrfs_readdir,
        .ioctl          = btrfs_ioctl,
};
2481
/*
 * Address space operations: page read/write and the prepare/commit
 * write pair are btrfs-specific; sync_page uses block_sync_page.
 */
static struct address_space_operations btrfs_aops = {
        .readpage       = btrfs_readpage,
        .writepage      = btrfs_writepage,
        .sync_page      = block_sync_page,
        .prepare_write  = btrfs_prepare_write,
        .commit_write   = btrfs_commit_write,
};
2489
/*
 * Inode operations for regular files: truncate and getattr only.
 */
static struct inode_operations btrfs_file_inode_operations = {
        .truncate       = btrfs_truncate,
        .getattr        = btrfs_getattr,
};
2494
/*
 * File operations for regular files.  Reads go through do_sync_read and
 * btrfs_file_aio_read, writes through btrfs_file_write; ioctl is shared
 * with directories via btrfs_ioctl.
 */
static struct file_operations btrfs_file_operations = {
        .llseek         = generic_file_llseek,
        .read           = do_sync_read,
        .aio_read       = btrfs_file_aio_read,
        .write          = btrfs_file_write,
        .mmap           = generic_file_mmap,
        .open           = generic_file_open,
        .ioctl          = btrfs_ioctl,
        .fsync          = btrfs_sync_file,
};
2505
2506 static int __init init_btrfs_fs(void)
2507 {
2508         int err;
2509         printk("btrfs loaded!\n");
2510         err = init_inodecache();
2511         if (err)
2512                 return err;
2513         kset_set_kset_s(&btrfs_subsys, fs_subsys);
2514         err = subsystem_register(&btrfs_subsys);
2515         if (err)
2516                 goto out;
2517         return register_filesystem(&btrfs_fs_type);
2518 out:
2519         destroy_inodecache();
2520         return err;
2521 }
2522
2523 static void __exit exit_btrfs_fs(void)
2524 {
2525         destroy_inodecache();
2526         unregister_filesystem(&btrfs_fs_type);
2527         subsystem_unregister(&btrfs_subsys);
2528         printk("btrfs unloaded\n");
2529 }
2530
/* Module load/unload entry points and license declaration. */
module_init(init_btrfs_fs)
module_exit(exit_btrfs_fs)

MODULE_LICENSE("GPL");