ext4: teach the inode allocator to use a goal inode number
authorAndreas Dilger <adilger@sun.com>
Sat, 13 Jun 2009 15:45:35 +0000 (11:45 -0400)
committerTheodore Ts'o <tytso@mit.edu>
Sat, 13 Jun 2009 15:45:35 +0000 (11:45 -0400)
Enhance the inode allocator to take a goal inode number as a
paremeter; if it is specified, it takes precedence over Orlov or
parent directory inode allocation algorithms.

The extents migration function uses the goal inode number so that the
extent trees allocated the migration function use the correct flex_bg.
In the future, the goal inode functionality will also be used to
allocate an adjacent inode for the extended attributes.

Also, for testing purposes the goal inode number can be specified via
/sys/fs/{dev}/inode_goal.  This can be useful for testing inode
allocation beyond 2^32 blocks on very large filesystems.

Signed-off-by: Andreas Dilger <adilger@sun.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Documentation/ABI/testing/sysfs-fs-ext4
fs/ext4/ext4.h
fs/ext4/ialloc.c
fs/ext4/migrate.c
fs/ext4/namei.c
fs/ext4/super.c

index 4e79074de2827703561007c16d0653d5177f9392..5fb709997d9635ac4f5ee1d8bed00f207b9c3feb 100644 (file)
@@ -79,3 +79,13 @@ Description:
                This file is read-only and shows the number of
                kilobytes of data that have been written to this
                filesystem since it was mounted.
+
+What:          /sys/fs/ext4/<disk>/inode_goal
+Date:          June 2008
+Contact:       "Theodore Ts'o" <tytso@mit.edu>
+Description:
+               Tuning parameter which (if non-zero) controls the goal
+               inode used by the inode allocator in p0reference to
+               all other allocation hueristics.  This is intended for
+               debugging use only, and should be 0 on production
+               systems.
index d035cf149e0e2b06e3d3598eec3930a805a26576..746cdcba969daba7c9f047341c36469806f7109b 100644 (file)
@@ -863,6 +863,7 @@ struct ext4_sb_info {
        int s_inode_size;
        int s_first_ino;
        unsigned int s_inode_readahead_blks;
+       unsigned int s_inode_goal;
        spinlock_t s_next_gen_lock;
        u32 s_next_generation;
        u32 s_hash_seed[4];
@@ -1316,7 +1317,7 @@ extern int ext4fs_dirhash(const char *name, int len, struct
 
 /* ialloc.c */
 extern struct inode *ext4_new_inode(handle_t *, struct inode *, int,
-                                   const struct qstr *qstr);
+                                   const struct qstr *qstr, __u32 goal);
 extern void ext4_free_inode(handle_t *, struct inode *);
 extern struct inode * ext4_orphan_get(struct super_block *, unsigned long);
 extern unsigned long ext4_count_free_inodes(struct super_block *);
index 3f98ee712ff484e0cf5510ed5e3dc6a2380f30c7..2f645732e3b76ef84e76f58761c16116825c2763 100644 (file)
@@ -799,7 +799,7 @@ err_ret:
  * group to find a free inode.
  */
 struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode,
-                            const struct qstr *qstr)
+                            const struct qstr *qstr, __u32 goal)
 {
        struct super_block *sb;
        struct buffer_head *inode_bitmap_bh = NULL;
@@ -830,6 +830,16 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode,
        ei = EXT4_I(inode);
        sbi = EXT4_SB(sb);
 
+       if (!goal)
+               goal = sbi->s_inode_goal;
+
+       if (goal && goal < le32_to_cpu(sbi->s_es->s_inodes_count)) {
+               group = (goal - 1) / EXT4_INODES_PER_GROUP(sb);
+               ino = (goal - 1) % EXT4_INODES_PER_GROUP(sb);
+               ret2 = 0;
+               goto got_group;
+       }
+
        if (sbi->s_log_groups_per_flex && test_opt(sb, OLDALLOC)) {
                ret2 = find_group_flex(sb, dir, &group);
                if (ret2 == -1) {
@@ -858,7 +868,7 @@ got_group:
        if (ret2 == -1)
                goto out;
 
-       for (i = 0; i < ngroups; i++) {
+       for (i = 0; i < ngroups; i++, ino = 0) {
                err = -EIO;
 
                gdp = ext4_get_group_desc(sb, group, &group_desc_bh);
@@ -870,8 +880,6 @@ got_group:
                if (!inode_bitmap_bh)
                        goto fail;
 
-               ino = 0;
-
 repeat_in_this_group:
                ino = ext4_find_next_zero_bit((unsigned long *)
                                              inode_bitmap_bh->b_data,
index 80d075b8aeaf28f4afaba931bcbe40afff062fb9..313a50b39741065654796448d9083caef3ba4df9 100644 (file)
@@ -458,6 +458,7 @@ int ext4_ext_migrate(struct inode *inode)
        struct inode *tmp_inode = NULL;
        struct list_blocks_struct lb;
        unsigned long max_entries;
+       __u32 goal;
 
        /*
         * If the filesystem does not support extents, or the inode
@@ -483,8 +484,10 @@ int ext4_ext_migrate(struct inode *inode)
                retval = PTR_ERR(handle);
                return retval;
        }
+       goal = (((inode->i_ino - 1) / EXT4_INODES_PER_GROUP(inode->i_sb)) *
+               EXT4_INODES_PER_GROUP(inode->i_sb)) + 1;
        tmp_inode = ext4_new_inode(handle, inode->i_sb->s_root->d_inode,
-                                  S_IFREG, 0);
+                                  S_IFREG, 0, goal);
        if (IS_ERR(tmp_inode)) {
                retval = -ENOMEM;
                ext4_journal_stop(handle);
index 5f00d2418a831cbdf7913789297e27a252be42c7..de04013d16ffd3825a28cb21dc92995e3818226a 100644 (file)
@@ -1782,7 +1782,7 @@ retry:
        if (IS_DIRSYNC(dir))
                ext4_handle_sync(handle);
 
-       inode = ext4_new_inode(handle, dir, mode, &dentry->d_name);
+       inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0);
        err = PTR_ERR(inode);
        if (!IS_ERR(inode)) {
                inode->i_op = &ext4_file_inode_operations;
@@ -1816,7 +1816,7 @@ retry:
        if (IS_DIRSYNC(dir))
                ext4_handle_sync(handle);
 
-       inode = ext4_new_inode(handle, dir, mode, &dentry->d_name);
+       inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0);
        err = PTR_ERR(inode);
        if (!IS_ERR(inode)) {
                init_special_inode(inode, inode->i_mode, rdev);
@@ -1853,7 +1853,8 @@ retry:
        if (IS_DIRSYNC(dir))
                ext4_handle_sync(handle);
 
-       inode = ext4_new_inode(handle, dir, S_IFDIR | mode, &dentry->d_name);
+       inode = ext4_new_inode(handle, dir, S_IFDIR | mode,
+                              &dentry->d_name, 0);
        err = PTR_ERR(inode);
        if (IS_ERR(inode))
                goto out_stop;
@@ -2264,7 +2265,8 @@ retry:
        if (IS_DIRSYNC(dir))
                ext4_handle_sync(handle);
 
-       inode = ext4_new_inode(handle, dir, S_IFLNK|S_IRWXUGO, &dentry->d_name);
+       inode = ext4_new_inode(handle, dir, S_IFLNK|S_IRWXUGO,
+                              &dentry->d_name, 0);
        err = PTR_ERR(inode);
        if (IS_ERR(inode))
                goto out_stop;
index 04486a53469fa72e7a4ae4d7a42a7cab82709895..23013d303f811cb75e5ab410fca80f99578549b4 100644 (file)
@@ -2206,6 +2206,7 @@ EXT4_RO_ATTR(session_write_kbytes);
 EXT4_RO_ATTR(lifetime_write_kbytes);
 EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, sbi_ui_show,
                 inode_readahead_blks_store, s_inode_readahead_blks);
+EXT4_RW_ATTR_SBI_UI(inode_goal, s_inode_goal);
 EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats);
 EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan);
 EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan);
@@ -2218,6 +2219,7 @@ static struct attribute *ext4_attrs[] = {
        ATTR_LIST(session_write_kbytes),
        ATTR_LIST(lifetime_write_kbytes),
        ATTR_LIST(inode_readahead_blks),
+       ATTR_LIST(inode_goal),
        ATTR_LIST(mb_stats),
        ATTR_LIST(mb_max_to_scan),
        ATTR_LIST(mb_min_to_scan),
This page took 0.076457 seconds and 5 git commands to generate.