fs/ext4/super.c

   1 /*
   2  *  linux/fs/ext4/super.c
   3  *
   4  * Copyright (C) 1992, 1993, 1994, 1995
   5  * Remy Card (card@masi.ibp.fr)
   6  * Laboratoire MASI - Institut Blaise Pascal
   7  * Universite Pierre et Marie Curie (Paris VI)
   8  *
   9  *  from
  10  *
  11  *  linux/fs/minix/inode.c
  12  *
  13  *  Copyright (C) 1991, 1992  Linus Torvalds
  14  *
  15  *  Big-endian to little-endian byte-swapping/bitmaps by
  16  *        David S. Miller (davem@caip.rutgers.edu), 1995
  17  */
  18
  19 #include <linux/module.h>
  20 #include <linux/string.h>
  21 #include <linux/fs.h>
  22 #include <linux/time.h>
  23 #include <linux/vmalloc.h>
  24 #include <linux/slab.h>
  25 #include <linux/init.h>
  26 #include <linux/blkdev.h>
  27 #include <linux/backing-dev.h>
  28 #include <linux/parser.h>
  29 #include <linux/buffer_head.h>
  30 #include <linux/exportfs.h>
  31 #include <linux/vfs.h>
  32 #include <linux/random.h>
  33 #include <linux/mount.h>
  34 #include <linux/namei.h>
  35 #include <linux/quotaops.h>
  36 #include <linux/seq_file.h>
  37 #include <linux/ctype.h>
  38 #include <linux/log2.h>
  39 #include <linux/crc16.h>
  40 #include <linux/cleancache.h>
  41 #include <asm/uaccess.h>
  42
  43 #include <linux/kthread.h>
  44 #include <linux/freezer.h>
  45
  46 #include "ext4.h"
  47 #include "ext4_extents.h"       /* Needed for trace points definition */
  48 #include "ext4_jbd2.h"
  49 #include "xattr.h"
  50 #include "acl.h"
  51 #include "mballoc.h"
  52
  53 #define CREATE_TRACE_POINTS
  54 #include <trace/events/ext4.h>
  55
  56 static struct ext4_lazy_init *ext4_li_info;
  57 static struct mutex ext4_li_mtx;
  58 static struct ratelimit_state ext4_mount_msg_ratelimit;
  59
  60 static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
  61                              unsigned long journal_devnum);
  62 static int ext4_show_options(struct seq_file *seq, struct dentry *root);
  63 static int ext4_commit_super(struct super_block *sb, int sync);
  64 static void ext4_mark_recovery_complete(struct super_block *sb,
  65                                         struct ext4_super_block *es);
  66 static void ext4_clear_journal_err(struct super_block *sb,
  67                                    struct ext4_super_block *es);
  68 static int ext4_sync_fs(struct super_block *sb, int wait);
  69 static int ext4_remount(struct super_block *sb, int *flags, char *data);
  70 static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf);
  71 static int ext4_unfreeze(struct super_block *sb);
  72 static int ext4_freeze(struct super_block *sb);
  73 static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,
  74                        const char *dev_name, void *data);
  75 static inline int ext2_feature_set_ok(struct super_block *sb);
  76 static inline int ext3_feature_set_ok(struct super_block *sb);
  77 static int ext4_feature_set_ok(struct super_block *sb, int readonly);
  78 static void ext4_destroy_lazyinit_thread(void);
  79 static void ext4_unregister_li_request(struct super_block *sb);
  80 static void ext4_clear_request_list(void);
  81
  82 /*
  83  * Lock ordering
  84  *
  85  * Note the difference between i_mmap_sem (EXT4_I(inode)->i_mmap_sem) and
  86  * i_mmap_rwsem (inode->i_mmap_rwsem)!
  87  *
  88  * page fault path:
  89  * mmap_sem -> sb_start_pagefault -> i_mmap_sem (r) -> transaction start ->
  90  *   page lock -> i_data_sem (rw)
  91  *
  92  * buffered write path:
  93  * sb_start_write -> i_mutex -> mmap_sem
  94  * sb_start_write -> i_mutex -> transaction start -> page lock ->
  95  *   i_data_sem (rw)
  96  *
  97  * truncate:
  98  * sb_start_write -> i_mutex -> EXT4_STATE_DIOREAD_LOCK (w) -> i_mmap_sem (w) ->
  99  *   i_mmap_rwsem (w) -> page lock
 100  * sb_start_write -> i_mutex -> EXT4_STATE_DIOREAD_LOCK (w) -> i_mmap_sem (w) ->
 101  *   transaction start -> i_data_sem (rw)
 102  *
 103  * direct IO:
 104  * sb_start_write -> i_mutex -> EXT4_STATE_DIOREAD_LOCK (r) -> mmap_sem
 105  * sb_start_write -> i_mutex -> EXT4_STATE_DIOREAD_LOCK (r) ->
 106  *   transaction start -> i_data_sem (rw)
 107  *
 108  * writepages:
 109  * transaction start -> page lock(s) -> i_data_sem (rw)
 110  */
 111
 112 #if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT2)
 113 static struct file_system_type ext2_fs_type = {
 114         .owner          = THIS_MODULE,
 115         .name           = "ext2",
 116         .mount          = ext4_mount,
 117         .kill_sb        = kill_block_super,
 118         .fs_flags       = FS_REQUIRES_DEV,
 119 };
 120 MODULE_ALIAS_FS("ext2");
 121 MODULE_ALIAS("ext2");
 122 #define IS_EXT2_SB(sb) ((sb)->s_bdev->bd_holder == &ext2_fs_type)
 123 #else
 124 #define IS_EXT2_SB(sb) (0)
 125 #endif
 126
 127
 128 static struct file_system_type ext3_fs_type = {
 129         .owner          = THIS_MODULE,
 130         .name           = "ext3",
 131         .mount          = ext4_mount,
 132         .kill_sb        = kill_block_super,
 133         .fs_flags       = FS_REQUIRES_DEV,
 134 };
 135 MODULE_ALIAS_FS("ext3");
 136 MODULE_ALIAS("ext3");
 137 #define IS_EXT3_SB(sb) ((sb)->s_bdev->bd_holder == &ext3_fs_type)
 138
 139 static int ext4_verify_csum_type(struct super_block *sb,
 140                                  struct ext4_super_block *es)
 141 {
 142         if (!ext4_has_feature_metadata_csum(sb))
 143                 return 1;
 144
 145         return es->s_checksum_type == EXT4_CRC32C_CHKSUM;
 146 }
 147
 148 static __le32 ext4_superblock_csum(struct super_block *sb,
 149                                    struct ext4_super_block *es)
 150 {
 151         struct ext4_sb_info *sbi = EXT4_SB(sb);
 152         int offset = offsetof(struct ext4_super_block, s_checksum);
 153         __u32 csum;
 154
 155         csum = ext4_chksum(sbi, ~0, (char *)es, offset);
 156
 157         return cpu_to_le32(csum);
 158 }
 159
 160 static int ext4_superblock_csum_verify(struct super_block *sb,
 161                                        struct ext4_super_block *es)
 162 {
 163         if (!ext4_has_metadata_csum(sb))
 164                 return 1;
 165
 166         return es->s_checksum == ext4_superblock_csum(sb, es);
 167 }
 168
 169 void ext4_superblock_csum_set(struct super_block *sb)
 170 {
 171         struct ext4_super_block *es = EXT4_SB(sb)->s_es;
 172
 173         if (!ext4_has_metadata_csum(sb))
 174                 return;
 175
 176         es->s_checksum = ext4_superblock_csum(sb, es);
 177 }
 178
 179 void *ext4_kvmalloc(size_t size, gfp_t flags)
 180 {
 181         void *ret;
 182
 183         ret = kmalloc(size, flags | __GFP_NOWARN);
 184         if (!ret)
 185                 ret = __vmalloc(size, flags, PAGE_KERNEL);
 186         return ret;
 187 }
 188
 189 void *ext4_kvzalloc(size_t size, gfp_t flags)
 190 {
 191         void *ret;
 192
 193         ret = kzalloc(size, flags | __GFP_NOWARN);
 194         if (!ret)
 195                 ret = __vmalloc(size, flags | __GFP_ZERO, PAGE_KERNEL);
 196         return ret;
 197 }
 198
 199 ext4_fsblk_t ext4_block_bitmap(struct super_block *sb,
 200                                struct ext4_group_desc *bg)
 201 {
 202         return le32_to_cpu(bg->bg_block_bitmap_lo) |
 203                 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
 204                  (ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0);
 205 }
 206
 207 ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb,
 208                                struct ext4_group_desc *bg)
 209 {
 210         return le32_to_cpu(bg->bg_inode_bitmap_lo) |
 211                 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
 212                  (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0);
 213 }
 214
 215 ext4_fsblk_t ext4_inode_table(struct super_block *sb,
 216                               struct ext4_group_desc *bg)
 217 {
 218         return le32_to_cpu(bg->bg_inode_table_lo) |
 219                 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
 220                  (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0);
 221 }
 222
 223 __u32 ext4_free_group_clusters(struct super_block *sb,
 224                                struct ext4_group_desc *bg)
 225 {
 226         return le16_to_cpu(bg->bg_free_blocks_count_lo) |
 227                 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
 228                  (__u32)le16_to_cpu(bg->bg_free_blocks_count_hi) << 16 : 0);
 229 }
 230
 231 __u32 ext4_free_inodes_count(struct super_block *sb,
 232                               struct ext4_group_desc *bg)
 233 {
 234         return le16_to_cpu(bg->bg_free_inodes_count_lo) |
 235                 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
 236                  (__u32)le16_to_cpu(bg->bg_free_inodes_count_hi) << 16 : 0);
 237 }
 238
 239 __u32 ext4_used_dirs_count(struct super_block *sb,
 240                               struct ext4_group_desc *bg)
 241 {
 242         return le16_to_cpu(bg->bg_used_dirs_count_lo) |
 243                 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
 244                  (__u32)le16_to_cpu(bg->bg_used_dirs_count_hi) << 16 : 0);
 245 }
 246
 247 __u32 ext4_itable_unused_count(struct super_block *sb,
 248                               struct ext4_group_desc *bg)
 249 {
 250         return le16_to_cpu(bg->bg_itable_unused_lo) |
 251                 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
 252                  (__u32)le16_to_cpu(bg->bg_itable_unused_hi) << 16 : 0);
 253 }
 254
 255 void ext4_block_bitmap_set(struct super_block *sb,
 256                            struct ext4_group_desc *bg, ext4_fsblk_t blk)
 257 {
 258         bg->bg_block_bitmap_lo = cpu_to_le32((u32)blk);
 259         if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 260                 bg->bg_block_bitmap_hi = cpu_to_le32(blk >> 32);
 261 }
 262
 263 void ext4_inode_bitmap_set(struct super_block *sb,
 264                            struct ext4_group_desc *bg, ext4_fsblk_t blk)
 265 {
 266         bg->bg_inode_bitmap_lo  = cpu_to_le32((u32)blk);
 267         if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 268                 bg->bg_inode_bitmap_hi = cpu_to_le32(blk >> 32);
 269 }
 270
 271 void ext4_inode_table_set(struct super_block *sb,
 272                           struct ext4_group_desc *bg, ext4_fsblk_t blk)
 273 {
 274         bg->bg_inode_table_lo = cpu_to_le32((u32)blk);
 275         if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 276                 bg->bg_inode_table_hi = cpu_to_le32(blk >> 32);
 277 }
 278
 279 void ext4_free_group_clusters_set(struct super_block *sb,
 280                                   struct ext4_group_desc *bg, __u32 count)
 281 {
 282         bg->bg_free_blocks_count_lo = cpu_to_le16((__u16)count);
 283         if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 284                 bg->bg_free_blocks_count_hi = cpu_to_le16(count >> 16);
 285 }
 286
 287 void ext4_free_inodes_set(struct super_block *sb,
 288                           struct ext4_group_desc *bg, __u32 count)
 289 {
 290         bg->bg_free_inodes_count_lo = cpu_to_le16((__u16)count);
 291         if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 292                 bg->bg_free_inodes_count_hi = cpu_to_le16(count >> 16);
 293 }
 294
 295 void ext4_used_dirs_set(struct super_block *sb,
 296                           struct ext4_group_desc *bg, __u32 count)
 297 {
 298         bg->bg_used_dirs_count_lo = cpu_to_le16((__u16)count);
 299         if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 300                 bg->bg_used_dirs_count_hi = cpu_to_le16(count >> 16);
 301 }
 302
 303 void ext4_itable_unused_set(struct super_block *sb,
 304                           struct ext4_group_desc *bg, __u32 count)
 305 {
 306         bg->bg_itable_unused_lo = cpu_to_le16((__u16)count);
 307         if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 308                 bg->bg_itable_unused_hi = cpu_to_le16(count >> 16);
 309 }
 310
 311
 312 static void __save_error_info(struct super_block *sb, const char *func,
 313                             unsigned int line)
 314 {
 315         struct ext4_super_block *es = EXT4_SB(sb)->s_es;
 316
 317         EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
 318         if (bdev_read_only(sb->s_bdev))
 319                 return;
 320         es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
 321         es->s_last_error_time = cpu_to_le32(get_seconds());
 322         strncpy(es->s_last_error_func, func, sizeof(es->s_last_error_func));
 323         es->s_last_error_line = cpu_to_le32(line);
 324         if (!es->s_first_error_time) {
 325                 es->s_first_error_time = es->s_last_error_time;
 326                 strncpy(es->s_first_error_func, func,
 327                         sizeof(es->s_first_error_func));
 328                 es->s_first_error_line = cpu_to_le32(line);
 329                 es->s_first_error_ino = es->s_last_error_ino;
 330                 es->s_first_error_block = es->s_last_error_block;
 331         }
 332         /*
 333          * Start the daily error reporting function if it hasn't been
 334          * started already
 335          */
 336         if (!es->s_error_count)
 337                 mod_timer(&EXT4_SB(sb)->s_err_report, jiffies + 24*60*60*HZ);
 338         le32_add_cpu(&es->s_error_count, 1);
 339 }
 340
 341 static void save_error_info(struct super_block *sb, const char *func,
 342                             unsigned int line)
 343 {
 344         __save_error_info(sb, func, line);
 345         ext4_commit_super(sb, 1);
 346 }
 347
 348 /*
 349  * The del_gendisk() function uninitializes the disk-specific data
 350  * structures, including the bdi structure, without telling anyone
 351  * else.  Once this happens, any attempt to call mark_buffer_dirty()
 352  * (for example, by ext4_commit_super), will cause a kernel OOPS.
 353  * This is a kludge to prevent these oops until we can put in a proper
 354  * hook in del_gendisk() to inform the VFS and file system layers.
 355  */
 356 static int block_device_ejected(struct super_block *sb)
 357 {
 358         struct inode *bd_inode = sb->s_bdev->bd_inode;
 359         struct backing_dev_info *bdi = inode_to_bdi(bd_inode);
 360
 361         return bdi->dev == NULL;
 362 }
 363
 364 static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn)
 365 {
 366         struct super_block              *sb = journal->j_private;
 367         struct ext4_sb_info             *sbi = EXT4_SB(sb);
 368         int                             error = is_journal_aborted(journal);
 369         struct ext4_journal_cb_entry    *jce;
 370
 371         BUG_ON(txn->t_state == T_FINISHED);
 372         spin_lock(&sbi->s_md_lock);
 373         while (!list_empty(&txn->t_private_list)) {
 374                 jce = list_entry(txn->t_private_list.next,
 375                                  struct ext4_journal_cb_entry, jce_list);
 376                 list_del_init(&jce->jce_list);
 377                 spin_unlock(&sbi->s_md_lock);
 378                 jce->jce_func(sb, jce, error);
 379                 spin_lock(&sbi->s_md_lock);
 380         }
 381         spin_unlock(&sbi->s_md_lock);
 382 }
 383
 384 /* Deal with the reporting of failure conditions on a filesystem such as
 385  * inconsistencies detected or read IO failures.
 386  *
 387  * On ext2, we can store the error state of the filesystem in the
 388  * superblock.  That is not possible on ext4, because we may have other
 389  * write ordering constraints on the superblock which prevent us from
 390  * writing it out straight away; and given that the journal is about to
 391  * be aborted, we can't rely on the current, or future, transactions to
 392  * write out the superblock safely.
 393  *
 394  * We'll just use the jbd2_journal_abort() error code to record an error in
 395  * the journal instead.  On recovery, the journal will complain about
 396  * that error until we've noted it down and cleared it.
 397  */
 398
 399 static void ext4_handle_error(struct super_block *sb)
 400 {
 401         if (sb->s_flags & MS_RDONLY)
 402                 return;
 403
 404         if (!test_opt(sb, ERRORS_CONT)) {
 405                 journal_t *journal = EXT4_SB(sb)->s_journal;
 406
 407                 EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED;
 408                 if (journal)
 409                         jbd2_journal_abort(journal, -EIO);
 410         }
 411         if (test_opt(sb, ERRORS_RO)) {
 412                 ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
 413                 /*
 414                  * Make sure updated value of ->s_mount_flags will be visible
 415                  * before ->s_flags update
 416                  */
 417                 smp_wmb();
 418                 sb->s_flags |= MS_RDONLY;
 419         }
 420         if (test_opt(sb, ERRORS_PANIC)) {
 421                 if (EXT4_SB(sb)->s_journal &&
 422                   !(EXT4_SB(sb)->s_journal->j_flags & JBD2_REC_ERR))
 423                         return;
 424                 panic("EXT4-fs (device %s): panic forced after error\n",
 425                         sb->s_id);
 426         }
 427 }
 428
 429 #define ext4_error_ratelimit(sb)                                        \
 430                 ___ratelimit(&(EXT4_SB(sb)->s_err_ratelimit_state),     \
 431                              "EXT4-fs error")
 432
 433 void __ext4_error(struct super_block *sb, const char *function,
 434                   unsigned int line, const char *fmt, ...)
 435 {
 436         struct va_format vaf;
 437         va_list args;
 438
 439         if (ext4_error_ratelimit(sb)) {
 440                 va_start(args, fmt);
 441                 vaf.fmt = fmt;
 442                 vaf.va = &args;
 443                 printk(KERN_CRIT
 444                        "EXT4-fs error (device %s): %s:%d: comm %s: %pV\n",
 445                        sb->s_id, function, line, current->comm, &vaf);
 446                 va_end(args);
 447         }
 448         save_error_info(sb, function, line);
 449         ext4_handle_error(sb);
 450 }
 451
 452 void __ext4_error_inode(struct inode *inode, const char *function,
 453                         unsigned int line, ext4_fsblk_t block,
 454                         const char *fmt, ...)
 455 {
 456         va_list args;
 457         struct va_format vaf;
 458         struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
 459
 460         es->s_last_error_ino = cpu_to_le32(inode->i_ino);
 461         es->s_last_error_block = cpu_to_le64(block);
 462         if (ext4_error_ratelimit(inode->i_sb)) {
 463                 va_start(args, fmt);
 464                 vaf.fmt = fmt;
 465                 vaf.va = &args;
 466                 if (block)
 467                         printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: "
 468                                "inode #%lu: block %llu: comm %s: %pV\n",
 469                                inode->i_sb->s_id, function, line, inode->i_ino,
 470                                block, current->comm, &vaf);
 471                 else
 472                         printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: "
 473                                "inode #%lu: comm %s: %pV\n",
 474                                inode->i_sb->s_id, function, line, inode->i_ino,
 475                                current->comm, &vaf);
 476                 va_end(args);
 477         }
 478         save_error_info(inode->i_sb, function, line);
 479         ext4_handle_error(inode->i_sb);
 480 }
 481
 482 void __ext4_error_file(struct file *file, const char *function,
 483                        unsigned int line, ext4_fsblk_t block,
 484                        const char *fmt, ...)
 485 {
 486         va_list args;
 487         struct va_format vaf;
 488         struct ext4_super_block *es;
 489         struct inode *inode = file_inode(file);
 490         char pathname[80], *path;
 491
 492         es = EXT4_SB(inode->i_sb)->s_es;
 493         es->s_last_error_ino = cpu_to_le32(inode->i_ino);
 494         if (ext4_error_ratelimit(inode->i_sb)) {
 495                 path = file_path(file, pathname, sizeof(pathname));
 496                 if (IS_ERR(path))
 497                         path = "(unknown)";
 498                 va_start(args, fmt);
 499                 vaf.fmt = fmt;
 500                 vaf.va = &args;
 501                 if (block)
 502                         printk(KERN_CRIT
 503                                "EXT4-fs error (device %s): %s:%d: inode #%lu: "
 504                                "block %llu: comm %s: path %s: %pV\n",
 505                                inode->i_sb->s_id, function, line, inode->i_ino,
 506                                block, current->comm, path, &vaf);
 507                 else
 508                         printk(KERN_CRIT
 509                                "EXT4-fs error (device %s): %s:%d: inode #%lu: "
 510                                "comm %s: path %s: %pV\n",
 511                                inode->i_sb->s_id, function, line, inode->i_ino,
 512                                current->comm, path, &vaf);
 513                 va_end(args);
 514         }
 515         save_error_info(inode->i_sb, function, line);
 516         ext4_handle_error(inode->i_sb);
 517 }
 518
 519 const char *ext4_decode_error(struct super_block *sb, int errno,
 520                               char nbuf[16])
 521 {
 522         char *errstr = NULL;
 523
 524         switch (errno) {
 525         case -EFSCORRUPTED:
 526                 errstr = "Corrupt filesystem";
 527                 break;
 528         case -EFSBADCRC:
 529                 errstr = "Filesystem failed CRC";
 530                 break;
 531         case -EIO:
 532                 errstr = "IO failure";
 533                 break;
 534         case -ENOMEM:
 535                 errstr = "Out of memory";
 536                 break;
 537         case -EROFS:
 538                 if (!sb || (EXT4_SB(sb)->s_journal &&
 539                             EXT4_SB(sb)->s_journal->j_flags & JBD2_ABORT))
 540                         errstr = "Journal has aborted";
 541                 else
 542                         errstr = "Readonly filesystem";
 543                 break;
 544         default:
 545                 /* If the caller passed in an extra buffer for unknown
 546                  * errors, textualise them now.  Else we just return
 547                  * NULL. */
 548                 if (nbuf) {
 549                         /* Check for truncated error codes... */
 550                         if (snprintf(nbuf, 16, "error %d", -errno) >= 0)
 551                                 errstr = nbuf;
 552                 }
 553                 break;
 554         }
 555
 556         return errstr;
 557 }
 558
 559 /* __ext4_std_error decodes expected errors from journaling functions
 560  * automatically and invokes the appropriate error response.  */
 561
 562 void __ext4_std_error(struct super_block *sb, const char *function,
 563                       unsigned int line, int errno)
 564 {
 565         char nbuf[16];
 566         const char *errstr;
 567
 568         /* Special case: if the error is EROFS, and we're not already
 569          * inside a transaction, then there's really no point in logging
 570          * an error. */
 571         if (errno == -EROFS && journal_current_handle() == NULL &&
 572             (sb->s_flags & MS_RDONLY))
 573                 return;
 574
 575         if (ext4_error_ratelimit(sb)) {
 576                 errstr = ext4_decode_error(sb, errno, nbuf);
 577                 printk(KERN_CRIT "EXT4-fs error (device %s) in %s:%d: %s\n",
 578                        sb->s_id, function, line, errstr);
 579         }
 580
 581         save_error_info(sb, function, line);
 582         ext4_handle_error(sb);
 583 }
 584
 585 /*
 586  * ext4_abort is a much stronger failure handler than ext4_error.  The
 587  * abort function may be used to deal with unrecoverable failures such
 588  * as journal IO errors or ENOMEM at a critical moment in log management.
 589  *
 590  * We unconditionally force the filesystem into an ABORT|READONLY state,
 591  * unless the error response on the fs has been set to panic in which
 592  * case we take the easy way out and panic immediately.
 593  */
 594
 595 void __ext4_abort(struct super_block *sb, const char *function,
 596                 unsigned int line, const char *fmt, ...)
 597 {
 598         va_list args;
 599
 600         save_error_info(sb, function, line);
 601         va_start(args, fmt);
 602         printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: ", sb->s_id,
 603                function, line);
 604         vprintk(fmt, args);
 605         printk("\n");
 606         va_end(args);
 607
 608         if ((sb->s_flags & MS_RDONLY) == 0) {
 609                 ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
 610                 EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED;
 611                 /*
 612                  * Make sure updated value of ->s_mount_flags will be visible
 613                  * before ->s_flags update
 614                  */
 615                 smp_wmb();
 616                 sb->s_flags |= MS_RDONLY;
 617                 if (EXT4_SB(sb)->s_journal)
 618                         jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
 619                 save_error_info(sb, function, line);
 620         }
 621         if (test_opt(sb, ERRORS_PANIC)) {
 622                 if (EXT4_SB(sb)->s_journal &&
 623                   !(EXT4_SB(sb)->s_journal->j_flags & JBD2_REC_ERR))
 624                         return;
 625                 panic("EXT4-fs panic from previous error\n");
 626         }
 627 }
 628
 629 void __ext4_msg(struct super_block *sb,
 630                 const char *prefix, const char *fmt, ...)
 631 {
 632         struct va_format vaf;
 633         va_list args;
 634
 635         if (!___ratelimit(&(EXT4_SB(sb)->s_msg_ratelimit_state), "EXT4-fs"))
 636                 return;
 637
 638         va_start(args, fmt);
 639         vaf.fmt = fmt;
 640         vaf.va = &args;
 641         printk("%sEXT4-fs (%s): %pV\n", prefix, sb->s_id, &vaf);
 642         va_end(args);
 643 }
 644
 645 #define ext4_warning_ratelimit(sb)                                      \
 646                 ___ratelimit(&(EXT4_SB(sb)->s_warning_ratelimit_state), \
 647                              "EXT4-fs warning")
 648
 649 void __ext4_warning(struct super_block *sb, const char *function,
 650                     unsigned int line, const char *fmt, ...)
 651 {
 652         struct va_format vaf;
 653         va_list args;
 654
 655         if (!ext4_warning_ratelimit(sb))
 656                 return;
 657
 658         va_start(args, fmt);
 659         vaf.fmt = fmt;
 660         vaf.va = &args;
 661         printk(KERN_WARNING "EXT4-fs warning (device %s): %s:%d: %pV\n",
 662                sb->s_id, function, line, &vaf);
 663         va_end(args);
 664 }
 665
 666 void __ext4_warning_inode(const struct inode *inode, const char *function,
 667                           unsigned int line, const char *fmt, ...)
 668 {
 669         struct va_format vaf;
 670         va_list args;
 671
 672         if (!ext4_warning_ratelimit(inode->i_sb))
 673                 return;
 674
 675         va_start(args, fmt);
 676         vaf.fmt = fmt;
 677         vaf.va = &args;
 678         printk(KERN_WARNING "EXT4-fs warning (device %s): %s:%d: "
 679                "inode #%lu: comm %s: %pV\n", inode->i_sb->s_id,
 680                function, line, inode->i_ino, current->comm, &vaf);
 681         va_end(args);
 682 }
 683
 684 void __ext4_grp_locked_error(const char *function, unsigned int line,
 685                              struct super_block *sb, ext4_group_t grp,
 686                              unsigned long ino, ext4_fsblk_t block,
 687                              const char *fmt, ...)
 688 __releases(bitlock)
 689 __acquires(bitlock)
 690 {
 691         struct va_format vaf;
 692         va_list args;
 693         struct ext4_super_block *es = EXT4_SB(sb)->s_es;
 694
 695         es->s_last_error_ino = cpu_to_le32(ino);
 696         es->s_last_error_block = cpu_to_le64(block);
 697         __save_error_info(sb, function, line);
 698
 699         if (ext4_error_ratelimit(sb)) {
 700                 va_start(args, fmt);
 701                 vaf.fmt = fmt;
 702                 vaf.va = &args;
 703                 printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u, ",
 704                        sb->s_id, function, line, grp);
 705                 if (ino)
 706                         printk(KERN_CONT "inode %lu: ", ino);
 707                 if (block)
 708                         printk(KERN_CONT "block %llu:",
 709                                (unsigned long long) block);
 710                 printk(KERN_CONT "%pV\n", &vaf);
 711                 va_end(args);
 712         }
 713
 714         if (test_opt(sb, ERRORS_CONT)) {
 715                 ext4_commit_super(sb, 0);
 716                 return;
 717         }
 718
 719         ext4_unlock_group(sb, grp);
 720         ext4_handle_error(sb);
 721         /*
 722          * We only get here in the ERRORS_RO case; relocking the group
 723          * may be dangerous, but nothing bad will happen since the
 724          * filesystem will have already been marked read/only and the
 725          * journal has been aborted.  We return 1 as a hint to callers
 726          * who might what to use the return value from
 727          * ext4_grp_locked_error() to distinguish between the
 728          * ERRORS_CONT and ERRORS_RO case, and perhaps return more
 729          * aggressively from the ext4 function in question, with a
 730          * more appropriate error code.
 731          */
 732         ext4_lock_group(sb, grp);
 733         return;
 734 }
 735
 736 void ext4_update_dynamic_rev(struct super_block *sb)
 737 {
 738         struct ext4_super_block *es = EXT4_SB(sb)->s_es;
 739
 740         if (le32_to_cpu(es->s_rev_level) > EXT4_GOOD_OLD_REV)
 741                 return;
 742
 743         ext4_warning(sb,
 744                      "updating to rev %d because of new feature flag, "
 745                      "running e2fsck is recommended",
 746                      EXT4_DYNAMIC_REV);
 747
 748         es->s_first_ino = cpu_to_le32(EXT4_GOOD_OLD_FIRST_INO);
 749         es->s_inode_size = cpu_to_le16(EXT4_GOOD_OLD_INODE_SIZE);
 750         es->s_rev_level = cpu_to_le32(EXT4_DYNAMIC_REV);
 751         /* leave es->s_feature_*compat flags alone */
 752         /* es->s_uuid will be set by e2fsck if empty */
 753
 754         /*
 755          * The rest of the superblock fields should be zero, and if not it
 756          * means they are likely already in use, so leave them alone.  We
 757          * can leave it up to e2fsck to clean up any inconsistencies there.
 758          */
 759 }
 760
 761 /*
 762  * Open the external journal device
 763  */
 764 static struct block_device *ext4_blkdev_get(dev_t dev, struct super_block *sb)
 765 {
 766         struct block_device *bdev;
 767         char b[BDEVNAME_SIZE];
 768
 769         bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, sb);
 770         if (IS_ERR(bdev))
 771                 goto fail;
 772         return bdev;
 773
 774 fail:
 775         ext4_msg(sb, KERN_ERR, "failed to open journal device %s: %ld",
 776                         __bdevname(dev, b), PTR_ERR(bdev));
 777         return NULL;
 778 }
 779
 780 /*
 781  * Release the journal device
 782  */
 783 static void ext4_blkdev_put(struct block_device *bdev)
 784 {
 785         blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
 786 }
 787
 788 static void ext4_blkdev_remove(struct ext4_sb_info *sbi)
 789 {
 790         struct block_device *bdev;
 791         bdev = sbi->journal_bdev;
 792         if (bdev) {
 793                 ext4_blkdev_put(bdev);
 794                 sbi->journal_bdev = NULL;
 795         }
 796 }
 797
 798 static inline struct inode *orphan_list_entry(struct list_head *l)
 799 {
 800         return &list_entry(l, struct ext4_inode_info, i_orphan)->vfs_inode;
 801 }
 802
 803 static void dump_orphan_list(struct super_block *sb, struct ext4_sb_info *sbi)
 804 {
 805         struct list_head *l;
 806
 807         ext4_msg(sb, KERN_ERR, "sb orphan head is %d",
 808                  le32_to_cpu(sbi->s_es->s_last_orphan));
 809
 810         printk(KERN_ERR "sb_info orphan list:\n");
 811         list_for_each(l, &sbi->s_orphan) {
 812                 struct inode *inode = orphan_list_entry(l);
 813                 printk(KERN_ERR "  "
 814                        "inode %s:%lu at %p: mode %o, nlink %d, next %d\n",
 815                        inode->i_sb->s_id, inode->i_ino, inode,
 816                        inode->i_mode, inode->i_nlink,
 817                        NEXT_ORPHAN(inode));
 818         }
 819 }
 820
/*
 * Tear down the ext4-private state attached to @sb.  Installed as the
 * .put_super hook of ext4_sops.
 *
 * The sequence is: unregister the lazy-init request and disable quota,
 * drain and destroy the reserved-conversion workqueue, destroy the journal,
 * release sysfs/shrinker/timer/allocator state, write the superblock back
 * when the filesystem is not read-only, and finally free the buffers,
 * counters and the ext4_sb_info itself.  The statement order is significant;
 * do not reorder without checking the dependencies of each helper.
 */
static void ext4_put_super(struct super_block *sb)
{
        struct ext4_sb_info *sbi = EXT4_SB(sb);
        struct ext4_super_block *es = sbi->s_es;
        int i, err;

        ext4_unregister_li_request(sb);
        dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED);

        /* Let queued conversion work finish before the queue is destroyed. */
        flush_workqueue(sbi->rsv_conversion_wq);
        destroy_workqueue(sbi->rsv_conversion_wq);

        if (sbi->s_journal) {
                err = jbd2_journal_destroy(sbi->s_journal);
                /* Cleared before ext4_abort() so nothing sees a stale journal. */
                sbi->s_journal = NULL;
                if (err < 0)
                        ext4_abort(sb, "Couldn't clean up the journal");
        }

        ext4_unregister_sysfs(sb);
        ext4_es_unregister_shrinker(sbi);
        del_timer_sync(&sbi->s_err_report);
        ext4_release_system_zone(sb);
        ext4_mb_release(sb);
        ext4_ext_release(sb);

        /*
         * On a writable filesystem, drop the "journal needs recovery"
         * feature and record the saved mount state in the superblock ...
         */
        if (!(sb->s_flags & MS_RDONLY)) {
                ext4_clear_feature_journal_needs_recovery(sb);
                es->s_state = cpu_to_le16(sbi->s_mount_state);
        }
        /* ... then commit it with sync == 1; the return value is ignored. */
        if (!(sb->s_flags & MS_RDONLY))
                ext4_commit_super(sb, 1);

        /* Release the group descriptor buffers, then the arrays themselves. */
        for (i = 0; i < sbi->s_gdb_count; i++)
                brelse(sbi->s_group_desc[i]);
        kvfree(sbi->s_group_desc);
        kvfree(sbi->s_flex_groups);
        percpu_counter_destroy(&sbi->s_freeclusters_counter);
        percpu_counter_destroy(&sbi->s_freeinodes_counter);
        percpu_counter_destroy(&sbi->s_dirs_counter);
        percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
        percpu_free_rwsem(&sbi->s_journal_flag_rwsem);
        brelse(sbi->s_sbh);
#ifdef CONFIG_QUOTA
        /* Names stored by set_qf_name(); kfree() copes with NULL slots. */
        for (i = 0; i < EXT4_MAXQUOTAS; i++)
                kfree(sbi->s_qf_names[i]);
#endif

        /* Debugging code just in case the in-memory inode orphan list
         * isn't empty.  The on-disk one can be non-empty if we've
         * detected an error and taken the fs readonly, but the
         * in-memory list had better be clean by this point. */
        if (!list_empty(&sbi->s_orphan))
                dump_orphan_list(sb, sbi);
        J_ASSERT(list_empty(&sbi->s_orphan));

        sync_blockdev(sb->s_bdev);
        invalidate_bdev(sb->s_bdev);
        if (sbi->journal_bdev && sbi->journal_bdev != sb->s_bdev) {
                /*
                 * Invalidate the journal device's buffers.  We don't want them
                 * floating about in memory - the physical journal device may
                 * be hotswapped, and it breaks the `ro-after' testing code.
                 */
                sync_blockdev(sbi->journal_bdev);
                invalidate_bdev(sbi->journal_bdev);
                ext4_blkdev_remove(sbi);
        }
        if (sbi->s_mb_cache) {
                ext4_xattr_destroy_cache(sbi->s_mb_cache);
                sbi->s_mb_cache = NULL;
        }
        if (sbi->s_mmp_tsk)
                kthread_stop(sbi->s_mmp_tsk);
        sb->s_fs_info = NULL;
        /*
         * Now that we are completely done shutting down the
         * superblock, we need to actually destroy the kobject.
         */
        kobject_put(&sbi->s_kobj);
        /* sbi embeds the kobject, so wait for its release before freeing sbi. */
        wait_for_completion(&sbi->s_kobj_unregister);
        if (sbi->s_chksum_driver)
                crypto_free_shash(sbi->s_chksum_driver);
        kfree(sbi->s_blockgroup_lock);
        kfree(sbi);
}
 907
/*
 * Slab cache for struct ext4_inode_info objects: created by
 * init_inodecache(), destroyed by destroy_inodecache(), and used by
 * ext4_alloc_inode() / ext4_i_callback() to allocate and free inodes.
 */
static struct kmem_cache *ext4_inode_cachep;
 909
 910 /*
 911  * Called inside transaction, so use GFP_NOFS
 912  */
 913 static struct inode *ext4_alloc_inode(struct super_block *sb)
 914 {
 915         struct ext4_inode_info *ei;
 916
 917         ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS);
 918         if (!ei)
 919                 return NULL;
 920
 921         ei->vfs_inode.i_version = 1;
 922         spin_lock_init(&ei->i_raw_lock);
 923         INIT_LIST_HEAD(&ei->i_prealloc_list);
 924         spin_lock_init(&ei->i_prealloc_lock);
 925         ext4_es_init_tree(&ei->i_es_tree);
 926         rwlock_init(&ei->i_es_lock);
 927         INIT_LIST_HEAD(&ei->i_es_list);
 928         ei->i_es_all_nr = 0;
 929         ei->i_es_shk_nr = 0;
 930         ei->i_es_shrink_lblk = 0;
 931         ei->i_reserved_data_blocks = 0;
 932         ei->i_reserved_meta_blocks = 0;
 933         ei->i_allocated_meta_blocks = 0;
 934         ei->i_da_metadata_calc_len = 0;
 935         ei->i_da_metadata_calc_last_lblock = 0;
 936         spin_lock_init(&(ei->i_block_reservation_lock));
 937 #ifdef CONFIG_QUOTA
 938         ei->i_reserved_quota = 0;
 939         memset(&ei->i_dquot, 0, sizeof(ei->i_dquot));
 940 #endif
 941         ei->jinode = NULL;
 942         INIT_LIST_HEAD(&ei->i_rsv_conversion_list);
 943         spin_lock_init(&ei->i_completed_io_lock);
 944         ei->i_sync_tid = 0;
 945         ei->i_datasync_tid = 0;
 946         atomic_set(&ei->i_unwritten, 0);
 947         INIT_WORK(&ei->i_rsv_conversion_work, ext4_end_io_rsv_work);
 948         return &ei->vfs_inode;
 949 }
 950
 951 static int ext4_drop_inode(struct inode *inode)
 952 {
 953         int drop = generic_drop_inode(inode);
 954
 955         trace_ext4_drop_inode(inode, drop);
 956         return drop;
 957 }
 958
 959 static void ext4_i_callback(struct rcu_head *head)
 960 {
 961         struct inode *inode = container_of(head, struct inode, i_rcu);
 962         kmem_cache_free(ext4_inode_cachep, EXT4_I(inode));
 963 }
 964
 965 static void ext4_destroy_inode(struct inode *inode)
 966 {
 967         if (!list_empty(&(EXT4_I(inode)->i_orphan))) {
 968                 ext4_msg(inode->i_sb, KERN_ERR,
 969                          "Inode %lu (%p): orphan list check failed!",
 970                          inode->i_ino, EXT4_I(inode));
 971                 print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4,
 972                                 EXT4_I(inode), sizeof(struct ext4_inode_info),
 973                                 true);
 974                 dump_stack();
 975         }
 976         call_rcu(&inode->i_rcu, ext4_i_callback);
 977 }
 978
 979 static void init_once(void *foo)
 980 {
 981         struct ext4_inode_info *ei = (struct ext4_inode_info *) foo;
 982
 983         INIT_LIST_HEAD(&ei->i_orphan);
 984         init_rwsem(&ei->xattr_sem);
 985         init_rwsem(&ei->i_data_sem);
 986         init_rwsem(&ei->i_mmap_sem);
 987         inode_init_once(&ei->vfs_inode);
 988 }
 989
 990 static int __init init_inodecache(void)
 991 {
 992         ext4_inode_cachep = kmem_cache_create("ext4_inode_cache",
 993                                              sizeof(struct ext4_inode_info),
 994                                              0, (SLAB_RECLAIM_ACCOUNT|
 995                                                 SLAB_MEM_SPREAD|SLAB_ACCOUNT),
 996                                              init_once);
 997         if (ext4_inode_cachep == NULL)
 998                 return -ENOMEM;
 999         return 0;
1000 }
1001
/*
 * Destroy the inode slab cache created by init_inodecache().
 */
static void destroy_inodecache(void)
{
        /*
         * Make sure all delayed rcu free inodes are flushed before we
         * destroy cache.  (ext4_destroy_inode() frees inodes through
         * call_rcu(), so callbacks may still be pending here.)
         */
        rcu_barrier();
        kmem_cache_destroy(ext4_inode_cachep);
}
1011
1012 void ext4_clear_inode(struct inode *inode)
1013 {
1014         invalidate_inode_buffers(inode);
1015         clear_inode(inode);
1016         dquot_drop(inode);
1017         ext4_discard_preallocations(inode);
1018         ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS);
1019         if (EXT4_I(inode)->jinode) {
1020                 jbd2_journal_release_jbd_inode(EXT4_JOURNAL(inode),
1021                                                EXT4_I(inode)->jinode);
1022                 jbd2_free_inode(EXT4_I(inode)->jinode);
1023                 EXT4_I(inode)->jinode = NULL;
1024         }
1025 #ifdef CONFIG_EXT4_FS_ENCRYPTION
1026         fscrypt_put_encryption_info(inode, NULL);
1027 #endif
1028 }
1029
1030 static struct inode *ext4_nfs_get_inode(struct super_block *sb,
1031                                         u64 ino, u32 generation)
1032 {
1033         struct inode *inode;
1034
1035         if (ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO)
1036                 return ERR_PTR(-ESTALE);
1037         if (ino > le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count))
1038                 return ERR_PTR(-ESTALE);
1039
1040         /* iget isn't really right if the inode is currently unallocated!!
1041          *
1042          * ext4_read_inode will return a bad_inode if the inode had been
1043          * deleted, so we should be safe.
1044          *
1045          * Currently we don't know the generation for parent directory, so
1046          * a generation of 0 means "accept any"
1047          */
1048         inode = ext4_iget_normal(sb, ino);
1049         if (IS_ERR(inode))
1050                 return ERR_CAST(inode);
1051         if (generation && inode->i_generation != generation) {
1052                 iput(inode);
1053                 return ERR_PTR(-ESTALE);
1054         }
1055
1056         return inode;
1057 }
1058
1059 static struct dentry *ext4_fh_to_dentry(struct super_block *sb, struct fid *fid,
1060                                         int fh_len, int fh_type)
1061 {
1062         return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
1063                                     ext4_nfs_get_inode);
1064 }
1065
1066 static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid,
1067                                         int fh_len, int fh_type)
1068 {
1069         return generic_fh_to_parent(sb, fid, fh_len, fh_type,
1070                                     ext4_nfs_get_inode);
1071 }
1072
1073 /*
1074  * Try to release metadata pages (indirect blocks, directories) which are
1075  * mapped via the block device.  Since these pages could have journal heads
1076  * which would prevent try_to_free_buffers() from freeing them, we must use
1077  * jbd2 layer's try_to_free_buffers() function to release them.
1078  */
1079 static int bdev_try_to_free_page(struct super_block *sb, struct page *page,
1080                                  gfp_t wait)
1081 {
1082         journal_t *journal = EXT4_SB(sb)->s_journal;
1083
1084         WARN_ON(PageChecked(page));
1085         if (!page_has_buffers(page))
1086                 return 0;
1087         if (journal)
1088                 return jbd2_journal_try_to_free_buffers(journal, page,
1089                                                 wait & ~__GFP_DIRECT_RECLAIM);
1090         return try_to_free_buffers(page);
1091 }
1092
1093 #ifdef CONFIG_EXT4_FS_ENCRYPTION
1094 static int ext4_get_context(struct inode *inode, void *ctx, size_t len)
1095 {
1096         return ext4_xattr_get(inode, EXT4_XATTR_INDEX_ENCRYPTION,
1097                                  EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, ctx, len);
1098 }
1099
1100 static int ext4_key_prefix(struct inode *inode, u8 **key)
1101 {
1102         *key = EXT4_SB(inode->i_sb)->key_prefix;
1103         return EXT4_SB(inode->i_sb)->key_prefix_size;
1104 }
1105
/*
 * fscrypt .prepare_context: convert any inline data of @inode before an
 * encryption context is set.  Returns the result of
 * ext4_convert_inline_data().
 */
static int ext4_prepare_context(struct inode *inode)
{
        int err;

        err = ext4_convert_inline_data(inode);
        return err;
}
1110
1111 static int ext4_set_context(struct inode *inode, const void *ctx, size_t len,
1112                                                         void *fs_data)
1113 {
1114         handle_t *handle;
1115         int res, res2;
1116
1117         /* fs_data is null when internally used. */
1118         if (fs_data) {
1119                 res  = ext4_xattr_set(inode, EXT4_XATTR_INDEX_ENCRYPTION,
1120                                 EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, ctx,
1121                                 len, 0);
1122                 if (!res) {
1123                         ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT);
1124                         ext4_clear_inode_state(inode,
1125                                         EXT4_STATE_MAY_INLINE_DATA);
1126                 }
1127                 return res;
1128         }
1129
1130         handle = ext4_journal_start(inode, EXT4_HT_MISC,
1131                         ext4_jbd2_credits_xattr(inode));
1132         if (IS_ERR(handle))
1133                 return PTR_ERR(handle);
1134
1135         res = ext4_xattr_set(inode, EXT4_XATTR_INDEX_ENCRYPTION,
1136                         EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, ctx,
1137                         len, 0);
1138         if (!res) {
1139                 ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT);
1140                 res = ext4_mark_inode_dirty(handle, inode);
1141                 if (res)
1142                         EXT4_ERROR_INODE(inode, "Failed to mark inode dirty");
1143         }
1144         res2 = ext4_journal_stop(handle);
1145         if (!res)
1146                 res = res2;
1147         return res;
1148 }
1149
1150 static int ext4_dummy_context(struct inode *inode)
1151 {
1152         return DUMMY_ENCRYPTION_ENABLED(EXT4_SB(inode->i_sb));
1153 }
1154
1155 static unsigned ext4_max_namelen(struct inode *inode)
1156 {
1157         return S_ISLNK(inode->i_mode) ? inode->i_sb->s_blocksize :
1158                 EXT4_NAME_LEN;
1159 }
1160
1161 static struct fscrypt_operations ext4_cryptops = {
1162         .get_context            = ext4_get_context,
1163         .key_prefix             = ext4_key_prefix,
1164         .prepare_context        = ext4_prepare_context,
1165         .set_context            = ext4_set_context,
1166         .dummy_context          = ext4_dummy_context,
1167         .is_encrypted           = ext4_encrypted_inode,
1168         .empty_dir              = ext4_empty_dir,
1169         .max_namelen            = ext4_max_namelen,
1170 };
1171 #else
1172 static struct fscrypt_operations ext4_cryptops = {
1173         .is_encrypted           = ext4_encrypted_inode,
1174 };
1175 #endif
1176
1177 #ifdef CONFIG_QUOTA
/* Quota type names, indexed by quota type; initialised from INITQFNAMES. */
static char *quotatypes[] = INITQFNAMES;
/* Name of quota type t.  No bounds check: t must be a valid index. */
#define QTYPE2NAME(t) (quotatypes[t])

/*
 * Forward declarations of the quota callbacks referenced by the operation
 * tables below; the definitions appear later in this file.
 */
static int ext4_write_dquot(struct dquot *dquot);
static int ext4_acquire_dquot(struct dquot *dquot);
static int ext4_release_dquot(struct dquot *dquot);
static int ext4_mark_dquot_dirty(struct dquot *dquot);
static int ext4_write_info(struct super_block *sb, int type);
static int ext4_quota_on(struct super_block *sb, int type, int format_id,
                         struct path *path);
static int ext4_quota_off(struct super_block *sb, int type);
static int ext4_quota_on_mount(struct super_block *sb, int type);
static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
                               size_t len, loff_t off);
static ssize_t ext4_quota_write(struct super_block *sb, int type,
                                const char *data, size_t len, loff_t off);
static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
                             unsigned int flags);
static int ext4_enable_quotas(struct super_block *sb);
static int ext4_get_next_id(struct super_block *sb, struct kqid *qid);
1198
1199 static struct dquot **ext4_get_dquots(struct inode *inode)
1200 {
1201         return EXT4_I(inode)->i_dquot;
1202 }
1203
/*
 * dquot operations for ext4.  Allocation and destruction use the generic
 * dquot_alloc()/dquot_destroy() helpers; the remaining hooks are
 * ext4-specific functions declared above or in ext4 headers.
 */
static const struct dquot_operations ext4_quota_operations = {
        .get_reserved_space = ext4_get_reserved_space,
        .write_dquot    = ext4_write_dquot,
        .acquire_dquot  = ext4_acquire_dquot,
        .release_dquot  = ext4_release_dquot,
        .mark_dirty     = ext4_mark_dquot_dirty,
        .write_info     = ext4_write_info,
        .alloc_dquot    = dquot_alloc,
        .destroy_dquot  = dquot_destroy,
        .get_projid     = ext4_get_projid,
        .get_next_id    = ext4_get_next_id,
};
1216
/*
 * quotactl operations for ext4.  Only quota_on/quota_off are ext4-specific;
 * every other entry points at a generic dquot_* helper.
 */
static const struct quotactl_ops ext4_qctl_operations = {
        .quota_on       = ext4_quota_on,
        .quota_off      = ext4_quota_off,
        .quota_sync     = dquot_quota_sync,
        .get_state      = dquot_get_state,
        .set_info       = dquot_set_dqinfo,
        .get_dqblk      = dquot_get_dqblk,
        .set_dqblk      = dquot_set_dqblk,
        .get_nextdqblk  = dquot_get_next_dqblk,
};
1227 #endif
1228
/*
 * Superblock operations for ext4.  The quota read/write/get_dquots hooks
 * are only present when CONFIG_QUOTA is enabled.
 */
static const struct super_operations ext4_sops = {
        .alloc_inode    = ext4_alloc_inode,
        .destroy_inode  = ext4_destroy_inode,
        .write_inode    = ext4_write_inode,
        .dirty_inode    = ext4_dirty_inode,
        .drop_inode     = ext4_drop_inode,
        .evict_inode    = ext4_evict_inode,
        .put_super      = ext4_put_super,
        .sync_fs        = ext4_sync_fs,
        .freeze_fs      = ext4_freeze,
        .unfreeze_fs    = ext4_unfreeze,
        .statfs         = ext4_statfs,
        .remount_fs     = ext4_remount,
        .show_options   = ext4_show_options,
#ifdef CONFIG_QUOTA
        .quota_read     = ext4_quota_read,
        .quota_write    = ext4_quota_write,
        .get_dquots     = ext4_get_dquots,
#endif
        .bdev_try_to_free_page = bdev_try_to_free_page,
};
1250
/*
 * Export operations for ext4: file-handle decoding goes through the two
 * wrappers above (which use ext4_nfs_get_inode()), parent lookup through
 * ext4_get_parent().
 */
static const struct export_operations ext4_export_ops = {
        .fh_to_dentry = ext4_fh_to_dentry,
        .fh_to_parent = ext4_fh_to_parent,
        .get_parent = ext4_get_parent,
};
1256
/*
 * Mount option token identifiers.  The tokens[] table maps option strings
 * to these values and ext4_mount_opts[] describes how many of them are
 * applied.  Opt_err is the value carried by the terminating entry of both
 * tables.
 *
 * NOTE(review): Opt_mblk_io_submit and Opt_nomblk_io_submit are declared
 * here, but tokens[] maps the corresponding option strings to Opt_removed;
 * within the visible part of the file the two enumerators are unused.
 */
enum {
        Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid,
        Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro,
        Opt_nouid32, Opt_debug, Opt_removed,
        Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl,
        Opt_auto_da_alloc, Opt_noauto_da_alloc, Opt_noload,
        Opt_commit, Opt_min_batch_time, Opt_max_batch_time, Opt_journal_dev,
        Opt_journal_path, Opt_journal_checksum, Opt_journal_async_commit,
        Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
        Opt_data_err_abort, Opt_data_err_ignore, Opt_test_dummy_encryption,
        Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
        Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota,
        Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err,
        Opt_usrquota, Opt_grpquota, Opt_prjquota, Opt_i_version, Opt_dax,
        Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_mblk_io_submit,
        Opt_lazytime, Opt_nolazytime,
        Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity,
        Opt_inode_readahead_blks, Opt_journal_ioprio,
        Opt_dioread_nolock, Opt_dioread_lock,
        Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable,
        Opt_max_dir_size_kb, Opt_nojournal_checksum,
};
1279
/*
 * Mount option patterns and the Opt_* token each one maps to.  The table is
 * terminated by the {Opt_err, NULL} entry.
 *
 * Several spellings may share one token (e.g. "grpid"/"bsdgroups",
 * "noload"/"norecovery"), and some options are listed both with and without
 * an argument ("barrier", "auto_da_alloc", "init_itable").
 *
 * Do not reorder entries casually: the bare "usrjquota="/"grpjquota="
 * patterns are listed ahead of their "%s" forms, which presumably relies on
 * the parser trying entries in table order -- verify against match_token().
 */
static const match_table_t tokens = {
        {Opt_bsd_df, "bsddf"},
        {Opt_minix_df, "minixdf"},
        {Opt_grpid, "grpid"},
        {Opt_grpid, "bsdgroups"},
        {Opt_nogrpid, "nogrpid"},
        {Opt_nogrpid, "sysvgroups"},
        {Opt_resgid, "resgid=%u"},
        {Opt_resuid, "resuid=%u"},
        {Opt_sb, "sb=%u"},
        {Opt_err_cont, "errors=continue"},
        {Opt_err_panic, "errors=panic"},
        {Opt_err_ro, "errors=remount-ro"},
        {Opt_nouid32, "nouid32"},
        {Opt_debug, "debug"},
        {Opt_removed, "oldalloc"},
        {Opt_removed, "orlov"},
        {Opt_user_xattr, "user_xattr"},
        {Opt_nouser_xattr, "nouser_xattr"},
        {Opt_acl, "acl"},
        {Opt_noacl, "noacl"},
        {Opt_noload, "norecovery"},
        {Opt_noload, "noload"},
        {Opt_removed, "nobh"},
        {Opt_removed, "bh"},
        {Opt_commit, "commit=%u"},
        {Opt_min_batch_time, "min_batch_time=%u"},
        {Opt_max_batch_time, "max_batch_time=%u"},
        {Opt_journal_dev, "journal_dev=%u"},
        {Opt_journal_path, "journal_path=%s"},
        {Opt_journal_checksum, "journal_checksum"},
        {Opt_nojournal_checksum, "nojournal_checksum"},
        {Opt_journal_async_commit, "journal_async_commit"},
        {Opt_abort, "abort"},
        {Opt_data_journal, "data=journal"},
        {Opt_data_ordered, "data=ordered"},
        {Opt_data_writeback, "data=writeback"},
        {Opt_data_err_abort, "data_err=abort"},
        {Opt_data_err_ignore, "data_err=ignore"},
        /* Empty value ("usrjquota=") maps to the Opt_off* token. */
        {Opt_offusrjquota, "usrjquota="},
        {Opt_usrjquota, "usrjquota=%s"},
        {Opt_offgrpjquota, "grpjquota="},
        {Opt_grpjquota, "grpjquota=%s"},
        {Opt_jqfmt_vfsold, "jqfmt=vfsold"},
        {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"},
        {Opt_jqfmt_vfsv1, "jqfmt=vfsv1"},
        {Opt_grpquota, "grpquota"},
        {Opt_noquota, "noquota"},
        {Opt_quota, "quota"},
        {Opt_usrquota, "usrquota"},
        {Opt_prjquota, "prjquota"},
        {Opt_barrier, "barrier=%u"},
        {Opt_barrier, "barrier"},
        {Opt_nobarrier, "nobarrier"},
        {Opt_i_version, "i_version"},
        {Opt_dax, "dax"},
        {Opt_stripe, "stripe=%u"},
        {Opt_delalloc, "delalloc"},
        {Opt_lazytime, "lazytime"},
        {Opt_nolazytime, "nolazytime"},
        {Opt_nodelalloc, "nodelalloc"},
        {Opt_removed, "mblk_io_submit"},
        {Opt_removed, "nomblk_io_submit"},
        {Opt_block_validity, "block_validity"},
        {Opt_noblock_validity, "noblock_validity"},
        {Opt_inode_readahead_blks, "inode_readahead_blks=%u"},
        {Opt_journal_ioprio, "journal_ioprio=%u"},
        {Opt_auto_da_alloc, "auto_da_alloc=%u"},
        {Opt_auto_da_alloc, "auto_da_alloc"},
        {Opt_noauto_da_alloc, "noauto_da_alloc"},
        {Opt_dioread_nolock, "dioread_nolock"},
        {Opt_dioread_lock, "dioread_lock"},
        {Opt_discard, "discard"},
        {Opt_nodiscard, "nodiscard"},
        {Opt_init_itable, "init_itable=%u"},
        {Opt_init_itable, "init_itable"},
        {Opt_noinit_itable, "noinit_itable"},
        {Opt_max_dir_size_kb, "max_dir_size_kb=%u"},
        {Opt_test_dummy_encryption, "test_dummy_encryption"},
        {Opt_removed, "check=none"},    /* mount option from ext2/3 */
        {Opt_removed, "nocheck"},       /* mount option from ext2/3 */
        {Opt_removed, "reservation"},   /* mount option from ext2/3 */
        {Opt_removed, "noreservation"}, /* mount option from ext2/3 */
        {Opt_removed, "journal=%u"},    /* mount option from ext2/3 */
        {Opt_err, NULL},
};
1366
1367 static ext4_fsblk_t get_sb_block(void **data)
1368 {
1369         ext4_fsblk_t    sb_block;
1370         char            *options = (char *) *data;
1371
1372         if (!options || strncmp(options, "sb=", 3) != 0)
1373                 return 1;       /* Default location */
1374
1375         options += 3;
1376         /* TODO: use simple_strtoll with >32bit ext4 */
1377         sb_block = simple_strtoul(options, &options, 0);
1378         if (*options && *options != ',') {
1379                 printk(KERN_ERR "EXT4-fs: Invalid sb specification: %s\n",
1380                        (char *) *data);
1381                 return 1;
1382         }
1383         if (*options == ',')
1384                 options++;
1385         *data = (void *) options;
1386
1387         return sb_block;
1388 }
1389
/* Default journal I/O priority: best-effort class, priority level 3. */
#define DEFAULT_JOURNAL_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 3))
/*
 * printf-style format for deprecation warnings; takes two %s arguments:
 * the option name and a string naming when it will be removed.
 */
static char deprecated_msg[] = "Mount option \"%s\" will be removed by %s\n"
        "Contact linux-ext4@vger.kernel.org if you think we should keep it.\n";
1393
1394 #ifdef CONFIG_QUOTA
1395 static int set_qf_name(struct super_block *sb, int qtype, substring_t *args)
1396 {
1397         struct ext4_sb_info *sbi = EXT4_SB(sb);
1398         char *qname;
1399         int ret = -1;
1400
1401         if (sb_any_quota_loaded(sb) &&
1402                 !sbi->s_qf_names[qtype]) {
1403                 ext4_msg(sb, KERN_ERR,
1404                         "Cannot change journaled "
1405                         "quota options when quota turned on");
1406                 return -1;
1407         }
1408         if (ext4_has_feature_quota(sb)) {
1409                 ext4_msg(sb, KERN_INFO, "Journaled quota options "
1410                          "ignored when QUOTA feature is enabled");
1411                 return 1;
1412         }
1413         qname = match_strdup(args);
1414         if (!qname) {
1415                 ext4_msg(sb, KERN_ERR,
1416                         "Not enough memory for storing quotafile name");
1417                 return -1;
1418         }
1419         if (sbi->s_qf_names[qtype]) {
1420                 if (strcmp(sbi->s_qf_names[qtype], qname) == 0)
1421                         ret = 1;
1422                 else
1423                         ext4_msg(sb, KERN_ERR,
1424                                  "%s quota file already specified",
1425                                  QTYPE2NAME(qtype));
1426                 goto errout;
1427         }
1428         if (strchr(qname, '/')) {
1429                 ext4_msg(sb, KERN_ERR,
1430                         "quotafile must be on filesystem root");
1431                 goto errout;
1432         }
1433         sbi->s_qf_names[qtype] = qname;
1434         set_opt(sb, QUOTA);
1435         return 1;
1436 errout:
1437         kfree(qname);
1438         return ret;
1439 }
1440
1441 static int clear_qf_name(struct super_block *sb, int qtype)
1442 {
1443
1444         struct ext4_sb_info *sbi = EXT4_SB(sb);
1445
1446         if (sb_any_quota_loaded(sb) &&
1447                 sbi->s_qf_names[qtype]) {
1448                 ext4_msg(sb, KERN_ERR, "Cannot change journaled quota options"
1449                         " when quota turned on");
1450                 return -1;
1451         }
1452         kfree(sbi->s_qf_names[qtype]);
1453         sbi->s_qf_names[qtype] = NULL;
1454         return 1;
1455 }
1456 #endif
1457
/*
 * Flag bits for the 'flags' member of struct mount_opts (see
 * ext4_mount_opts[] below).  They are interpreted by handle_mount_opt();
 * the short descriptions here are inferred from the names and from how the
 * table uses them -- confirm against handle_mount_opt() before relying on
 * them.
 */
#define MOPT_SET        0x0001  /* presumably: option sets mount_opt bits */
#define MOPT_CLEAR      0x0002  /* presumably: option clears mount_opt bits */
#define MOPT_NOSUPPORT  0x0004  /* used for options compiled out (acl, quota) */
#define MOPT_EXPLICIT   0x0008
#define MOPT_CLEAR_ERR  0x0010  /* used only by the errors= entries */
#define MOPT_GTE0       0x0020  /* used by entries taking a numeric argument */
#ifdef CONFIG_QUOTA
/* With quota support MOPT_Q adds no flag bits at all. */
#define MOPT_Q          0
#define MOPT_QFMT       0x0040
#else
/* Without quota support both quota markers collapse into MOPT_NOSUPPORT. */
#define MOPT_Q          MOPT_NOSUPPORT
#define MOPT_QFMT       MOPT_NOSUPPORT
#endif
#define MOPT_DATAJ      0x0080  /* used by the data= journalling mode entries */
#define MOPT_NO_EXT2    0x0100
#define MOPT_NO_EXT3    0x0200
#define MOPT_EXT4_ONLY  (MOPT_NO_EXT2 | MOPT_NO_EXT3)
#define MOPT_STRING     0x0400  /* used by journal_path, which takes a string */
1476
/*
 * Per-token description of how a mount option is applied.
 *
 *   token     - Opt_* value produced by the tokens[] table
 *   mount_opt - usually the EXT4_MOUNT_* bit(s) the option affects; 0 for
 *               options without a flag bit, and a QFMT_* value for the
 *               MOPT_QFMT entries
 *   flags     - MOPT_* bits describing how the option is handled
 *
 * The table is terminated by the {Opt_err, 0, 0} entry.
 */
static const struct mount_opts {
        int     token;
        int     mount_opt;
        int     flags;
} ext4_mount_opts[] = {
        {Opt_minix_df, EXT4_MOUNT_MINIX_DF, MOPT_SET},
        {Opt_bsd_df, EXT4_MOUNT_MINIX_DF, MOPT_CLEAR},
        {Opt_grpid, EXT4_MOUNT_GRPID, MOPT_SET},
        {Opt_nogrpid, EXT4_MOUNT_GRPID, MOPT_CLEAR},
        {Opt_block_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_SET},
        {Opt_noblock_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_CLEAR},
        {Opt_dioread_nolock, EXT4_MOUNT_DIOREAD_NOLOCK,
         MOPT_EXT4_ONLY | MOPT_SET},
        {Opt_dioread_lock, EXT4_MOUNT_DIOREAD_NOLOCK,
         MOPT_EXT4_ONLY | MOPT_CLEAR},
        {Opt_discard, EXT4_MOUNT_DISCARD, MOPT_SET},
        {Opt_nodiscard, EXT4_MOUNT_DISCARD, MOPT_CLEAR},
        {Opt_delalloc, EXT4_MOUNT_DELALLOC,
         MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT},
        {Opt_nodelalloc, EXT4_MOUNT_DELALLOC,
         MOPT_EXT4_ONLY | MOPT_CLEAR},
        {Opt_nojournal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM,
         MOPT_EXT4_ONLY | MOPT_CLEAR},
        {Opt_journal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM,
         MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT},
        /* journal_async_commit also turns on journal checksumming. */
        {Opt_journal_async_commit, (EXT4_MOUNT_JOURNAL_ASYNC_COMMIT |
                                    EXT4_MOUNT_JOURNAL_CHECKSUM),
         MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT},
        {Opt_noload, EXT4_MOUNT_NOLOAD, MOPT_NO_EXT2 | MOPT_SET},
        {Opt_err_panic, EXT4_MOUNT_ERRORS_PANIC, MOPT_SET | MOPT_CLEAR_ERR},
        {Opt_err_ro, EXT4_MOUNT_ERRORS_RO, MOPT_SET | MOPT_CLEAR_ERR},
        {Opt_err_cont, EXT4_MOUNT_ERRORS_CONT, MOPT_SET | MOPT_CLEAR_ERR},
        /*
         * NOTE(review): unlike the other single-bit entries, the two
         * data_err entries carry neither MOPT_SET nor MOPT_CLEAR.  Confirm
         * that handle_mount_opt() special-cases these tokens.
         */
        {Opt_data_err_abort, EXT4_MOUNT_DATA_ERR_ABORT,
         MOPT_NO_EXT2},
        {Opt_data_err_ignore, EXT4_MOUNT_DATA_ERR_ABORT,
         MOPT_NO_EXT2},
        {Opt_barrier, EXT4_MOUNT_BARRIER, MOPT_SET},
        {Opt_nobarrier, EXT4_MOUNT_BARRIER, MOPT_CLEAR},
        /* The flag bit is the negative form, so SET/CLEAR look inverted. */
        {Opt_noauto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_SET},
        {Opt_auto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_CLEAR},
        {Opt_noinit_itable, EXT4_MOUNT_INIT_INODE_TABLE, MOPT_CLEAR},
        {Opt_commit, 0, MOPT_GTE0},
        {Opt_max_batch_time, 0, MOPT_GTE0},
        {Opt_min_batch_time, 0, MOPT_GTE0},
        {Opt_inode_readahead_blks, 0, MOPT_GTE0},
        {Opt_init_itable, 0, MOPT_GTE0},
        {Opt_dax, EXT4_MOUNT_DAX, MOPT_SET},
        {Opt_stripe, 0, MOPT_GTE0},
        {Opt_resuid, 0, MOPT_GTE0},
        {Opt_resgid, 0, MOPT_GTE0},
        {Opt_journal_dev, 0, MOPT_NO_EXT2 | MOPT_GTE0},
        {Opt_journal_path, 0, MOPT_NO_EXT2 | MOPT_STRING},
        {Opt_journal_ioprio, 0, MOPT_NO_EXT2 | MOPT_GTE0},
        {Opt_data_journal, EXT4_MOUNT_JOURNAL_DATA, MOPT_NO_EXT2 | MOPT_DATAJ},
        {Opt_data_ordered, EXT4_MOUNT_ORDERED_DATA, MOPT_NO_EXT2 | MOPT_DATAJ},
        {Opt_data_writeback, EXT4_MOUNT_WRITEBACK_DATA,
         MOPT_NO_EXT2 | MOPT_DATAJ},
        {Opt_user_xattr, EXT4_MOUNT_XATTR_USER, MOPT_SET},
        {Opt_nouser_xattr, EXT4_MOUNT_XATTR_USER, MOPT_CLEAR},
#ifdef CONFIG_EXT4_FS_POSIX_ACL
        {Opt_acl, EXT4_MOUNT_POSIX_ACL, MOPT_SET},
        {Opt_noacl, EXT4_MOUNT_POSIX_ACL, MOPT_CLEAR},
#else
        {Opt_acl, 0, MOPT_NOSUPPORT},
        {Opt_noacl, 0, MOPT_NOSUPPORT},
#endif
        {Opt_nouid32, EXT4_MOUNT_NO_UID32, MOPT_SET},
        {Opt_debug, EXT4_MOUNT_DEBUG, MOPT_SET},
        /* MOPT_Q becomes MOPT_NOSUPPORT when CONFIG_QUOTA is disabled. */
        {Opt_quota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA, MOPT_SET | MOPT_Q},
        {Opt_usrquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA,
                                                        MOPT_SET | MOPT_Q},
        {Opt_grpquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_GRPQUOTA,
                                                        MOPT_SET | MOPT_Q},
        {Opt_prjquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_PRJQUOTA,
                                                        MOPT_SET | MOPT_Q},
        {Opt_noquota, (EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA |
                       EXT4_MOUNT_GRPQUOTA | EXT4_MOUNT_PRJQUOTA),
                                                        MOPT_CLEAR | MOPT_Q},
        {Opt_usrjquota, 0, MOPT_Q},
        {Opt_grpjquota, 0, MOPT_Q},
        {Opt_offusrjquota, 0, MOPT_Q},
        {Opt_offgrpjquota, 0, MOPT_Q},
        /* For MOPT_QFMT entries mount_opt holds the quota format id. */
        {Opt_jqfmt_vfsold, QFMT_VFS_OLD, MOPT_QFMT},
        {Opt_jqfmt_vfsv0, QFMT_VFS_V0, MOPT_QFMT},
        {Opt_jqfmt_vfsv1, QFMT_VFS_V1, MOPT_QFMT},
        {Opt_max_dir_size_kb, 0, MOPT_GTE0},
        {Opt_test_dummy_encryption, 0, MOPT_GTE0},
        {Opt_err, 0, 0}
};
1566
1567 static int handle_mount_opt(struct super_block *sb, char *opt, int token,
1568                             substring_t *args, unsigned long *journal_devnum,
1569                             unsigned int *journal_ioprio, int is_remount)
1570 {
1571         struct ext4_sb_info *sbi = EXT4_SB(sb);
1572         const struct mount_opts *m;
1573         kuid_t uid;
1574         kgid_t gid;
1575         int arg = 0;
1576
1577 #ifdef CONFIG_QUOTA
1578         if (token == Opt_usrjquota)
1579                 return set_qf_name(sb, USRQUOTA, &args[0]);
1580         else if (token == Opt_grpjquota)
1581                 return set_qf_name(sb, GRPQUOTA, &args[0]);
1582         else if (token == Opt_offusrjquota)
1583                 return clear_qf_name(sb, USRQUOTA);
1584         else if (token == Opt_offgrpjquota)
1585                 return clear_qf_name(sb, GRPQUOTA);
1586 #endif
1587         switch (token) {
1588         case Opt_noacl:
1589         case Opt_nouser_xattr:
1590                 ext4_msg(sb, KERN_WARNING, deprecated_msg, opt, "3.5");
1591                 break;
1592         case Opt_sb:
1593                 return 1;       /* handled by get_sb_block() */
1594         case Opt_removed:
1595                 ext4_msg(sb, KERN_WARNING, "Ignoring removed %s option", opt);
1596                 return 1;
1597         case Opt_abort:
1598                 sbi->s_mount_flags |= EXT4_MF_FS_ABORTED;
1599                 return 1;
1600         case Opt_i_version:
1601                 sb->s_flags |= MS_I_VERSION;
1602                 return 1;
1603         case Opt_lazytime:
1604                 sb->s_flags |= MS_LAZYTIME;
1605                 return 1;
1606         case Opt_nolazytime:
1607                 sb->s_flags &= ~MS_LAZYTIME;
1608                 return 1;
1609         }
1610
1611         for (m = ext4_mount_opts; m->token != Opt_err; m++)
1612                 if (token == m->token)
1613                         break;
1614
1615         if (m->token == Opt_err) {
1616                 ext4_msg(sb, KERN_ERR, "Unrecognized mount option \"%s\" "
1617                          "or missing value", opt);
1618                 return -1;
1619         }
1620
1621         if ((m->flags & MOPT_NO_EXT2) && IS_EXT2_SB(sb)) {
1622                 ext4_msg(sb, KERN_ERR,
1623                          "Mount option \"%s\" incompatible with ext2", opt);
1624                 return -1;
1625         }
1626         if ((m->flags & MOPT_NO_EXT3) && IS_EXT3_SB(sb)) {
1627                 ext4_msg(sb, KERN_ERR,
1628                          "Mount option \"%s\" incompatible with ext3", opt);
1629                 return -1;
1630         }
1631
1632         if (args->from && !(m->flags & MOPT_STRING) && match_int(args, &arg))
1633                 return -1;
1634         if (args->from && (m->flags & MOPT_GTE0) && (arg < 0))
1635                 return -1;
1636         if (m->flags & MOPT_EXPLICIT) {
1637                 if (m->mount_opt & EXT4_MOUNT_DELALLOC) {
1638                         set_opt2(sb, EXPLICIT_DELALLOC);
1639                 } else if (m->mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) {
1640                         set_opt2(sb, EXPLICIT_JOURNAL_CHECKSUM);
1641                 } else
1642                         return -1;
1643         }
1644         if (m->flags & MOPT_CLEAR_ERR)
1645                 clear_opt(sb, ERRORS_MASK);
1646         if (token == Opt_noquota && sb_any_quota_loaded(sb)) {
1647                 ext4_msg(sb, KERN_ERR, "Cannot change quota "
1648                          "options when quota turned on");
1649                 return -1;
1650         }
1651
1652         if (m->flags & MOPT_NOSUPPORT) {
1653                 ext4_msg(sb, KERN_ERR, "%s option not supported", opt);
1654         } else if (token == Opt_commit) {
1655                 if (arg == 0)
1656                         arg = JBD2_DEFAULT_MAX_COMMIT_AGE;
1657                 sbi->s_commit_interval = HZ * arg;
1658         } else if (token == Opt_max_batch_time) {
1659                 sbi->s_max_batch_time = arg;
1660         } else if (token == Opt_min_batch_time) {
1661                 sbi->s_min_batch_time = arg;
1662         } else if (token == Opt_inode_readahead_blks) {
1663                 if (arg && (arg > (1 << 30) || !is_power_of_2(arg))) {
1664                         ext4_msg(sb, KERN_ERR,
1665                                  "EXT4-fs: inode_readahead_blks must be "
1666                                  "0 or a power of 2 smaller than 2^31");
1667                         return -1;
1668                 }
1669                 sbi->s_inode_readahead_blks = arg;
1670         } else if (token == Opt_init_itable) {
1671                 set_opt(sb, INIT_INODE_TABLE);
1672                 if (!args->from)
1673                         arg = EXT4_DEF_LI_WAIT_MULT;
1674                 sbi->s_li_wait_mult = arg;
1675         } else if (token == Opt_max_dir_size_kb) {
1676                 sbi->s_max_dir_size_kb = arg;
1677         } else if (token == Opt_stripe) {
1678                 sbi->s_stripe = arg;
1679         } else if (token == Opt_resuid) {
1680                 uid = make_kuid(current_user_ns(), arg);
1681                 if (!uid_valid(uid)) {
1682                         ext4_msg(sb, KERN_ERR, "Invalid uid value %d", arg);
1683                         return -1;
1684                 }
1685                 sbi->s_resuid = uid;
1686         } else if (token == Opt_resgid) {
1687                 gid = make_kgid(current_user_ns(), arg);
1688                 if (!gid_valid(gid)) {
1689                         ext4_msg(sb, KERN_ERR, "Invalid gid value %d", arg);
1690                         return -1;
1691                 }
1692                 sbi->s_resgid = gid;
1693         } else if (token == Opt_journal_dev) {
1694                 if (is_remount) {
1695                         ext4_msg(sb, KERN_ERR,
1696                                  "Cannot specify journal on remount");
1697                         return -1;
1698                 }
1699                 *journal_devnum = arg;
1700         } else if (token == Opt_journal_path) {
1701                 char *journal_path;
1702                 struct inode *journal_inode;
1703                 struct path path;
1704                 int error;
1705
1706                 if (is_remount) {
1707                         ext4_msg(sb, KERN_ERR,
1708                                  "Cannot specify journal on remount");
1709                         return -1;
1710                 }
1711                 journal_path = match_strdup(&args[0]);
1712                 if (!journal_path) {
1713                         ext4_msg(sb, KERN_ERR, "error: could not dup "
1714                                 "journal device string");
1715                         return -1;
1716                 }
1717
1718                 error = kern_path(journal_path, LOOKUP_FOLLOW, &path);
1719                 if (error) {
1720                         ext4_msg(sb, KERN_ERR, "error: could not find "
1721                                 "journal device path: error %d", error);
1722                         kfree(journal_path);
1723                         return -1;
1724                 }
1725
1726                 journal_inode = d_inode(path.dentry);
1727                 if (!S_ISBLK(journal_inode->i_mode)) {
1728                         ext4_msg(sb, KERN_ERR, "error: journal path %s "
1729                                 "is not a block device", journal_path);
1730                         path_put(&path);
1731                         kfree(journal_path);
1732                         return -1;
1733                 }
1734
1735                 *journal_devnum = new_encode_dev(journal_inode->i_rdev);
1736                 path_put(&path);
1737                 kfree(journal_path);
1738         } else if (token == Opt_journal_ioprio) {
1739                 if (arg > 7) {
1740                         ext4_msg(sb, KERN_ERR, "Invalid journal IO priority"
1741                                  " (must be 0-7)");
1742                         return -1;
1743                 }
1744                 *journal_ioprio =
1745                         IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, arg);
1746         } else if (token == Opt_test_dummy_encryption) {
1747 #ifdef CONFIG_EXT4_FS_ENCRYPTION
1748                 sbi->s_mount_flags |= EXT4_MF_TEST_DUMMY_ENCRYPTION;
1749                 ext4_msg(sb, KERN_WARNING,
1750                          "Test dummy encryption mode enabled");
1751 #else
1752                 ext4_msg(sb, KERN_WARNING,
1753                          "Test dummy encryption mount option ignored");
1754 #endif
1755         } else if (m->flags & MOPT_DATAJ) {
1756                 if (is_remount) {
1757                         if (!sbi->s_journal)
1758                                 ext4_msg(sb, KERN_WARNING, "Remounting file system with no journal so ignoring journalled data option");
1759                         else if (test_opt(sb, DATA_FLAGS) != m->mount_opt) {
1760                                 ext4_msg(sb, KERN_ERR,
1761                                          "Cannot change data mode on remount");
1762                                 return -1;
1763                         }
1764                 } else {
1765                         clear_opt(sb, DATA_FLAGS);
1766                         sbi->s_mount_opt |= m->mount_opt;
1767                 }
1768 #ifdef CONFIG_QUOTA
1769         } else if (m->flags & MOPT_QFMT) {
1770                 if (sb_any_quota_loaded(sb) &&
1771                     sbi->s_jquota_fmt != m->mount_opt) {
1772                         ext4_msg(sb, KERN_ERR, "Cannot change journaled "
1773                                  "quota options when quota turned on");
1774                         return -1;
1775                 }
1776                 if (ext4_has_feature_quota(sb)) {
1777                         ext4_msg(sb, KERN_INFO,
1778                                  "Quota format mount options ignored "
1779                                  "when QUOTA feature is enabled");
1780                         return 1;
1781                 }
1782                 sbi->s_jquota_fmt = m->mount_opt;
1783 #endif
1784         } else if (token == Opt_dax) {
1785 #ifdef CONFIG_FS_DAX
1786                 ext4_msg(sb, KERN_WARNING,
1787                 "DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
1788                         sbi->s_mount_opt |= m->mount_opt;
1789 #else
1790                 ext4_msg(sb, KERN_INFO, "dax option not supported");
1791                 return -1;
1792 #endif
1793         } else if (token == Opt_data_err_abort) {
1794                 sbi->s_mount_opt |= m->mount_opt;
1795         } else if (token == Opt_data_err_ignore) {
1796                 sbi->s_mount_opt &= ~m->mount_opt;
1797         } else {
1798                 if (!args->from)
1799                         arg = 1;
1800                 if (m->flags & MOPT_CLEAR)
1801                         arg = !arg;
1802                 else if (unlikely(!(m->flags & MOPT_SET))) {
1803                         ext4_msg(sb, KERN_WARNING,
1804                                  "buggy handling of option %s", opt);
1805                         WARN_ON(1);
1806                         return -1;
1807                 }
1808                 if (arg != 0)
1809                         sbi->s_mount_opt |= m->mount_opt;
1810                 else
1811                         sbi->s_mount_opt &= ~m->mount_opt;
1812         }
1813         return 1;
1814 }
1815
1816 static int parse_options(char *options, struct super_block *sb,
1817                          unsigned long *journal_devnum,
1818                          unsigned int *journal_ioprio,
1819                          int is_remount)
1820 {
1821         struct ext4_sb_info *sbi = EXT4_SB(sb);
1822         char *p;
1823         substring_t args[MAX_OPT_ARGS];
1824         int token;
1825
1826         if (!options)
1827                 return 1;
1828
1829         while ((p = strsep(&options, ",")) != NULL) {
1830                 if (!*p)
1831                         continue;
1832                 /*
1833                  * Initialize args struct so we know whether arg was
1834                  * found; some options take optional arguments.
1835                  */
1836                 args[0].to = args[0].from = NULL;
1837                 token = match_token(p, tokens, args);
1838                 if (handle_mount_opt(sb, p, token, args, journal_devnum,
1839                                      journal_ioprio, is_remount) < 0)
1840                         return 0;
1841         }
1842 #ifdef CONFIG_QUOTA
1843         /*
1844          * We do the test below only for project quotas. 'usrquota' and
1845          * 'grpquota' mount options are allowed even without quota feature
1846          * to support legacy quotas in quota files.
1847          */
1848         if (test_opt(sb, PRJQUOTA) && !ext4_has_feature_project(sb)) {
1849                 ext4_msg(sb, KERN_ERR, "Project quota feature not enabled. "
1850                          "Cannot enable project quota enforcement.");
1851                 return 0;
1852         }
1853         if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) {
1854                 if (test_opt(sb, USRQUOTA) && sbi->s_qf_names[USRQUOTA])
1855                         clear_opt(sb, USRQUOTA);
1856
1857                 if (test_opt(sb, GRPQUOTA) && sbi->s_qf_names[GRPQUOTA])
1858                         clear_opt(sb, GRPQUOTA);
1859
1860                 if (test_opt(sb, GRPQUOTA) || test_opt(sb, USRQUOTA)) {
1861                         ext4_msg(sb, KERN_ERR, "old and new quota "
1862                                         "format mixing");
1863                         return 0;
1864                 }
1865
1866                 if (!sbi->s_jquota_fmt) {
1867                         ext4_msg(sb, KERN_ERR, "journaled quota format "
1868                                         "not specified");
1869                         return 0;
1870                 }
1871         }
1872 #endif
1873         if (test_opt(sb, DIOREAD_NOLOCK)) {
1874                 int blocksize =
1875                         BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size);
1876
1877                 if (blocksize < PAGE_SIZE) {
1878                         ext4_msg(sb, KERN_ERR, "can't mount with "
1879                                  "dioread_nolock if block size != PAGE_SIZE");
1880                         return 0;
1881                 }
1882         }
1883         if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA &&
1884             test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
1885                 ext4_msg(sb, KERN_ERR, "can't mount with journal_async_commit "
1886                          "in data=ordered mode");
1887                 return 0;
1888         }
1889         return 1;
1890 }
1891
/*
 * Emit the journaled quota settings (jqfmt=, usrjquota=, grpjquota=) that
 * are currently in effect.  Compiles to nothing without CONFIG_QUOTA.
 */
static inline void ext4_show_quota_options(struct seq_file *seq,
					   struct super_block *sb)
{
#if defined(CONFIG_QUOTA)
	struct ext4_sb_info *sbi = EXT4_SB(sb);

	if (sbi->s_jquota_fmt) {
		/* An unknown format is printed as an empty name. */
		const char *name = "";

		if (sbi->s_jquota_fmt == QFMT_VFS_OLD)
			name = "vfsold";
		else if (sbi->s_jquota_fmt == QFMT_VFS_V0)
			name = "vfsv0";
		else if (sbi->s_jquota_fmt == QFMT_VFS_V1)
			name = "vfsv1";

		seq_printf(seq, ",jqfmt=%s", name);
	}

	if (sbi->s_qf_names[USRQUOTA] != NULL)
		seq_show_option(seq, "usrjquota", sbi->s_qf_names[USRQUOTA]);

	if (sbi->s_qf_names[GRPQUOTA] != NULL)
		seq_show_option(seq, "grpjquota", sbi->s_qf_names[GRPQUOTA]);
#endif
}
1922
1923 static const char *token2str(int token)
1924 {
1925         const struct match_token *t;
1926
1927         for (t = tokens; t->token != Opt_err; t++)
1928                 if (t->token == token && !strchr(t->pattern, '='))
1929                         break;
1930         return t->pattern;
1931 }
1932
1933 /*
1934  * Show an option if
1935  *  - it's set to a non-default value OR
1936  *  - if the per-sb default is different from the global default
1937  */
1938 static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
1939                               int nodefs)
1940 {
1941         struct ext4_sb_info *sbi = EXT4_SB(sb);
1942         struct ext4_super_block *es = sbi->s_es;
1943         int def_errors, def_mount_opt = nodefs ? 0 : sbi->s_def_mount_opt;
1944         const struct mount_opts *m;
1945         char sep = nodefs ? '\n' : ',';
1946
1947 #define SEQ_OPTS_PUTS(str) seq_printf(seq, "%c" str, sep)
1948 #define SEQ_OPTS_PRINT(str, arg) seq_printf(seq, "%c" str, sep, arg)
1949
1950         if (sbi->s_sb_block != 1)
1951                 SEQ_OPTS_PRINT("sb=%llu", sbi->s_sb_block);
1952
1953         for (m = ext4_mount_opts; m->token != Opt_err; m++) {
1954                 int want_set = m->flags & MOPT_SET;
1955                 if (((m->flags & (MOPT_SET|MOPT_CLEAR)) == 0) ||
1956                     (m->flags & MOPT_CLEAR_ERR))
1957                         continue;
1958                 if (!(m->mount_opt & (sbi->s_mount_opt ^ def_mount_opt)))
1959                         continue; /* skip if same as the default */
1960                 if ((want_set &&
1961                      (sbi->s_mount_opt & m->mount_opt) != m->mount_opt) ||
1962                     (!want_set && (sbi->s_mount_opt & m->mount_opt)))
1963                         continue; /* select Opt_noFoo vs Opt_Foo */
1964                 SEQ_OPTS_PRINT("%s", token2str(m->token));
1965         }
1966
1967         if (nodefs || !uid_eq(sbi->s_resuid, make_kuid(&init_user_ns, EXT4_DEF_RESUID)) ||
1968             le16_to_cpu(es->s_def_resuid) != EXT4_DEF_RESUID)
1969                 SEQ_OPTS_PRINT("resuid=%u",
1970                                 from_kuid_munged(&init_user_ns, sbi->s_resuid));
1971         if (nodefs || !gid_eq(sbi->s_resgid, make_kgid(&init_user_ns, EXT4_DEF_RESGID)) ||
1972             le16_to_cpu(es->s_def_resgid) != EXT4_DEF_RESGID)
1973                 SEQ_OPTS_PRINT("resgid=%u",
1974                                 from_kgid_munged(&init_user_ns, sbi->s_resgid));
1975         def_errors = nodefs ? -1 : le16_to_cpu(es->s_errors);
1976         if (test_opt(sb, ERRORS_RO) && def_errors != EXT4_ERRORS_RO)
1977                 SEQ_OPTS_PUTS("errors=remount-ro");
1978         if (test_opt(sb, ERRORS_CONT) && def_errors != EXT4_ERRORS_CONTINUE)
1979                 SEQ_OPTS_PUTS("errors=continue");
1980         if (test_opt(sb, ERRORS_PANIC) && def_errors != EXT4_ERRORS_PANIC)
1981                 SEQ_OPTS_PUTS("errors=panic");
1982         if (nodefs || sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ)
1983                 SEQ_OPTS_PRINT("commit=%lu", sbi->s_commit_interval / HZ);
1984         if (nodefs || sbi->s_min_batch_time != EXT4_DEF_MIN_BATCH_TIME)
1985                 SEQ_OPTS_PRINT("min_batch_time=%u", sbi->s_min_batch_time);
1986         if (nodefs || sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME)
1987                 SEQ_OPTS_PRINT("max_batch_time=%u", sbi->s_max_batch_time);
1988         if (sb->s_flags & MS_I_VERSION)
1989                 SEQ_OPTS_PUTS("i_version");
1990         if (nodefs || sbi->s_stripe)
1991                 SEQ_OPTS_PRINT("stripe=%lu", sbi->s_stripe);
1992         if (EXT4_MOUNT_DATA_FLAGS & (sbi->s_mount_opt ^ def_mount_opt)) {
1993                 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
1994                         SEQ_OPTS_PUTS("data=journal");
1995                 else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
1996                         SEQ_OPTS_PUTS("data=ordered");
1997                 else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)
1998                         SEQ_OPTS_PUTS("data=writeback");
1999         }
2000         if (nodefs ||
2001             sbi->s_inode_readahead_blks != EXT4_DEF_INODE_READAHEAD_BLKS)
2002                 SEQ_OPTS_PRINT("inode_readahead_blks=%u",
2003                                sbi->s_inode_readahead_blks);
2004
2005         if (nodefs || (test_opt(sb, INIT_INODE_TABLE) &&
2006                        (sbi->s_li_wait_mult != EXT4_DEF_LI_WAIT_MULT)))
2007                 SEQ_OPTS_PRINT("init_itable=%u", sbi->s_li_wait_mult);
2008         if (nodefs || sbi->s_max_dir_size_kb)
2009                 SEQ_OPTS_PRINT("max_dir_size_kb=%u", sbi->s_max_dir_size_kb);
2010         if (test_opt(sb, DATA_ERR_ABORT))
2011                 SEQ_OPTS_PUTS("data_err=abort");
2012
2013         ext4_show_quota_options(seq, sb);
2014         return 0;
2015 }
2016
2017 static int ext4_show_options(struct seq_file *seq, struct dentry *root)
2018 {
2019         return _ext4_show_options(seq, root->d_sb, 0);
2020 }
2021
2022 int ext4_seq_options_show(struct seq_file *seq, void *offset)
2023 {
2024         struct super_block *sb = seq->private;
2025         int rc;
2026
2027         seq_puts(seq, (sb->s_flags & MS_RDONLY) ? "ro" : "rw");
2028         rc = _ext4_show_options(seq, sb, 1);
2029         seq_puts(seq, "\n");
2030         return rc;
2031 }
2032
2033 static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
2034                             int read_only)
2035 {
2036         struct ext4_sb_info *sbi = EXT4_SB(sb);
2037         int res = 0;
2038
2039         if (le32_to_cpu(es->s_rev_level) > EXT4_MAX_SUPP_REV) {
2040                 ext4_msg(sb, KERN_ERR, "revision level too high, "
2041                          "forcing read-only mode");
2042                 res = MS_RDONLY;
2043         }
2044         if (read_only)
2045                 goto done;
2046         if (!(sbi->s_mount_state & EXT4_VALID_FS))
2047                 ext4_msg(sb, KERN_WARNING, "warning: mounting unchecked fs, "
2048                          "running e2fsck is recommended");
2049         else if (sbi->s_mount_state & EXT4_ERROR_FS)
2050                 ext4_msg(sb, KERN_WARNING,
2051                          "warning: mounting fs with errors, "
2052                          "running e2fsck is recommended");
2053         else if ((__s16) le16_to_cpu(es->s_max_mnt_count) > 0 &&
2054                  le16_to_cpu(es->s_mnt_count) >=
2055                  (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count))
2056                 ext4_msg(sb, KERN_WARNING,
2057                          "warning: maximal mount count reached, "
2058                          "running e2fsck is recommended");
2059         else if (le32_to_cpu(es->s_checkinterval) &&
2060                 (le32_to_cpu(es->s_lastcheck) +
2061                         le32_to_cpu(es->s_checkinterval) <= get_seconds()))
2062                 ext4_msg(sb, KERN_WARNING,
2063                          "warning: checktime reached, "
2064                          "running e2fsck is recommended");
2065         if (!sbi->s_journal)
2066                 es->s_state &= cpu_to_le16(~EXT4_VALID_FS);
2067         if (!(__s16) le16_to_cpu(es->s_max_mnt_count))
2068                 es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT);
2069         le16_add_cpu(&es->s_mnt_count, 1);
2070         es->s_mtime = cpu_to_le32(get_seconds());
2071         ext4_update_dynamic_rev(sb);
2072         if (sbi->s_journal)
2073                 ext4_set_feature_journal_needs_recovery(sb);
2074
2075         ext4_commit_super(sb, 1);
2076 done:
2077         if (test_opt(sb, DEBUG))
2078                 printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, "
2079                                 "bpg=%lu, ipg=%lu, mo=%04x, mo2=%04x]\n",
2080                         sb->s_blocksize,
2081                         sbi->s_groups_count,
2082                         EXT4_BLOCKS_PER_GROUP(sb),
2083                         EXT4_INODES_PER_GROUP(sb),
2084                         sbi->s_mount_opt, sbi->s_mount_opt2);
2085
2086         cleancache_init_fs(sb);
2087         return res;
2088 }
2089
2090 int ext4_alloc_flex_bg_array(struct super_block *sb, ext4_group_t ngroup)
2091 {
2092         struct ext4_sb_info *sbi = EXT4_SB(sb);
2093         struct flex_groups *new_groups;
2094         int size;
2095
2096         if (!sbi->s_log_groups_per_flex)
2097                 return 0;
2098
2099         size = ext4_flex_group(sbi, ngroup - 1) + 1;
2100         if (size <= sbi->s_flex_groups_allocated)
2101                 return 0;
2102
2103         size = roundup_pow_of_two(size * sizeof(struct flex_groups));
2104         new_groups = ext4_kvzalloc(size, GFP_KERNEL);
2105         if (!new_groups) {
2106                 ext4_msg(sb, KERN_ERR, "not enough memory for %d flex groups",
2107                          size / (int) sizeof(struct flex_groups));
2108                 return -ENOMEM;
2109         }
2110
2111         if (sbi->s_flex_groups) {
2112                 memcpy(new_groups, sbi->s_flex_groups,
2113                        (sbi->s_flex_groups_allocated *
2114                         sizeof(struct flex_groups)));
2115                 kvfree(sbi->s_flex_groups);
2116         }
2117         sbi->s_flex_groups = new_groups;
2118         sbi->s_flex_groups_allocated = size / sizeof(struct flex_groups);
2119         return 0;
2120 }
2121
2122 static int ext4_fill_flex_info(struct super_block *sb)
2123 {
2124         struct ext4_sb_info *sbi = EXT4_SB(sb);
2125         struct ext4_group_desc *gdp = NULL;
2126         ext4_group_t flex_group;
2127         int i, err;
2128
2129         sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex;
2130         if (sbi->s_log_groups_per_flex < 1 || sbi->s_log_groups_per_flex > 31) {
2131                 sbi->s_log_groups_per_flex = 0;
2132                 return 1;
2133         }
2134
2135         err = ext4_alloc_flex_bg_array(sb, sbi->s_groups_count);
2136         if (err)
2137                 goto failed;
2138
2139         for (i = 0; i < sbi->s_groups_count; i++) {
2140                 gdp = ext4_get_group_desc(sb, i, NULL);
2141
2142                 flex_group = ext4_flex_group(sbi, i);
2143                 atomic_add(ext4_free_inodes_count(sb, gdp),
2144                            &sbi->s_flex_groups[flex_group].free_inodes);
2145                 atomic64_add(ext4_free_group_clusters(sb, gdp),
2146                              &sbi->s_flex_groups[flex_group].free_clusters);
2147                 atomic_add(ext4_used_dirs_count(sb, gdp),
2148                            &sbi->s_flex_groups[flex_group].used_dirs);
2149         }
2150
2151         return 1;
2152 failed:
2153         return 0;
2154 }
2155
2156 static __le16 ext4_group_desc_csum(struct super_block *sb, __u32 block_group,
2157                                    struct ext4_group_desc *gdp)
2158 {
2159         int offset = offsetof(struct ext4_group_desc, bg_checksum);
2160         __u16 crc = 0;
2161         __le32 le_group = cpu_to_le32(block_group);
2162         struct ext4_sb_info *sbi = EXT4_SB(sb);
2163
2164         if (ext4_has_metadata_csum(sbi->s_sb)) {
2165                 /* Use new metadata_csum algorithm */
2166                 __u32 csum32;
2167                 __u16 dummy_csum = 0;
2168
2169                 csum32 = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&le_group,
2170                                      sizeof(le_group));
2171                 csum32 = ext4_chksum(sbi, csum32, (__u8 *)gdp, offset);
2172                 csum32 = ext4_chksum(sbi, csum32, (__u8 *)&dummy_csum,
2173                                      sizeof(dummy_csum));
2174                 offset += sizeof(dummy_csum);
2175                 if (offset < sbi->s_desc_size)
2176                         csum32 = ext4_chksum(sbi, csum32, (__u8 *)gdp + offset,
2177                                              sbi->s_desc_size - offset);
2178
2179                 crc = csum32 & 0xFFFF;
2180                 goto out;
2181         }
2182
2183         /* old crc16 code */
2184         if (!ext4_has_feature_gdt_csum(sb))
2185                 return 0;
2186
2187         crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid));
2188         crc = crc16(crc, (__u8 *)&le_group, sizeof(le_group));
2189         crc = crc16(crc, (__u8 *)gdp, offset);
2190         offset += sizeof(gdp->bg_checksum); /* skip checksum */
2191         /* for checksum of struct ext4_group_desc do the rest...*/
2192         if (ext4_has_feature_64bit(sb) &&
2193             offset < le16_to_cpu(sbi->s_es->s_desc_size))
2194                 crc = crc16(crc, (__u8 *)gdp + offset,
2195                             le16_to_cpu(sbi->s_es->s_desc_size) -
2196                                 offset);
2197
2198 out:
2199         return cpu_to_le16(crc);
2200 }
2201
2202 int ext4_group_desc_csum_verify(struct super_block *sb, __u32 block_group,
2203                                 struct ext4_group_desc *gdp)
2204 {
2205         if (ext4_has_group_desc_csum(sb) &&
2206             (gdp->bg_checksum != ext4_group_desc_csum(sb, block_group, gdp)))
2207                 return 0;
2208
2209         return 1;
2210 }
2211
2212 void ext4_group_desc_csum_set(struct super_block *sb, __u32 block_group,
2213                               struct ext4_group_desc *gdp)
2214 {
2215         if (!ext4_has_group_desc_csum(sb))
2216                 return;
2217         gdp->bg_checksum = ext4_group_desc_csum(sb, block_group, gdp);
2218 }
2219
2220 /* Called at mount-time, super-block is locked */
2221 static int ext4_check_descriptors(struct super_block *sb,
2222                                   ext4_fsblk_t sb_block,
2223                                   ext4_group_t *first_not_zeroed)
2224 {
2225         struct ext4_sb_info *sbi = EXT4_SB(sb);
2226         ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block);
2227         ext4_fsblk_t last_block;
2228         ext4_fsblk_t block_bitmap;
2229         ext4_fsblk_t inode_bitmap;
2230         ext4_fsblk_t inode_table;
2231         int flexbg_flag = 0;
2232         ext4_group_t i, grp = sbi->s_groups_count;
2233
2234         if (ext4_has_feature_flex_bg(sb))
2235                 flexbg_flag = 1;
2236
2237         ext4_debug("Checking group descriptors");
2238
2239         for (i = 0; i < sbi->s_groups_count; i++) {
2240                 struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL);
2241
2242                 if (i == sbi->s_groups_count - 1 || flexbg_flag)
2243                         last_block = ext4_blocks_count(sbi->s_es) - 1;
2244                 else
2245                         last_block = first_block +
2246                                 (EXT4_BLOCKS_PER_GROUP(sb) - 1);
2247
2248                 if ((grp == sbi->s_groups_count) &&
2249                    !(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
2250                         grp = i;
2251
2252                 block_bitmap = ext4_block_bitmap(sb, gdp);
2253                 if (block_bitmap == sb_block) {
2254                         ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
2255                                  "Block bitmap for group %u overlaps "
2256                                  "superblock", i);
2257                 }
2258                 if (block_bitmap < first_block || block_bitmap > last_block) {
2259                         ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
2260                                "Block bitmap for group %u not in group "
2261                                "(block %llu)!", i, block_bitmap);
2262                         return 0;
2263                 }
2264                 inode_bitmap = ext4_inode_bitmap(sb, gdp);
2265                 if (inode_bitmap == sb_block) {
2266                         ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
2267                                  "Inode bitmap for group %u overlaps "
2268                                  "superblock", i);
2269                 }
2270                 if (inode_bitmap < first_block || inode_bitmap > last_block) {
2271                         ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
2272                                "Inode bitmap for group %u not in group "
2273                                "(block %llu)!", i, inode_bitmap);
2274                         return 0;
2275                 }
2276                 inode_table = ext4_inode_table(sb, gdp);
2277                 if (inode_table == sb_block) {
2278                         ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
2279                                  "Inode table for group %u overlaps "
2280                                  "superblock", i);
2281                 }
2282                 if (inode_table < first_block ||
2283                     inode_table + sbi->s_itb_per_group - 1 > last_block) {
2284                         ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
2285                                "Inode table for group %u not in group "
2286                                "(block %llu)!", i, inode_table);
2287                         return 0;
2288                 }
2289                 ext4_lock_group(sb, i);
2290                 if (!ext4_group_desc_csum_verify(sb, i, gdp)) {
2291                         ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
2292                                  "Checksum for group %u failed (%u!=%u)",
2293                                  i, le16_to_cpu(ext4_group_desc_csum(sb, i,
2294                                      gdp)), le16_to_cpu(gdp->bg_checksum));
2295                         if (!(sb->s_flags & MS_RDONLY)) {
2296                                 ext4_unlock_group(sb, i);
2297                                 return 0;
2298                         }
2299                 }
2300                 ext4_unlock_group(sb, i);
2301                 if (!flexbg_flag)
2302                         first_block += EXT4_BLOCKS_PER_GROUP(sb);
2303         }
2304         if (NULL != first_not_zeroed)
2305                 *first_not_zeroed = grp;
2306         return 1;
2307 }
2308
2309 /* ext4_orphan_cleanup() walks a singly-linked list of inodes (starting at
2310  * the superblock) which were deleted from all directories, but held open by
2311  * a process at the time of a crash.  We walk the list and try to delete these
2312  * inodes at recovery time (only with a read-write filesystem).
2313  *
2314  * In order to keep the orphan inode chain consistent during traversal (in
2315  * case of crash during recovery), we link each inode into the superblock
2316  * orphan list_head and handle it the same way as an inode deletion during
2317  * normal operation (which journals the operations for us).
2318  *
2319  * We only do an iget() and an iput() on each inode, which is very safe if we
2320  * accidentally point at an in-use or already deleted inode.  The worst that
2321  * can happen in this case is that we get a "bit already cleared" message from
2322  * ext4_free_inode().  The only reason we would point at a wrong inode is if
2323  * e2fsck was run on this filesystem, and it must have already done the orphan
2324  * inode cleanup for us, so we can safely abort without any further action.
2325  */
2326 static void ext4_orphan_cleanup(struct super_block *sb,
2327                                 struct ext4_super_block *es)
2328 {
2329         unsigned int s_flags = sb->s_flags;
2330         int nr_orphans = 0, nr_truncates = 0;
2331 #ifdef CONFIG_QUOTA
2332         int i;
2333 #endif
2334         if (!es->s_last_orphan) {
2335                 jbd_debug(4, "no orphan inodes to clean up\n");
2336                 return;
2337         }
2338
2339         if (bdev_read_only(sb->s_bdev)) {
2340                 ext4_msg(sb, KERN_ERR, "write access "
2341                         "unavailable, skipping orphan cleanup");
2342                 return;
2343         }
2344
2345         /* Check if feature set would not allow a r/w mount */
2346         if (!ext4_feature_set_ok(sb, 0)) {
2347                 ext4_msg(sb, KERN_INFO, "Skipping orphan cleanup due to "
2348                          "unknown ROCOMPAT features");
2349                 return;
2350         }
2351
2352         if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
2353                 /* don't clear list on RO mount w/ errors */
2354                 if (es->s_last_orphan && !(s_flags & MS_RDONLY)) {
2355                         ext4_msg(sb, KERN_INFO, "Errors on filesystem, "
2356                                   "clearing orphan list.\n");
2357                         es->s_last_orphan = 0;
2358                 }
2359                 jbd_debug(1, "Skipping orphan recovery on fs with errors.\n");
2360                 return;
2361         }
2362
2363         if (s_flags & MS_RDONLY) {
2364                 ext4_msg(sb, KERN_INFO, "orphan cleanup on readonly fs");
2365                 sb->s_flags &= ~MS_RDONLY;
2366         }
2367 #ifdef CONFIG_QUOTA
2368         /* Needed for iput() to work correctly and not trash data */
2369         sb->s_flags |= MS_ACTIVE;
2370         /* Turn on quotas so that they are updated correctly */
2371         for (i = 0; i < EXT4_MAXQUOTAS; i++) {
2372                 if (EXT4_SB(sb)->s_qf_names[i]) {
2373                         int ret = ext4_quota_on_mount(sb, i);
2374                         if (ret < 0)
2375                                 ext4_msg(sb, KERN_ERR,
2376                                         "Cannot turn on journaled "
2377                                         "quota: error %d", ret);
2378                 }
2379         }
2380 #endif
2381
2382         while (es->s_last_orphan) {
2383                 struct inode *inode;
2384
2385                 /*
2386                  * We may have encountered an error during cleanup; if
2387                  * so, skip the rest.
2388                  */
2389                 if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
2390                         jbd_debug(1, "Skipping orphan recovery on fs with errors.\n");
2391                         es->s_last_orphan = 0;
2392                         break;
2393                 }
2394
2395                 inode = ext4_orphan_get(sb, le32_to_cpu(es->s_last_orphan));
2396                 if (IS_ERR(inode)) {
2397                         es->s_last_orphan = 0;
2398                         break;
2399                 }
2400
2401                 list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan);
2402                 dquot_initialize(inode);
2403                 if (inode->i_nlink) {
2404                         if (test_opt(sb, DEBUG))
2405                                 ext4_msg(sb, KERN_DEBUG,
2406                                         "%s: truncating inode %lu to %lld bytes",
2407                                         __func__, inode->i_ino, inode->i_size);
2408                         jbd_debug(2, "truncating inode %lu to %lld bytes\n",
2409                                   inode->i_ino, inode->i_size);
2410                         inode_lock(inode);
2411                         truncate_inode_pages(inode->i_mapping, inode->i_size);
2412                         ext4_truncate(inode);
2413                         inode_unlock(inode);
2414                         nr_truncates++;
2415                 } else {
2416                         if (test_opt(sb, DEBUG))
2417                                 ext4_msg(sb, KERN_DEBUG,
2418                                         "%s: deleting unreferenced inode %lu",
2419                                         __func__, inode->i_ino);
2420                         jbd_debug(2, "deleting unreferenced inode %lu\n",
2421                                   inode->i_ino);
2422                         nr_orphans++;
2423                 }
2424                 iput(inode);  /* The delete magic happens here! */
2425         }
2426
2427 #define PLURAL(x) (x), ((x) == 1) ? "" : "s"
2428
2429         if (nr_orphans)
2430                 ext4_msg(sb, KERN_INFO, "%d orphan inode%s deleted",
2431                        PLURAL(nr_orphans));
2432         if (nr_truncates)
2433                 ext4_msg(sb, KERN_INFO, "%d truncate%s cleaned up",
2434                        PLURAL(nr_truncates));
2435 #ifdef CONFIG_QUOTA
2436         /* Turn quotas off */
2437         for (i = 0; i < EXT4_MAXQUOTAS; i++) {
2438                 if (sb_dqopt(sb)->files[i])
2439                         dquot_quota_off(sb, i);
2440         }
2441 #endif
2442         sb->s_flags = s_flags; /* Restore MS_RDONLY status */
2443 }
2444
2445 /*
2446  * Maximal extent format file size.
2447  * Resulting logical blkno at s_maxbytes must fit in our on-disk
2448  * extent format containers, within a sector_t, and within i_blocks
2449  * in the vfs.  ext4 inode has 48 bits of i_block in fsblock units,
2450  * so that won't be a limiting factor.
2451  *
2452  * However there is other limiting factor. We do store extents in the form
2453  * of starting block and length, hence the resulting length of the extent
2454  * covering maximum file size must fit into on-disk format containers as
2455  * well. Given that length is always by 1 unit bigger than max unit (because
2456  * we count 0 as well) we have to lower the s_maxbytes by one fs block.
2457  *
2458  * Note, this does *not* consider any metadata overhead for vfs i_blocks.
2459  */
2460 static loff_t ext4_max_size(int blkbits, int has_huge_files)
2461 {
2462         loff_t res;
2463         loff_t upper_limit = MAX_LFS_FILESIZE;
2464
2465         /* small i_blocks in vfs inode? */
2466         if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) {
2467                 /*
2468                  * CONFIG_LBDAF is not enabled implies the inode
2469                  * i_block represent total blocks in 512 bytes
2470                  * 32 == size of vfs inode i_blocks * 8
2471                  */
2472                 upper_limit = (1LL << 32) - 1;
2473
2474                 /* total blocks in file system block size */
2475                 upper_limit >>= (blkbits - 9);
2476                 upper_limit <<= blkbits;
2477         }
2478
2479         /*
2480          * 32-bit extent-start container, ee_block. We lower the maxbytes
2481          * by one fs block, so ee_len can cover the extent of maximum file
2482          * size
2483          */
2484         res = (1LL << 32) - 1;
2485         res <<= blkbits;
2486
2487         /* Sanity check against vm- & vfs- imposed limits */
2488         if (res > upper_limit)
2489                 res = upper_limit;
2490
2491         return res;
2492 }
2493
2494 /*
2495  * Maximal bitmap file size.  There is a direct, and {,double-,triple-}indirect
2496  * block limit, and also a limit of (2^48 - 1) 512-byte sectors in i_blocks.
2497  * We need to be 1 filesystem block less than the 2^48 sector limit.
2498  */
2499 static loff_t ext4_max_bitmap_size(int bits, int has_huge_files)
2500 {
2501         loff_t res = EXT4_NDIR_BLOCKS;
2502         int meta_blocks;
2503         loff_t upper_limit;
2504         /* This is calculated to be the largest file size for a dense, block
2505          * mapped file such that the file's total number of 512-byte sectors,
2506          * including data and all indirect blocks, does not exceed (2^48 - 1).
2507          *
2508          * __u32 i_blocks_lo and _u16 i_blocks_high represent the total
2509          * number of 512-byte sectors of the file.
2510          */
2511
2512         if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) {
2513                 /*
2514                  * !has_huge_files or CONFIG_LBDAF not enabled implies that
2515                  * the inode i_block field represents total file blocks in
2516                  * 2^32 512-byte sectors == size of vfs inode i_blocks * 8
2517                  */
2518                 upper_limit = (1LL << 32) - 1;
2519
2520                 /* total blocks in file system block size */
2521                 upper_limit >>= (bits - 9);
2522
2523         } else {
2524                 /*
2525                  * We use 48 bit ext4_inode i_blocks
2526                  * With EXT4_HUGE_FILE_FL set the i_blocks
2527                  * represent total number of blocks in
2528                  * file system block size
2529                  */
2530                 upper_limit = (1LL << 48) - 1;
2531
2532         }
2533
2534         /* indirect blocks */
2535         meta_blocks = 1;
2536         /* double indirect blocks */
2537         meta_blocks += 1 + (1LL << (bits-2));
2538         /* tripple indirect blocks */
2539         meta_blocks += 1 + (1LL << (bits-2)) + (1LL << (2*(bits-2)));
2540
2541         upper_limit -= meta_blocks;
2542         upper_limit <<= bits;
2543
2544         res += 1LL << (bits-2);
2545         res += 1LL << (2*(bits-2));
2546         res += 1LL << (3*(bits-2));
2547         res <<= bits;
2548         if (res > upper_limit)
2549                 res = upper_limit;
2550
2551         if (res > MAX_LFS_FILESIZE)
2552                 res = MAX_LFS_FILESIZE;
2553
2554         return res;
2555 }
2556
2557 static ext4_fsblk_t descriptor_loc(struct super_block *sb,
2558                                    ext4_fsblk_t logical_sb_block, int nr)
2559 {
2560         struct ext4_sb_info *sbi = EXT4_SB(sb);
2561         ext4_group_t bg, first_meta_bg;
2562         int has_super = 0;
2563
2564         first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg);
2565
2566         if (!ext4_has_feature_meta_bg(sb) || nr < first_meta_bg)
2567                 return logical_sb_block + nr + 1;
2568         bg = sbi->s_desc_per_block * nr;
2569         if (ext4_bg_has_super(sb, bg))
2570                 has_super = 1;
2571
2572         /*
2573          * If we have a meta_bg fs with 1k blocks, group 0's GDT is at
2574          * block 2, not 1.  If s_first_data_block == 0 (bigalloc is enabled
2575          * on modern mke2fs or blksize > 1k on older mke2fs) then we must
2576          * compensate.
2577          */
2578         if (sb->s_blocksize == 1024 && nr == 0 &&
2579             le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block) == 0)
2580                 has_super++;
2581
2582         return (has_super + ext4_group_first_block_no(sb, bg));
2583 }
2584
2585 /**
2586  * ext4_get_stripe_size: Get the stripe size.
2587  * @sbi: In memory super block info
2588  *
2589  * If we have specified it via mount option, then
2590  * use the mount option value. If the value specified at mount time is
2591  * greater than the blocks per group use the super block value.
2592  * If the super block value is greater than blocks per group return 0.
2593  * Allocator needs it be less than blocks per group.
2594  *
2595  */
2596 static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi)
2597 {
2598         unsigned long stride = le16_to_cpu(sbi->s_es->s_raid_stride);
2599         unsigned long stripe_width =
2600                         le32_to_cpu(sbi->s_es->s_raid_stripe_width);
2601         int ret;
2602
2603         if (sbi->s_stripe && sbi->s_stripe <= sbi->s_blocks_per_group)
2604                 ret = sbi->s_stripe;
2605         else if (stripe_width <= sbi->s_blocks_per_group)
2606                 ret = stripe_width;
2607         else if (stride <= sbi->s_blocks_per_group)
2608                 ret = stride;
2609         else
2610                 ret = 0;
2611
2612         /*
2613          * If the stripe width is 1, this makes no sense and
2614          * we set it to 0 to turn off stripe handling code.
2615          */
2616         if (ret <= 1)
2617                 ret = 0;
2618
2619         return ret;
2620 }
2621
2622 /*
2623  * Check whether this filesystem can be mounted based on
2624  * the features present and the RDONLY/RDWR mount requested.
2625  * Returns 1 if this filesystem can be mounted as requested,
2626  * 0 if it cannot be.
2627  */
2628 static int ext4_feature_set_ok(struct super_block *sb, int readonly)
2629 {
2630         if (ext4_has_unknown_ext4_incompat_features(sb)) {
2631                 ext4_msg(sb, KERN_ERR,
2632                         "Couldn't mount because of "
2633                         "unsupported optional features (%x)",
2634                         (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_incompat) &
2635                         ~EXT4_FEATURE_INCOMPAT_SUPP));
2636                 return 0;
2637         }
2638
2639         if (readonly)
2640                 return 1;
2641
2642         if (ext4_has_feature_readonly(sb)) {
2643                 ext4_msg(sb, KERN_INFO, "filesystem is read-only");
2644                 sb->s_flags |= MS_RDONLY;
2645                 return 1;
2646         }
2647
2648         /* Check that feature set is OK for a read-write mount */
2649         if (ext4_has_unknown_ext4_ro_compat_features(sb)) {
2650                 ext4_msg(sb, KERN_ERR, "couldn't mount RDWR because of "
2651                          "unsupported optional features (%x)",
2652                          (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_ro_compat) &
2653                                 ~EXT4_FEATURE_RO_COMPAT_SUPP));
2654                 return 0;
2655         }
2656         /*
2657          * Large file size enabled file system can only be mounted
2658          * read-write on 32-bit systems if kernel is built with CONFIG_LBDAF
2659          */
2660         if (ext4_has_feature_huge_file(sb)) {
2661                 if (sizeof(blkcnt_t) < sizeof(u64)) {
2662                         ext4_msg(sb, KERN_ERR, "Filesystem with huge files "
2663                                  "cannot be mounted RDWR without "
2664                                  "CONFIG_LBDAF");
2665                         return 0;
2666                 }
2667         }
2668         if (ext4_has_feature_bigalloc(sb) && !ext4_has_feature_extents(sb)) {
2669                 ext4_msg(sb, KERN_ERR,
2670                          "Can't support bigalloc feature without "
2671                          "extents feature\n");
2672                 return 0;
2673         }
2674
2675 #ifndef CONFIG_QUOTA
2676         if (ext4_has_feature_quota(sb) && !readonly) {
2677                 ext4_msg(sb, KERN_ERR,
2678                          "Filesystem with quota feature cannot be mounted RDWR "
2679                          "without CONFIG_QUOTA");
2680                 return 0;
2681         }
2682         if (ext4_has_feature_project(sb) && !readonly) {
2683                 ext4_msg(sb, KERN_ERR,
2684                          "Filesystem with project quota feature cannot be mounted RDWR "
2685                          "without CONFIG_QUOTA");
2686                 return 0;
2687         }
2688 #endif  /* CONFIG_QUOTA */
2689         return 1;
2690 }
2691
2692 /*
2693  * This function is called once a day if we have errors logged
2694  * on the file system
2695  */
2696 static void print_daily_error_info(unsigned long arg)
2697 {
2698         struct super_block *sb = (struct super_block *) arg;
2699         struct ext4_sb_info *sbi;
2700         struct ext4_super_block *es;
2701
2702         sbi = EXT4_SB(sb);
2703         es = sbi->s_es;
2704
2705         if (es->s_error_count)
2706                 /* fsck newer than v1.41.13 is needed to clean this condition. */
2707                 ext4_msg(sb, KERN_NOTICE, "error count since last fsck: %u",
2708                          le32_to_cpu(es->s_error_count));
2709         if (es->s_first_error_time) {
2710                 printk(KERN_NOTICE "EXT4-fs (%s): initial error at time %u: %.*s:%d",
2711                        sb->s_id, le32_to_cpu(es->s_first_error_time),
2712                        (int) sizeof(es->s_first_error_func),
2713                        es->s_first_error_func,
2714                        le32_to_cpu(es->s_first_error_line));
2715                 if (es->s_first_error_ino)
2716                         printk(": inode %u",
2717                                le32_to_cpu(es->s_first_error_ino));
2718                 if (es->s_first_error_block)
2719                         printk(": block %llu", (unsigned long long)
2720                                le64_to_cpu(es->s_first_error_block));
2721                 printk("\n");
2722         }
2723         if (es->s_last_error_time) {
2724                 printk(KERN_NOTICE "EXT4-fs (%s): last error at time %u: %.*s:%d",
2725                        sb->s_id, le32_to_cpu(es->s_last_error_time),
2726                        (int) sizeof(es->s_last_error_func),
2727                        es->s_last_error_func,
2728                        le32_to_cpu(es->s_last_error_line));
2729                 if (es->s_last_error_ino)
2730                         printk(": inode %u",
2731                                le32_to_cpu(es->s_last_error_ino));
2732                 if (es->s_last_error_block)
2733                         printk(": block %llu", (unsigned long long)
2734                                le64_to_cpu(es->s_last_error_block));
2735                 printk("\n");
2736         }
2737         mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ);  /* Once a day */
2738 }
2739
2740 /* Find next suitable group and run ext4_init_inode_table */
2741 static int ext4_run_li_request(struct ext4_li_request *elr)
2742 {
2743         struct ext4_group_desc *gdp = NULL;
2744         ext4_group_t group, ngroups;
2745         struct super_block *sb;
2746         unsigned long timeout = 0;
2747         int ret = 0;
2748
2749         sb = elr->lr_super;
2750         ngroups = EXT4_SB(sb)->s_groups_count;
2751
2752         for (group = elr->lr_next_group; group < ngroups; group++) {
2753                 gdp = ext4_get_group_desc(sb, group, NULL);
2754                 if (!gdp) {
2755                         ret = 1;
2756                         break;
2757                 }
2758
2759                 if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
2760                         break;
2761         }
2762
2763         if (group >= ngroups)
2764                 ret = 1;
2765
2766         if (!ret) {
2767                 timeout = jiffies;
2768                 ret = ext4_init_inode_table(sb, group,
2769                                             elr->lr_timeout ? 0 : 1);
2770                 if (elr->lr_timeout == 0) {
2771                         timeout = (jiffies - timeout) *
2772                                   elr->lr_sbi->s_li_wait_mult;
2773                         elr->lr_timeout = timeout;
2774                 }
2775                 elr->lr_next_sched = jiffies + elr->lr_timeout;
2776                 elr->lr_next_group = group + 1;
2777         }
2778         return ret;
2779 }
2780
2781 /*
2782  * Remove lr_request from the list_request and free the
2783  * request structure. Should be called with li_list_mtx held
2784  */
2785 static void ext4_remove_li_request(struct ext4_li_request *elr)
2786 {
2787         struct ext4_sb_info *sbi;
2788
2789         if (!elr)
2790                 return;
2791
2792         sbi = elr->lr_sbi;
2793
2794         list_del(&elr->lr_request);
2795         sbi->s_li_request = NULL;
2796         kfree(elr);
2797 }
2798
2799 static void ext4_unregister_li_request(struct super_block *sb)
2800 {
2801         mutex_lock(&ext4_li_mtx);
2802         if (!ext4_li_info) {
2803                 mutex_unlock(&ext4_li_mtx);
2804                 return;
2805         }
2806
2807         mutex_lock(&ext4_li_info->li_list_mtx);
2808         ext4_remove_li_request(EXT4_SB(sb)->s_li_request);
2809         mutex_unlock(&ext4_li_info->li_list_mtx);
2810         mutex_unlock(&ext4_li_mtx);
2811 }
2812
2813 static struct task_struct *ext4_lazyinit_task;
2814
2815 /*
2816  * This is the function where ext4lazyinit thread lives. It walks
2817  * through the request list searching for next scheduled filesystem.
2818  * When such a fs is found, run the lazy initialization request
2819  * (ext4_rn_li_request) and keep track of the time spend in this
2820  * function. Based on that time we compute next schedule time of
2821  * the request. When walking through the list is complete, compute
2822  * next waking time and put itself into sleep.
2823  */
2824 static int ext4_lazyinit_thread(void *arg)
2825 {
2826         struct ext4_lazy_init *eli = (struct ext4_lazy_init *)arg;
2827         struct list_head *pos, *n;
2828         struct ext4_li_request *elr;
2829         unsigned long next_wakeup, cur;
2830
2831         BUG_ON(NULL == eli);
2832
2833 cont_thread:
2834         while (true) {
2835                 next_wakeup = MAX_JIFFY_OFFSET;
2836
2837                 mutex_lock(&eli->li_list_mtx);
2838                 if (list_empty(&eli->li_request_list)) {
2839                         mutex_unlock(&eli->li_list_mtx);
2840                         goto exit_thread;
2841                 }
2842                 list_for_each_safe(pos, n, &eli->li_request_list) {
2843                         int err = 0;
2844                         int progress = 0;
2845                         elr = list_entry(pos, struct ext4_li_request,
2846                                          lr_request);
2847
2848                         if (time_before(jiffies, elr->lr_next_sched)) {
2849                                 if (time_before(elr->lr_next_sched, next_wakeup))
2850                                         next_wakeup = elr->lr_next_sched;
2851                                 continue;
2852                         }
2853                         if (down_read_trylock(&elr->lr_super->s_umount)) {
2854                                 if (sb_start_write_trylock(elr->lr_super)) {
2855                                         progress = 1;
2856                                         /*
2857                                          * We hold sb->s_umount, sb can not
2858                                          * be removed from the list, it is
2859                                          * now safe to drop li_list_mtx
2860                                          */
2861                                         mutex_unlock(&eli->li_list_mtx);
2862                                         err = ext4_run_li_request(elr);
2863                                         sb_end_write(elr->lr_super);
2864                                         mutex_lock(&eli->li_list_mtx);
2865                                         n = pos->next;
2866                                 }
2867                                 up_read((&elr->lr_super->s_umount));
2868                         }
2869                         /* error, remove the lazy_init job */
2870                         if (err) {
2871                                 ext4_remove_li_request(elr);
2872                                 continue;
2873                         }
2874                         if (!progress) {
2875                                 elr->lr_next_sched = jiffies +
2876                                         (prandom_u32()
2877                                          % (EXT4_DEF_LI_MAX_START_DELAY * HZ));
2878                                 if (time_before(elr->lr_next_sched,
2879                                                 next_wakeup))
2880                                         next_wakeup = elr->lr_next_sched;
2881                         }
2882                 }
2883                 mutex_unlock(&eli->li_list_mtx);
2884
2885                 try_to_freeze();
2886
2887                 cur = jiffies;
2888                 if ((time_after_eq(cur, next_wakeup)) ||
2889                     (MAX_JIFFY_OFFSET == next_wakeup)) {
2890                         cond_resched();
2891                         continue;
2892                 }
2893
2894                 schedule_timeout_interruptible(next_wakeup - cur);
2895
2896                 if (kthread_should_stop()) {
2897                         ext4_clear_request_list();
2898                         goto exit_thread;
2899                 }
2900         }
2901
2902 exit_thread:
2903         /*
2904          * It looks like the request list is empty, but we need
2905          * to check it under the li_list_mtx lock, to prevent any
2906          * additions into it, and of course we should lock ext4_li_mtx
2907          * to atomically free the list and ext4_li_info, because at
2908          * this point another ext4 filesystem could be registering
2909          * new one.
2910          */
2911         mutex_lock(&ext4_li_mtx);
2912         mutex_lock(&eli->li_list_mtx);
2913         if (!list_empty(&eli->li_request_list)) {
2914                 mutex_unlock(&eli->li_list_mtx);
2915                 mutex_unlock(&ext4_li_mtx);
2916                 goto cont_thread;
2917         }
2918         mutex_unlock(&eli->li_list_mtx);
2919         kfree(ext4_li_info);
2920         ext4_li_info = NULL;
2921         mutex_unlock(&ext4_li_mtx);
2922
2923         return 0;
2924 }
2925
2926 static void ext4_clear_request_list(void)
2927 {
2928         struct list_head *pos, *n;
2929         struct ext4_li_request *elr;
2930
2931         mutex_lock(&ext4_li_info->li_list_mtx);
2932         list_for_each_safe(pos, n, &ext4_li_info->li_request_list) {
2933                 elr = list_entry(pos, struct ext4_li_request,
2934                                  lr_request);
2935                 ext4_remove_li_request(elr);
2936         }
2937         mutex_unlock(&ext4_li_info->li_list_mtx);
2938 }
2939
2940 static int ext4_run_lazyinit_thread(void)
2941 {
2942         ext4_lazyinit_task = kthread_run(ext4_lazyinit_thread,
2943                                          ext4_li_info, "ext4lazyinit");
2944         if (IS_ERR(ext4_lazyinit_task)) {
2945                 int err = PTR_ERR(ext4_lazyinit_task);
2946                 ext4_clear_request_list();
2947                 kfree(ext4_li_info);
2948                 ext4_li_info = NULL;
2949                 printk(KERN_CRIT "EXT4-fs: error %d creating inode table "
2950                                  "initialization thread\n",
2951                                  err);
2952                 return err;
2953         }
2954         ext4_li_info->li_state |= EXT4_LAZYINIT_RUNNING;
2955         return 0;
2956 }
2957
2958 /*
2959  * Check whether it make sense to run itable init. thread or not.
2960  * If there is at least one uninitialized inode table, return
2961  * corresponding group number, else the loop goes through all
2962  * groups and return total number of groups.
2963  */
2964 static ext4_group_t ext4_has_uninit_itable(struct super_block *sb)
2965 {
2966         ext4_group_t group, ngroups = EXT4_SB(sb)->s_groups_count;
2967         struct ext4_group_desc *gdp = NULL;
2968
2969         for (group = 0; group < ngroups; group++) {
2970                 gdp = ext4_get_group_desc(sb, group, NULL);
2971                 if (!gdp)
2972                         continue;
2973
2974                 if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
2975                         break;
2976         }
2977
2978         return group;
2979 }
2980
2981 static int ext4_li_info_new(void)
2982 {
2983         struct ext4_lazy_init *eli = NULL;
2984
2985         eli = kzalloc(sizeof(*eli), GFP_KERNEL);
2986         if (!eli)
2987                 return -ENOMEM;
2988
2989         INIT_LIST_HEAD(&eli->li_request_list);
2990         mutex_init(&eli->li_list_mtx);
2991
2992         eli->li_state |= EXT4_LAZYINIT_QUIT;
2993
2994         ext4_li_info = eli;
2995
2996         return 0;
2997 }
2998
2999 static struct ext4_li_request *ext4_li_request_new(struct super_block *sb,
3000                                             ext4_group_t start)
3001 {
3002         struct ext4_sb_info *sbi = EXT4_SB(sb);
3003         struct ext4_li_request *elr;
3004
3005         elr = kzalloc(sizeof(*elr), GFP_KERNEL);
3006         if (!elr)
3007                 return NULL;
3008
3009         elr->lr_super = sb;
3010         elr->lr_sbi = sbi;
3011         elr->lr_next_group = start;
3012
3013         /*
3014          * Randomize first schedule time of the request to
3015          * spread the inode table initialization requests
3016          * better.
3017          */
3018         elr->lr_next_sched = jiffies + (prandom_u32() %
3019                                 (EXT4_DEF_LI_MAX_START_DELAY * HZ));
3020         return elr;
3021 }
3022
3023 int ext4_register_li_request(struct super_block *sb,
3024                              ext4_group_t first_not_zeroed)
3025 {
3026         struct ext4_sb_info *sbi = EXT4_SB(sb);
3027         struct ext4_li_request *elr = NULL;
3028         ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
3029         int ret = 0;
3030
3031         mutex_lock(&ext4_li_mtx);
3032         if (sbi->s_li_request != NULL) {
3033                 /*
3034                  * Reset timeout so it can be computed again, because
3035                  * s_li_wait_mult might have changed.
3036                  */
3037                 sbi->s_li_request->lr_timeout = 0;
3038                 goto out;
3039         }
3040
3041         if (first_not_zeroed == ngroups ||
3042             (sb->s_flags & MS_RDONLY) ||
3043             !test_opt(sb, INIT_INODE_TABLE))
3044                 goto out;
3045
3046         elr = ext4_li_request_new(sb, first_not_zeroed);
3047         if (!elr) {
3048                 ret = -ENOMEM;
3049                 goto out;
3050         }
3051
3052         if (NULL == ext4_li_info) {
3053                 ret = ext4_li_info_new();
3054                 if (ret)
3055                         goto out;
3056         }
3057
3058         mutex_lock(&ext4_li_info->li_list_mtx);
3059         list_add(&elr->lr_request, &ext4_li_info->li_request_list);
3060         mutex_unlock(&ext4_li_info->li_list_mtx);
3061
3062         sbi->s_li_request = elr;
3063         /*
3064          * set elr to NULL here since it has been inserted to
3065          * the request_list and the removal and free of it is
3066          * handled by ext4_clear_request_list from now on.
3067          */
3068         elr = NULL;
3069
3070         if (!(ext4_li_info->li_state & EXT4_LAZYINIT_RUNNING)) {
3071                 ret = ext4_run_lazyinit_thread();
3072                 if (ret)
3073                         goto out;
3074         }
3075 out:
3076         mutex_unlock(&ext4_li_mtx);
3077         if (ret)
3078                 kfree(elr);
3079         return ret;
3080 }
3081
3082 /*
3083  * We do not need to lock anything since this is called on
3084  * module unload.
3085  */
3086 static void ext4_destroy_lazyinit_thread(void)
3087 {
3088         /*
3089          * If thread exited earlier
3090          * there's nothing to be done.
3091          */
3092         if (!ext4_li_info || !ext4_lazyinit_task)
3093                 return;
3094
3095         kthread_stop(ext4_lazyinit_task);
3096 }
3097
3098 static int set_journal_csum_feature_set(struct super_block *sb)
3099 {
3100         int ret = 1;
3101         int compat, incompat;
3102         struct ext4_sb_info *sbi = EXT4_SB(sb);
3103
3104         if (ext4_has_metadata_csum(sb)) {
3105                 /* journal checksum v3 */
3106                 compat = 0;
3107                 incompat = JBD2_FEATURE_INCOMPAT_CSUM_V3;
3108         } else {
3109                 /* journal checksum v1 */
3110                 compat = JBD2_FEATURE_COMPAT_CHECKSUM;
3111                 incompat = 0;
3112         }
3113
3114         jbd2_journal_clear_features(sbi->s_journal,
3115                         JBD2_FEATURE_COMPAT_CHECKSUM, 0,
3116                         JBD2_FEATURE_INCOMPAT_CSUM_V3 |
3117                         JBD2_FEATURE_INCOMPAT_CSUM_V2);
3118         if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
3119                 ret = jbd2_journal_set_features(sbi->s_journal,
3120                                 compat, 0,
3121                                 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT |
3122                                 incompat);
3123         } else if (test_opt(sb, JOURNAL_CHECKSUM)) {
3124                 ret = jbd2_journal_set_features(sbi->s_journal,
3125                                 compat, 0,
3126                                 incompat);
3127                 jbd2_journal_clear_features(sbi->s_journal, 0, 0,
3128                                 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
3129         } else {
3130                 jbd2_journal_clear_features(sbi->s_journal, 0, 0,
3131                                 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
3132         }
3133
3134         return ret;
3135 }
3136
3137 /*
3138  * Note: calculating the overhead so we can be compatible with
3139  * historical BSD practice is quite difficult in the face of
3140  * clusters/bigalloc.  This is because multiple metadata blocks from
3141  * different block group can end up in the same allocation cluster.
3142  * Calculating the exact overhead in the face of clustered allocation
3143  * requires either O(all block bitmaps) in memory or O(number of block
3144  * groups**2) in time.  We will still calculate the superblock for
3145  * older file systems --- and if we come across with a bigalloc file
3146  * system with zero in s_overhead_clusters the estimate will be close to
3147  * correct especially for very large cluster sizes --- but for newer
3148  * file systems, it's better to calculate this figure once at mkfs
3149  * time, and store it in the superblock.  If the superblock value is
3150  * present (even for non-bigalloc file systems), we will use it.
3151  */
3152 static int count_overhead(struct super_block *sb, ext4_group_t grp,
3153                           char *buf)
3154 {
3155         struct ext4_sb_info     *sbi = EXT4_SB(sb);
3156         struct ext4_group_desc  *gdp;
3157         ext4_fsblk_t            first_block, last_block, b;
3158         ext4_group_t            i, ngroups = ext4_get_groups_count(sb);
3159         int                     s, j, count = 0;
3160
3161         if (!ext4_has_feature_bigalloc(sb))
3162                 return (ext4_bg_has_super(sb, grp) + ext4_bg_num_gdb(sb, grp) +
3163                         sbi->s_itb_per_group + 2);
3164
3165         first_block = le32_to_cpu(sbi->s_es->s_first_data_block) +
3166                 (grp * EXT4_BLOCKS_PER_GROUP(sb));
3167         last_block = first_block + EXT4_BLOCKS_PER_GROUP(sb) - 1;
3168         for (i = 0; i < ngroups; i++) {
3169                 gdp = ext4_get_group_desc(sb, i, NULL);
3170                 b = ext4_block_bitmap(sb, gdp);
3171                 if (b >= first_block && b <= last_block) {
3172                         ext4_set_bit(EXT4_B2C(sbi, b - first_block), buf);
3173                         count++;
3174                 }
3175                 b = ext4_inode_bitmap(sb, gdp);
3176                 if (b >= first_block && b <= last_block) {
3177                         ext4_set_bit(EXT4_B2C(sbi, b - first_block), buf);
3178                         count++;
3179                 }
3180                 b = ext4_inode_table(sb, gdp);
3181                 if (b >= first_block && b + sbi->s_itb_per_group <= last_block)
3182                         for (j = 0; j < sbi->s_itb_per_group; j++, b++) {
3183                                 int c = EXT4_B2C(sbi, b - first_block);
3184                                 ext4_set_bit(c, buf);
3185                                 count++;
3186                         }
3187                 if (i != grp)
3188                         continue;
3189                 s = 0;
3190                 if (ext4_bg_has_super(sb, grp)) {
3191                         ext4_set_bit(s++, buf);
3192                         count++;
3193                 }
3194                 for (j = ext4_bg_num_gdb(sb, grp); j > 0; j--) {
3195                         ext4_set_bit(EXT4_B2C(sbi, s++), buf);
3196                         count++;
3197                 }
3198         }
3199         if (!count)
3200                 return 0;
3201         return EXT4_CLUSTERS_PER_GROUP(sb) -
3202                 ext4_count_free(buf, EXT4_CLUSTERS_PER_GROUP(sb) / 8);
3203 }
3204
3205 /*
3206  * Compute the overhead and stash it in sbi->s_overhead
3207  */
3208 int ext4_calculate_overhead(struct super_block *sb)
3209 {
3210         struct ext4_sb_info *sbi = EXT4_SB(sb);
3211         struct ext4_super_block *es = sbi->s_es;
3212         ext4_group_t i, ngroups = ext4_get_groups_count(sb);
3213         ext4_fsblk_t overhead = 0;
3214         char *buf = (char *) get_zeroed_page(GFP_NOFS);
3215
3216         if (!buf)
3217                 return -ENOMEM;
3218
3219         /*
3220          * Compute the overhead (FS structures).  This is constant
3221          * for a given filesystem unless the number of block groups
3222          * changes so we cache the previous value until it does.
3223          */
3224
3225         /*
3226          * All of the blocks before first_data_block are overhead
3227          */
3228         overhead = EXT4_B2C(sbi, le32_to_cpu(es->s_first_data_block));
3229
3230         /*
3231          * Add the overhead found in each block group
3232          */
3233         for (i = 0; i < ngroups; i++) {
3234                 int blks;
3235
3236                 blks = count_overhead(sb, i, buf);
3237                 overhead += blks;
3238                 if (blks)
3239                         memset(buf, 0, PAGE_SIZE);
3240                 cond_resched();
3241         }
3242         /* Add the internal journal blocks as well */
3243         if (sbi->s_journal && !sbi->journal_bdev)
3244                 overhead += EXT4_NUM_B2C(sbi, sbi->s_journal->j_maxlen);
3245
3246         sbi->s_overhead = overhead;
3247         smp_wmb();
3248         free_page((unsigned long) buf);
3249         return 0;
3250 }
3251
3252 static void ext4_set_resv_clusters(struct super_block *sb)
3253 {
3254         ext4_fsblk_t resv_clusters;
3255         struct ext4_sb_info *sbi = EXT4_SB(sb);
3256
3257         /*
3258          * There's no need to reserve anything when we aren't using extents.
3259          * The space estimates are exact, there are no unwritten extents,
3260          * hole punching doesn't need new metadata... This is needed especially
3261          * to keep ext2/3 backward compatibility.
3262          */
3263         if (!ext4_has_feature_extents(sb))
3264                 return;
3265         /*
3266          * By default we reserve 2% or 4096 clusters, whichever is smaller.
3267          * This should cover the situations where we can not afford to run
3268          * out of space like for example punch hole, or converting
3269          * unwritten extents in delalloc path. In most cases such
3270          * allocation would require 1, or 2 blocks, higher numbers are
3271          * very rare.
3272          */
3273         resv_clusters = (ext4_blocks_count(sbi->s_es) >>
3274                          sbi->s_cluster_bits);
3275
3276         do_div(resv_clusters, 50);
3277         resv_clusters = min_t(ext4_fsblk_t, resv_clusters, 4096);
3278
3279         atomic64_set(&sbi->s_resv_clusters, resv_clusters);
3280 }
3281
3282 static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3283 {
3284         char *orig_data = kstrdup(data, GFP_KERNEL);
3285         struct buffer_head *bh;
3286         struct ext4_super_block *es = NULL;
3287         struct ext4_sb_info *sbi;
3288         ext4_fsblk_t block;
3289         ext4_fsblk_t sb_block = get_sb_block(&data);
3290         ext4_fsblk_t logical_sb_block;
3291         unsigned long offset = 0;
3292         unsigned long journal_devnum = 0;
3293         unsigned long def_mount_opts;
3294         struct inode *root;
3295         const char *descr;
3296         int ret = -ENOMEM;
3297         int blocksize, clustersize;
3298         unsigned int db_count;
3299         unsigned int i;
3300         int needs_recovery, has_huge_files, has_bigalloc;
3301         __u64 blocks_count;
3302         int err = 0;
3303         unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
3304         ext4_group_t first_not_zeroed;
3305
3306         sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
3307         if (!sbi)
3308                 goto out_free_orig;
3309
3310         sbi->s_blockgroup_lock =
3311                 kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL);
3312         if (!sbi->s_blockgroup_lock) {
3313                 kfree(sbi);
3314                 goto out_free_orig;
3315         }
3316         sb->s_fs_info = sbi;
3317         sbi->s_sb = sb;
3318         sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS;
3319         sbi->s_sb_block = sb_block;
3320         if (sb->s_bdev->bd_part)
3321                 sbi->s_sectors_written_start =
3322                         part_stat_read(sb->s_bdev->bd_part, sectors[1]);
3323
3324         /* Cleanup superblock name */
3325         strreplace(sb->s_id, '/', '!');
3326
3327         /* -EINVAL is default */
3328         ret = -EINVAL;
3329         blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE);
3330         if (!blocksize) {
3331                 ext4_msg(sb, KERN_ERR, "unable to set blocksize");
3332                 goto out_fail;
3333         }
3334
3335         /*
3336          * The ext4 superblock will not be buffer aligned for other than 1kB
3337          * block sizes.  We need to calculate the offset from buffer start.
3338          */
3339         if (blocksize != EXT4_MIN_BLOCK_SIZE) {
3340                 logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE;
3341                 offset = do_div(logical_sb_block, blocksize);
3342         } else {
3343                 logical_sb_block = sb_block;
3344         }
3345
3346         if (!(bh = sb_bread_unmovable(sb, logical_sb_block))) {
3347                 ext4_msg(sb, KERN_ERR, "unable to read superblock");
3348                 goto out_fail;
3349         }
3350         /*
3351          * Note: s_es must be initialized as soon as possible because
3352          *       some ext4 macro-instructions depend on its value
3353          */
3354         es = (struct ext4_super_block *) (bh->b_data + offset);
3355         sbi->s_es = es;
3356         sb->s_magic = le16_to_cpu(es->s_magic);
3357         if (sb->s_magic != EXT4_SUPER_MAGIC)
3358                 goto cantfind_ext4;
3359         sbi->s_kbytes_written = le64_to_cpu(es->s_kbytes_written);
3360
3361         /* Warn if metadata_csum and gdt_csum are both set. */
3362         if (ext4_has_feature_metadata_csum(sb) &&
3363             ext4_has_feature_gdt_csum(sb))
3364                 ext4_warning(sb, "metadata_csum and uninit_bg are "
3365                              "redundant flags; please run fsck.");
3366
3367         /* Check for a known checksum algorithm */
3368         if (!ext4_verify_csum_type(sb, es)) {
3369                 ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with "
3370                          "unknown checksum algorithm.");
3371                 silent = 1;
3372                 goto cantfind_ext4;
3373         }
3374
3375         /* Load the checksum driver */
3376         if (ext4_has_feature_metadata_csum(sb)) {
3377                 sbi->s_chksum_driver = crypto_alloc_shash("crc32c", 0, 0);
3378                 if (IS_ERR(sbi->s_chksum_driver)) {
3379                         ext4_msg(sb, KERN_ERR, "Cannot load crc32c driver.");
3380                         ret = PTR_ERR(sbi->s_chksum_driver);
3381                         sbi->s_chksum_driver = NULL;
3382                         goto failed_mount;
3383                 }
3384         }
3385
3386         /* Check superblock checksum */
3387         if (!ext4_superblock_csum_verify(sb, es)) {
3388                 ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with "
3389                          "invalid superblock checksum.  Run e2fsck?");
3390                 silent = 1;
3391                 ret = -EFSBADCRC;
3392                 goto cantfind_ext4;
3393         }
3394
3395         /* Precompute checksum seed for all metadata */
3396         if (ext4_has_feature_csum_seed(sb))
3397                 sbi->s_csum_seed = le32_to_cpu(es->s_checksum_seed);
3398         else if (ext4_has_metadata_csum(sb))
3399                 sbi->s_csum_seed = ext4_chksum(sbi, ~0, es->s_uuid,
3400                                                sizeof(es->s_uuid));
3401
3402         /* Set defaults before we parse the mount options */
3403         def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
3404         set_opt(sb, INIT_INODE_TABLE);
3405         if (def_mount_opts & EXT4_DEFM_DEBUG)
3406                 set_opt(sb, DEBUG);
3407         if (def_mount_opts & EXT4_DEFM_BSDGROUPS)
3408                 set_opt(sb, GRPID);
3409         if (def_mount_opts & EXT4_DEFM_UID16)
3410                 set_opt(sb, NO_UID32);
3411         /* xattr user namespace & acls are now defaulted on */
3412         set_opt(sb, XATTR_USER);
3413 #ifdef CONFIG_EXT4_FS_POSIX_ACL
3414         set_opt(sb, POSIX_ACL);
3415 #endif
3416         /* don't forget to enable journal_csum when metadata_csum is enabled. */
3417         if (ext4_has_metadata_csum(sb))
3418                 set_opt(sb, JOURNAL_CHECKSUM);
3419
3420         if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA)
3421                 set_opt(sb, JOURNAL_DATA);
3422         else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED)
3423                 set_opt(sb, ORDERED_DATA);
3424         else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_WBACK)
3425                 set_opt(sb, WRITEBACK_DATA);
3426
3427         if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_PANIC)
3428                 set_opt(sb, ERRORS_PANIC);
3429         else if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_CONTINUE)
3430                 set_opt(sb, ERRORS_CONT);
3431         else
3432                 set_opt(sb, ERRORS_RO);
3433         /* block_validity enabled by default; disable with noblock_validity */
3434         set_opt(sb, BLOCK_VALIDITY);
3435         if (def_mount_opts & EXT4_DEFM_DISCARD)
3436                 set_opt(sb, DISCARD);
3437
3438         sbi->s_resuid = make_kuid(&init_user_ns, le16_to_cpu(es->s_def_resuid));
3439         sbi->s_resgid = make_kgid(&init_user_ns, le16_to_cpu(es->s_def_resgid));
3440         sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ;
3441         sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME;
3442         sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME;
3443
3444         if ((def_mount_opts & EXT4_DEFM_NOBARRIER) == 0)
3445                 set_opt(sb, BARRIER);
3446
3447         /*
3448          * enable delayed allocation by default
3449          * Use -o nodelalloc to turn it off
3450          */
3451         if (!IS_EXT3_SB(sb) && !IS_EXT2_SB(sb) &&
3452             ((def_mount_opts & EXT4_DEFM_NODELALLOC) == 0))
3453                 set_opt(sb, DELALLOC);
3454
3455         /*
3456          * set default s_li_wait_mult for lazyinit, for the case there is
3457          * no mount option specified.
3458          */
3459         sbi->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT;
3460
3461         if (!parse_options((char *) sbi->s_es->s_mount_opts, sb,
3462                            &journal_devnum, &journal_ioprio, 0)) {
3463                 ext4_msg(sb, KERN_WARNING,
3464                          "failed to parse options in superblock: %s",
3465                          sbi->s_es->s_mount_opts);
3466         }
3467         sbi->s_def_mount_opt = sbi->s_mount_opt;
3468         if (!parse_options((char *) data, sb, &journal_devnum,
3469                            &journal_ioprio, 0))
3470                 goto failed_mount;
3471
3472         if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
3473                 printk_once(KERN_WARNING "EXT4-fs: Warning: mounting "
3474                             "with data=journal disables delayed "
3475                             "allocation and O_DIRECT support!\n");
3476                 if (test_opt2(sb, EXPLICIT_DELALLOC)) {
3477                         ext4_msg(sb, KERN_ERR, "can't mount with "
3478                                  "both data=journal and delalloc");
3479                         goto failed_mount;
3480                 }
3481                 if (test_opt(sb, DIOREAD_NOLOCK)) {
3482                         ext4_msg(sb, KERN_ERR, "can't mount with "
3483                                  "both data=journal and dioread_nolock");
3484                         goto failed_mount;
3485                 }
3486                 if (test_opt(sb, DAX)) {
3487                         ext4_msg(sb, KERN_ERR, "can't mount with "
3488                                  "both data=journal and dax");
3489                         goto failed_mount;
3490                 }
3491                 if (test_opt(sb, DELALLOC))
3492                         clear_opt(sb, DELALLOC);
3493         } else {
3494                 sb->s_iflags |= SB_I_CGROUPWB;
3495         }
3496
3497         sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
3498                 (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0);
3499
3500         if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV &&
3501             (ext4_has_compat_features(sb) ||
3502              ext4_has_ro_compat_features(sb) ||
3503              ext4_has_incompat_features(sb)))
3504                 ext4_msg(sb, KERN_WARNING,
3505                        "feature flags set on rev 0 fs, "
3506                        "running e2fsck is recommended");
3507
3508         if (es->s_creator_os == cpu_to_le32(EXT4_OS_HURD)) {
3509                 set_opt2(sb, HURD_COMPAT);
3510                 if (ext4_has_feature_64bit(sb)) {
3511                         ext4_msg(sb, KERN_ERR,
3512                                  "The Hurd can't support 64-bit file systems");
3513                         goto failed_mount;
3514                 }
3515         }
3516
3517         if (IS_EXT2_SB(sb)) {
3518                 if (ext2_feature_set_ok(sb))
3519                         ext4_msg(sb, KERN_INFO, "mounting ext2 file system "
3520                                  "using the ext4 subsystem");
3521                 else {
3522                         ext4_msg(sb, KERN_ERR, "couldn't mount as ext2 due "
3523                                  "to feature incompatibilities");
3524                         goto failed_mount;
3525                 }
3526         }
3527
3528         if (IS_EXT3_SB(sb)) {
3529                 if (ext3_feature_set_ok(sb))
3530                         ext4_msg(sb, KERN_INFO, "mounting ext3 file system "
3531                                  "using the ext4 subsystem");
3532                 else {
3533                         ext4_msg(sb, KERN_ERR, "couldn't mount as ext3 due "
3534                                  "to feature incompatibilities");
3535                         goto failed_mount;
3536                 }
3537         }
3538
3539         /*
3540          * Check feature flags regardless of the revision level, since we
3541          * previously didn't change the revision level when setting the flags,
3542          * so there is a chance incompat flags are set on a rev 0 filesystem.
3543          */
3544         if (!ext4_feature_set_ok(sb, (sb->s_flags & MS_RDONLY)))
3545                 goto failed_mount;
3546
3547         blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
3548         if (blocksize < EXT4_MIN_BLOCK_SIZE ||
3549             blocksize > EXT4_MAX_BLOCK_SIZE) {
3550                 ext4_msg(sb, KERN_ERR,
3551                        "Unsupported filesystem blocksize %d", blocksize);
3552                 goto failed_mount;
3553         }
3554
3555         if (le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) > (blocksize / 4)) {
3556                 ext4_msg(sb, KERN_ERR,
3557                          "Number of reserved GDT blocks insanely large: %d",
3558                          le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks));
3559                 goto failed_mount;
3560         }
3561
3562         if (sbi->s_mount_opt & EXT4_MOUNT_DAX) {
3563                 err = bdev_dax_supported(sb, blocksize);
3564                 if (err)
3565                         goto failed_mount;
3566         }
3567
3568         if (ext4_has_feature_encrypt(sb) && es->s_encryption_level) {
3569                 ext4_msg(sb, KERN_ERR, "Unsupported encryption level %d",
3570                          es->s_encryption_level);
3571                 goto failed_mount;
3572         }
3573
3574         if (sb->s_blocksize != blocksize) {
3575                 /* Validate the filesystem blocksize */
3576                 if (!sb_set_blocksize(sb, blocksize)) {
3577                         ext4_msg(sb, KERN_ERR, "bad block size %d",
3578                                         blocksize);
3579                         goto failed_mount;
3580                 }
3581
3582                 brelse(bh);
3583                 logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE;
3584                 offset = do_div(logical_sb_block, blocksize);
3585                 bh = sb_bread_unmovable(sb, logical_sb_block);
3586                 if (!bh) {
3587                         ext4_msg(sb, KERN_ERR,
3588                                "Can't read superblock on 2nd try");
3589                         goto failed_mount;
3590                 }
3591                 es = (struct ext4_super_block *)(bh->b_data + offset);
3592                 sbi->s_es = es;
3593                 if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) {
3594                         ext4_msg(sb, KERN_ERR,
3595                                "Magic mismatch, very weird!");
3596                         goto failed_mount;
3597                 }
3598         }
3599
3600         has_huge_files = ext4_has_feature_huge_file(sb);
3601         sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits,
3602                                                       has_huge_files);
3603         sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits, has_huge_files);
3604
3605         if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) {
3606                 sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE;
3607                 sbi->s_first_ino = EXT4_GOOD_OLD_FIRST_INO;
3608         } else {
3609                 sbi->s_inode_size = le16_to_cpu(es->s_inode_size);
3610                 sbi->s_first_ino = le32_to_cpu(es->s_first_ino);
3611                 if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) ||
3612                     (!is_power_of_2(sbi->s_inode_size)) ||
3613                     (sbi->s_inode_size > blocksize)) {
3614                         ext4_msg(sb, KERN_ERR,
3615                                "unsupported inode size: %d",
3616                                sbi->s_inode_size);
3617                         goto failed_mount;
3618                 }
3619                 if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE)
3620                         sb->s_time_gran = 1 << (EXT4_EPOCH_BITS - 2);
3621         }
3622
3623         sbi->s_desc_size = le16_to_cpu(es->s_desc_size);
3624         if (ext4_has_feature_64bit(sb)) {
3625                 if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE_64BIT ||
3626                     sbi->s_desc_size > EXT4_MAX_DESC_SIZE ||
3627                     !is_power_of_2(sbi->s_desc_size)) {
3628                         ext4_msg(sb, KERN_ERR,
3629                                "unsupported descriptor size %lu",
3630                                sbi->s_desc_size);
3631                         goto failed_mount;
3632                 }
3633         } else
3634                 sbi->s_desc_size = EXT4_MIN_DESC_SIZE;
3635
3636         sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group);
3637         sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group);
3638         if (EXT4_INODE_SIZE(sb) == 0 || EXT4_INODES_PER_GROUP(sb) == 0)
3639                 goto cantfind_ext4;
3640
3641         sbi->s_inodes_per_block = blocksize / EXT4_INODE_SIZE(sb);
3642         if (sbi->s_inodes_per_block == 0)
3643                 goto cantfind_ext4;
3644         sbi->s_itb_per_group = sbi->s_inodes_per_group /
3645                                         sbi->s_inodes_per_block;
3646         sbi->s_desc_per_block = blocksize / EXT4_DESC_SIZE(sb);
3647         sbi->s_sbh = bh;
3648         sbi->s_mount_state = le16_to_cpu(es->s_state);
3649         sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb));
3650         sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb));
3651
3652         for (i = 0; i < 4; i++)
3653                 sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
3654         sbi->s_def_hash_version = es->s_def_hash_version;
3655         if (ext4_has_feature_dir_index(sb)) {
3656                 i = le32_to_cpu(es->s_flags);
3657                 if (i & EXT2_FLAGS_UNSIGNED_HASH)
3658                         sbi->s_hash_unsigned = 3;
3659                 else if ((i & EXT2_FLAGS_SIGNED_HASH) == 0) {
3660 #ifdef __CHAR_UNSIGNED__
3661                         if (!(sb->s_flags & MS_RDONLY))
3662                                 es->s_flags |=
3663                                         cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH);
3664                         sbi->s_hash_unsigned = 3;
3665 #else
3666                         if (!(sb->s_flags & MS_RDONLY))
3667                                 es->s_flags |=
3668                                         cpu_to_le32(EXT2_FLAGS_SIGNED_HASH);
3669 #endif
3670                 }
3671         }
3672
3673         /* Handle clustersize */
3674         clustersize = BLOCK_SIZE << le32_to_cpu(es->s_log_cluster_size);
3675         has_bigalloc = ext4_has_feature_bigalloc(sb);
3676         if (has_bigalloc) {
3677                 if (clustersize < blocksize) {
3678                         ext4_msg(sb, KERN_ERR,
3679                                  "cluster size (%d) smaller than "
3680                                  "block size (%d)", clustersize, blocksize);
3681                         goto failed_mount;
3682                 }
3683                 sbi->s_cluster_bits = le32_to_cpu(es->s_log_cluster_size) -
3684                         le32_to_cpu(es->s_log_block_size);
3685                 sbi->s_clusters_per_group =
3686                         le32_to_cpu(es->s_clusters_per_group);
3687                 if (sbi->s_clusters_per_group > blocksize * 8) {
3688                         ext4_msg(sb, KERN_ERR,
3689                                  "#clusters per group too big: %lu",
3690                                  sbi->s_clusters_per_group);
3691                         goto failed_mount;
3692                 }
3693                 if (sbi->s_blocks_per_group !=
3694                     (sbi->s_clusters_per_group * (clustersize / blocksize))) {
3695                         ext4_msg(sb, KERN_ERR, "blocks per group (%lu) and "
3696                                  "clusters per group (%lu) inconsistent",
3697                                  sbi->s_blocks_per_group,
3698                                  sbi->s_clusters_per_group);
3699                         goto failed_mount;
3700                 }
3701         } else {
3702                 if (clustersize != blocksize) {
3703                         ext4_warning(sb, "fragment/cluster size (%d) != "
3704                                      "block size (%d)", clustersize,
3705                                      blocksize);
3706                         clustersize = blocksize;
3707                 }
3708                 if (sbi->s_blocks_per_group > blocksize * 8) {
3709                         ext4_msg(sb, KERN_ERR,
3710                                  "#blocks per group too big: %lu",
3711                                  sbi->s_blocks_per_group);
3712                         goto failed_mount;
3713                 }
3714                 sbi->s_clusters_per_group = sbi->s_blocks_per_group;
3715                 sbi->s_cluster_bits = 0;
3716         }
3717         sbi->s_cluster_ratio = clustersize / blocksize;
3718
3719         if (sbi->s_inodes_per_group > blocksize * 8) {
3720                 ext4_msg(sb, KERN_ERR,
3721                        "#inodes per group too big: %lu",
3722                        sbi->s_inodes_per_group);
3723                 goto failed_mount;
3724         }
3725
3726         /* Do we have standard group size of clustersize * 8 blocks ? */
3727         if (sbi->s_blocks_per_group == clustersize << 3)
3728                 set_opt2(sb, STD_GROUP_SIZE);
3729
3730         /*
3731          * Test whether we have more sectors than will fit in sector_t,
3732          * and whether the max offset is addressable by the page cache.
3733          */
3734         err = generic_check_addressable(sb->s_blocksize_bits,
3735                                         ext4_blocks_count(es));
3736         if (err) {
3737                 ext4_msg(sb, KERN_ERR, "filesystem"
3738                          " too large to mount safely on this system");
3739                 if (sizeof(sector_t) < 8)
3740                         ext4_msg(sb, KERN_WARNING, "CONFIG_LBDAF not enabled");
3741                 goto failed_mount;
3742         }
3743
3744         if (EXT4_BLOCKS_PER_GROUP(sb) == 0)
3745                 goto cantfind_ext4;
3746
3747         /* check blocks count against device size */
3748         blocks_count = sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits;
3749         if (blocks_count && ext4_blocks_count(es) > blocks_count) {
3750                 ext4_msg(sb, KERN_WARNING, "bad geometry: block count %llu "
3751                        "exceeds size of device (%llu blocks)",
3752                        ext4_blocks_count(es), blocks_count);
3753                 goto failed_mount;
3754         }
3755
3756         /*
3757          * It makes no sense for the first data block to be beyond the end
3758          * of the filesystem.
3759          */
3760         if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) {
3761                 ext4_msg(sb, KERN_WARNING, "bad geometry: first data "
3762                          "block %u is beyond end of filesystem (%llu)",
3763                          le32_to_cpu(es->s_first_data_block),
3764                          ext4_blocks_count(es));
3765                 goto failed_mount;
3766         }
3767         blocks_count = (ext4_blocks_count(es) -
3768                         le32_to_cpu(es->s_first_data_block) +
3769                         EXT4_BLOCKS_PER_GROUP(sb) - 1);
3770         do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb));
3771         if (blocks_count > ((uint64_t)1<<32) - EXT4_DESC_PER_BLOCK(sb)) {
3772                 ext4_msg(sb, KERN_WARNING, "groups count too large: %u "
3773                        "(block count %llu, first data block %u, "
3774                        "blocks per group %lu)", sbi->s_groups_count,
3775                        ext4_blocks_count(es),
3776                        le32_to_cpu(es->s_first_data_block),
3777                        EXT4_BLOCKS_PER_GROUP(sb));
3778                 goto failed_mount;
3779         }
3780         sbi->s_groups_count = blocks_count;
3781         sbi->s_blockfile_groups = min_t(ext4_group_t, sbi->s_groups_count,
3782                         (EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb)));
3783         db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) /
3784                    EXT4_DESC_PER_BLOCK(sb);
3785         sbi->s_group_desc = ext4_kvmalloc(db_count *
3786                                           sizeof(struct buffer_head *),
3787                                           GFP_KERNEL);
3788         if (sbi->s_group_desc == NULL) {
3789                 ext4_msg(sb, KERN_ERR, "not enough memory");
3790                 ret = -ENOMEM;
3791                 goto failed_mount;
3792         }
3793
3794         bgl_lock_init(sbi->s_blockgroup_lock);
3795
3796         for (i = 0; i < db_count; i++) {
3797                 block = descriptor_loc(sb, logical_sb_block, i);
3798                 sbi->s_group_desc[i] = sb_bread_unmovable(sb, block);
3799                 if (!sbi->s_group_desc[i]) {
3800                         ext4_msg(sb, KERN_ERR,
3801                                "can't read group descriptor %d", i);
3802                         db_count = i;
3803                         goto failed_mount2;
3804                 }
3805         }
3806         if (!ext4_check_descriptors(sb, logical_sb_block, &first_not_zeroed)) {
3807                 ext4_msg(sb, KERN_ERR, "group descriptors corrupted!");
3808                 ret = -EFSCORRUPTED;
3809                 goto failed_mount2;
3810         }
3811
3812         sbi->s_gdb_count = db_count;
3813         get_random_bytes(&sbi->s_next_generation, sizeof(u32));
3814         spin_lock_init(&sbi->s_next_gen_lock);
3815
3816         setup_timer(&sbi->s_err_report, print_daily_error_info,
3817                 (unsigned long) sb);
3818
3819         /* Register extent status tree shrinker */
3820         if (ext4_es_register_shrinker(sbi))
3821                 goto failed_mount3;
3822
3823         sbi->s_stripe = ext4_get_stripe_size(sbi);
3824         sbi->s_extent_max_zeroout_kb = 32;
3825
3826         /*
3827          * set up enough so that it can read an inode
3828          */
3829         sb->s_op = &ext4_sops;
3830         sb->s_export_op = &ext4_export_ops;
3831         sb->s_xattr = ext4_xattr_handlers;
3832         sb->s_cop = &ext4_cryptops;
3833 #ifdef CONFIG_QUOTA
3834         sb->dq_op = &ext4_quota_operations;
3835         if (ext4_has_feature_quota(sb))
3836                 sb->s_qcop = &dquot_quotactl_sysfile_ops;
3837         else
3838                 sb->s_qcop = &ext4_qctl_operations;
3839         sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ;
3840 #endif
3841         memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid));
3842
3843         INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
3844         mutex_init(&sbi->s_orphan_lock);
3845
3846         sb->s_root = NULL;
3847
3848         needs_recovery = (es->s_last_orphan != 0 ||
3849                           ext4_has_feature_journal_needs_recovery(sb));
3850
3851         if (ext4_has_feature_mmp(sb) && !(sb->s_flags & MS_RDONLY))
3852                 if (ext4_multi_mount_protect(sb, le64_to_cpu(es->s_mmp_block)))
3853                         goto failed_mount3a;
3854
3855         /*
3856          * The first inode we look at is the journal inode.  Don't try
3857          * root first: it may be modified in the journal!
3858          */
3859         if (!test_opt(sb, NOLOAD) && ext4_has_feature_journal(sb)) {
3860                 if (ext4_load_journal(sb, es, journal_devnum))
3861                         goto failed_mount3a;
3862         } else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) &&
3863                    ext4_has_feature_journal_needs_recovery(sb)) {
3864                 ext4_msg(sb, KERN_ERR, "required journal recovery "
3865                        "suppressed and not mounted read-only");
3866                 goto failed_mount_wq;
3867         } else {
3868                 /* Nojournal mode, all journal mount options are illegal */
3869                 if (test_opt2(sb, EXPLICIT_JOURNAL_CHECKSUM)) {
3870                         ext4_msg(sb, KERN_ERR, "can't mount with "
3871                                  "journal_checksum, fs mounted w/o journal");
3872                         goto failed_mount_wq;
3873                 }
3874                 if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
3875                         ext4_msg(sb, KERN_ERR, "can't mount with "
3876                                  "journal_async_commit, fs mounted w/o journal");
3877                         goto failed_mount_wq;
3878                 }
3879                 if (sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ) {
3880                         ext4_msg(sb, KERN_ERR, "can't mount with "
3881                                  "commit=%lu, fs mounted w/o journal",
3882                                  sbi->s_commit_interval / HZ);
3883                         goto failed_mount_wq;
3884                 }
3885                 if (EXT4_MOUNT_DATA_FLAGS &
3886                     (sbi->s_mount_opt ^ sbi->s_def_mount_opt)) {
3887                         ext4_msg(sb, KERN_ERR, "can't mount with "
3888                                  "data=, fs mounted w/o journal");
3889                         goto failed_mount_wq;
3890                 }
3891                 sbi->s_def_mount_opt &= EXT4_MOUNT_JOURNAL_CHECKSUM;
3892                 clear_opt(sb, JOURNAL_CHECKSUM);
3893                 clear_opt(sb, DATA_FLAGS);
3894                 sbi->s_journal = NULL;
3895                 needs_recovery = 0;
3896                 goto no_journal;
3897         }
3898
3899         if (ext4_has_feature_64bit(sb) &&
3900             !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0,
3901                                        JBD2_FEATURE_INCOMPAT_64BIT)) {
3902                 ext4_msg(sb, KERN_ERR, "Failed to set 64-bit journal feature");
3903                 goto failed_mount_wq;
3904         }
3905
3906         if (!set_journal_csum_feature_set(sb)) {
3907                 ext4_msg(sb, KERN_ERR, "Failed to set journal checksum "
3908                          "feature set");
3909                 goto failed_mount_wq;
3910         }
3911
3912         /* We have now updated the journal if required, so we can
3913          * validate the data journaling mode. */
3914         switch (test_opt(sb, DATA_FLAGS)) {
3915         case 0:
3916                 /* No mode set, assume a default based on the journal
3917                  * capabilities: ORDERED_DATA if the journal can
3918                  * cope, else JOURNAL_DATA
3919                  */
3920                 if (jbd2_journal_check_available_features
3921                     (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE))
3922                         set_opt(sb, ORDERED_DATA);
3923                 else
3924                         set_opt(sb, JOURNAL_DATA);
3925                 break;
3926
3927         case EXT4_MOUNT_ORDERED_DATA:
3928         case EXT4_MOUNT_WRITEBACK_DATA:
3929                 if (!jbd2_journal_check_available_features
3930                     (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) {
3931                         ext4_msg(sb, KERN_ERR, "Journal does not support "
3932                                "requested data journaling mode");
3933                         goto failed_mount_wq;
3934                 }
3935         default:
3936                 break;
3937         }
3938         set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
3939
3940         sbi->s_journal->j_commit_callback = ext4_journal_commit_callback;
3941
3942 no_journal:
3943         sbi->s_mb_cache = ext4_xattr_create_cache();
3944         if (!sbi->s_mb_cache) {
3945                 ext4_msg(sb, KERN_ERR, "Failed to create an mb_cache");
3946                 goto failed_mount_wq;
3947         }
3948
3949         if ((DUMMY_ENCRYPTION_ENABLED(sbi) || ext4_has_feature_encrypt(sb)) &&
3950             (blocksize != PAGE_SIZE)) {
3951                 ext4_msg(sb, KERN_ERR,
3952                          "Unsupported blocksize for fs encryption");
3953                 goto failed_mount_wq;
3954         }
3955
3956         if (DUMMY_ENCRYPTION_ENABLED(sbi) && !(sb->s_flags & MS_RDONLY) &&
3957             !ext4_has_feature_encrypt(sb)) {
3958                 ext4_set_feature_encrypt(sb);
3959                 ext4_commit_super(sb, 1);
3960         }
3961
3962         /*
3963          * Get the # of file system overhead blocks from the
3964          * superblock if present.
3965          */
3966         if (es->s_overhead_clusters)
3967                 sbi->s_overhead = le32_to_cpu(es->s_overhead_clusters);
3968         else {
3969                 err = ext4_calculate_overhead(sb);
3970                 if (err)
3971                         goto failed_mount_wq;
3972         }
3973
3974         /*
3975          * The maximum number of concurrent works can be high and
3976          * concurrency isn't really necessary.  Limit it to 1.
3977          */
3978         EXT4_SB(sb)->rsv_conversion_wq =
3979                 alloc_workqueue("ext4-rsv-conversion", WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
3980         if (!EXT4_SB(sb)->rsv_conversion_wq) {
3981                 printk(KERN_ERR "EXT4-fs: failed to create workqueue\n");
3982                 ret = -ENOMEM;
3983                 goto failed_mount4;
3984         }
3985
3986         /*
3987          * The jbd2_journal_load will have done any necessary log recovery,
3988          * so we can safely mount the rest of the filesystem now.
3989          */
3990
3991         root = ext4_iget(sb, EXT4_ROOT_INO);
3992         if (IS_ERR(root)) {
3993                 ext4_msg(sb, KERN_ERR, "get root inode failed");
3994                 ret = PTR_ERR(root);
3995                 root = NULL;
3996                 goto failed_mount4;
3997         }
3998         if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
3999                 ext4_msg(sb, KERN_ERR, "corrupt root inode, run e2fsck");
4000                 iput(root);
4001                 goto failed_mount4;
4002         }
4003         sb->s_root = d_make_root(root);
4004         if (!sb->s_root) {
4005                 ext4_msg(sb, KERN_ERR, "get root dentry failed");
4006                 ret = -ENOMEM;
4007                 goto failed_mount4;
4008         }
4009
4010         if (ext4_setup_super(sb, es, sb->s_flags & MS_RDONLY))
4011                 sb->s_flags |= MS_RDONLY;
4012
4013         /* determine the minimum size of new large inodes, if present */
4014         if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) {
4015                 sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
4016                                                      EXT4_GOOD_OLD_INODE_SIZE;
4017                 if (ext4_has_feature_extra_isize(sb)) {
4018                         if (sbi->s_want_extra_isize <
4019                             le16_to_cpu(es->s_want_extra_isize))
4020                                 sbi->s_want_extra_isize =
4021                                         le16_to_cpu(es->s_want_extra_isize);
4022                         if (sbi->s_want_extra_isize <
4023                             le16_to_cpu(es->s_min_extra_isize))
4024                                 sbi->s_want_extra_isize =
4025                                         le16_to_cpu(es->s_min_extra_isize);
4026                 }
4027         }
4028         /* Check if enough inode space is available */
4029         if (EXT4_GOOD_OLD_INODE_SIZE + sbi->s_want_extra_isize >
4030                                                         sbi->s_inode_size) {
4031                 sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
4032                                                        EXT4_GOOD_OLD_INODE_SIZE;
4033                 ext4_msg(sb, KERN_INFO, "required extra inode space not"
4034                          "available");
4035         }
4036
4037         ext4_set_resv_clusters(sb);
4038
4039         err = ext4_setup_system_zone(sb);
4040         if (err) {
4041                 ext4_msg(sb, KERN_ERR, "failed to initialize system "
4042                          "zone (%d)", err);
4043                 goto failed_mount4a;
4044         }
4045
4046         ext4_ext_init(sb);
4047         err = ext4_mb_init(sb);
4048         if (err) {
4049                 ext4_msg(sb, KERN_ERR, "failed to initialize mballoc (%d)",
4050                          err);
4051                 goto failed_mount5;
4052         }
4053
4054         block = ext4_count_free_clusters(sb);
4055         ext4_free_blocks_count_set(sbi->s_es,
4056                                    EXT4_C2B(sbi, block));
4057         err = percpu_counter_init(&sbi->s_freeclusters_counter, block,
4058                                   GFP_KERNEL);
4059         if (!err) {
4060                 unsigned long freei = ext4_count_free_inodes(sb);
4061                 sbi->s_es->s_free_inodes_count = cpu_to_le32(freei);
4062                 err = percpu_counter_init(&sbi->s_freeinodes_counter, freei,
4063                                           GFP_KERNEL);
4064         }
4065         if (!err)
4066                 err = percpu_counter_init(&sbi->s_dirs_counter,
4067                                           ext4_count_dirs(sb), GFP_KERNEL);
4068         if (!err)
4069                 err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0,
4070                                           GFP_KERNEL);
4071         if (!err)
4072                 err = percpu_init_rwsem(&sbi->s_journal_flag_rwsem);
4073
4074         if (err) {
4075                 ext4_msg(sb, KERN_ERR, "insufficient memory");
4076                 goto failed_mount6;
4077         }
4078
4079         if (ext4_has_feature_flex_bg(sb))
4080                 if (!ext4_fill_flex_info(sb)) {
4081                         ext4_msg(sb, KERN_ERR,
4082                                "unable to initialize "
4083                                "flex_bg meta info!");
4084                         goto failed_mount6;
4085                 }
4086
4087         err = ext4_register_li_request(sb, first_not_zeroed);
4088         if (err)
4089                 goto failed_mount6;
4090
4091         err = ext4_register_sysfs(sb);
4092         if (err)
4093                 goto failed_mount7;
4094
4095 #ifdef CONFIG_QUOTA
4096         /* Enable quota usage during mount. */
4097         if (ext4_has_feature_quota(sb) && !(sb->s_flags & MS_RDONLY)) {
4098                 err = ext4_enable_quotas(sb);
4099                 if (err)
4100                         goto failed_mount8;
4101         }
4102 #endif  /* CONFIG_QUOTA */
4103
4104         EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS;
4105         ext4_orphan_cleanup(sb, es);
4106         EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS;
4107         if (needs_recovery) {
4108                 ext4_msg(sb, KERN_INFO, "recovery complete");
4109                 ext4_mark_recovery_complete(sb, es);
4110         }
4111         if (EXT4_SB(sb)->s_journal) {
4112                 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
4113                         descr = " journalled data mode";
4114                 else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
4115                         descr = " ordered data mode";
4116                 else
4117                         descr = " writeback data mode";
4118         } else
4119                 descr = "out journal";
4120
4121         if (test_opt(sb, DISCARD)) {
4122                 struct request_queue *q = bdev_get_queue(sb->s_bdev);
4123                 if (!blk_queue_discard(q))
4124                         ext4_msg(sb, KERN_WARNING,
4125                                  "mounting with \"discard\" option, but "
4126                                  "the device does not support discard");
4127         }
4128
4129         if (___ratelimit(&ext4_mount_msg_ratelimit, "EXT4-fs mount"))
4130                 ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. "
4131                          "Opts: %s%s%s", descr, sbi->s_es->s_mount_opts,
4132                          *sbi->s_es->s_mount_opts ? "; " : "", orig_data);
4133
4134         if (es->s_error_count)
4135                 mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */
4136
4137         /* Enable message ratelimiting. Default is 10 messages per 5 secs. */
4138         ratelimit_state_init(&sbi->s_err_ratelimit_state, 5 * HZ, 10);
4139         ratelimit_state_init(&sbi->s_warning_ratelimit_state, 5 * HZ, 10);
4140         ratelimit_state_init(&sbi->s_msg_ratelimit_state, 5 * HZ, 10);
4141
4142         kfree(orig_data);
4143 #ifdef CONFIG_EXT4_FS_ENCRYPTION
4144         memcpy(sbi->key_prefix, EXT4_KEY_DESC_PREFIX,
4145                                 EXT4_KEY_DESC_PREFIX_SIZE);
4146         sbi->key_prefix_size = EXT4_KEY_DESC_PREFIX_SIZE;
4147 #endif
4148         return 0;
4149
4150 cantfind_ext4:
4151         if (!silent)
4152                 ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem");
4153         goto failed_mount;
4154
4155 #ifdef CONFIG_QUOTA
4156 failed_mount8:
4157         ext4_unregister_sysfs(sb);
4158 #endif
4159 failed_mount7:
4160         ext4_unregister_li_request(sb);
4161 failed_mount6:
4162         ext4_mb_release(sb);
4163         if (sbi->s_flex_groups)
4164                 kvfree(sbi->s_flex_groups);
4165         percpu_counter_destroy(&sbi->s_freeclusters_counter);
4166         percpu_counter_destroy(&sbi->s_freeinodes_counter);
4167         percpu_counter_destroy(&sbi->s_dirs_counter);
4168         percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
4169 failed_mount5:
4170         ext4_ext_release(sb);
4171         ext4_release_system_zone(sb);
4172 failed_mount4a:
4173         dput(sb->s_root);
4174         sb->s_root = NULL;
4175 failed_mount4:
4176         ext4_msg(sb, KERN_ERR, "mount failed");
4177         if (EXT4_SB(sb)->rsv_conversion_wq)
4178                 destroy_workqueue(EXT4_SB(sb)->rsv_conversion_wq);
4179 failed_mount_wq:
4180         if (sbi->s_mb_cache) {
4181                 ext4_xattr_destroy_cache(sbi->s_mb_cache);
4182                 sbi->s_mb_cache = NULL;
4183         }
4184         if (sbi->s_journal) {
4185                 jbd2_journal_destroy(sbi->s_journal);
4186                 sbi->s_journal = NULL;
4187         }
4188 failed_mount3a:
4189         ext4_es_unregister_shrinker(sbi);
4190 failed_mount3:
4191         del_timer_sync(&sbi->s_err_report);
4192         if (sbi->s_mmp_tsk)
4193                 kthread_stop(sbi->s_mmp_tsk);
4194 failed_mount2:
4195         for (i = 0; i < db_count; i++)
4196                 brelse(sbi->s_group_desc[i]);
4197         kvfree(sbi->s_group_desc);
4198 failed_mount:
4199         if (sbi->s_chksum_driver)
4200                 crypto_free_shash(sbi->s_chksum_driver);
4201 #ifdef CONFIG_QUOTA
4202         for (i = 0; i < EXT4_MAXQUOTAS; i++)
4203                 kfree(sbi->s_qf_names[i]);
4204 #endif
4205         ext4_blkdev_remove(sbi);
4206         brelse(bh);
4207 out_fail:
4208         sb->s_fs_info = NULL;
4209         kfree(sbi->s_blockgroup_lock);
4210         kfree(sbi);
4211 out_free_orig:
4212         kfree(orig_data);
4213         return err ? err : ret;
4214 }
4215
4216 /*
4217  * Setup any per-fs journal parameters now.  We'll do this both on
4218  * initial mount, once the journal has been initialised but before we've
4219  * done any recovery; and again on any subsequent remount.
4220  */
4221 static void ext4_init_journal_params(struct super_block *sb, journal_t *journal)
4222 {
4223         struct ext4_sb_info *sbi = EXT4_SB(sb);
4224
4225         journal->j_commit_interval = sbi->s_commit_interval;
4226         journal->j_min_batch_time = sbi->s_min_batch_time;
4227         journal->j_max_batch_time = sbi->s_max_batch_time;
4228
4229         write_lock(&journal->j_state_lock);
4230         if (test_opt(sb, BARRIER))
4231                 journal->j_flags |= JBD2_BARRIER;
4232         else
4233                 journal->j_flags &= ~JBD2_BARRIER;
4234         if (test_opt(sb, DATA_ERR_ABORT))
4235                 journal->j_flags |= JBD2_ABORT_ON_SYNCDATA_ERR;
4236         else
4237                 journal->j_flags &= ~JBD2_ABORT_ON_SYNCDATA_ERR;
4238         write_unlock(&journal->j_state_lock);
4239 }
4240
4241 static journal_t *ext4_get_journal(struct super_block *sb,
4242                                    unsigned int journal_inum)
4243 {
4244         struct inode *journal_inode;
4245         journal_t *journal;
4246
4247         BUG_ON(!ext4_has_feature_journal(sb));
4248
4249         /* First, test for the existence of a valid inode on disk.  Bad
4250          * things happen if we iget() an unused inode, as the subsequent
4251          * iput() will try to delete it. */
4252
4253         journal_inode = ext4_iget(sb, journal_inum);
4254         if (IS_ERR(journal_inode)) {
4255                 ext4_msg(sb, KERN_ERR, "no journal found");
4256                 return NULL;
4257         }
4258         if (!journal_inode->i_nlink) {
4259                 make_bad_inode(journal_inode);
4260                 iput(journal_inode);
4261                 ext4_msg(sb, KERN_ERR, "journal inode is deleted");
4262                 return NULL;
4263         }
4264
4265         jbd_debug(2, "Journal inode found at %p: %lld bytes\n",
4266                   journal_inode, journal_inode->i_size);
4267         if (!S_ISREG(journal_inode->i_mode)) {
4268                 ext4_msg(sb, KERN_ERR, "invalid journal inode");
4269                 iput(journal_inode);
4270                 return NULL;
4271         }
4272
4273         journal = jbd2_journal_init_inode(journal_inode);
4274         if (!journal) {
4275                 ext4_msg(sb, KERN_ERR, "Could not load journal inode");
4276                 iput(journal_inode);
4277                 return NULL;
4278         }
4279         journal->j_private = sb;
4280         ext4_init_journal_params(sb, journal);
4281         return journal;
4282 }
4283
4284 static journal_t *ext4_get_dev_journal(struct super_block *sb,
4285                                        dev_t j_dev)
4286 {
4287         struct buffer_head *bh;
4288         journal_t *journal;
4289         ext4_fsblk_t start;
4290         ext4_fsblk_t len;
4291         int hblock, blocksize;
4292         ext4_fsblk_t sb_block;
4293         unsigned long offset;
4294         struct ext4_super_block *es;
4295         struct block_device *bdev;
4296
4297         BUG_ON(!ext4_has_feature_journal(sb));
4298
4299         bdev = ext4_blkdev_get(j_dev, sb);
4300         if (bdev == NULL)
4301                 return NULL;
4302
4303         blocksize = sb->s_blocksize;
4304         hblock = bdev_logical_block_size(bdev);
4305         if (blocksize < hblock) {
4306                 ext4_msg(sb, KERN_ERR,
4307                         "blocksize too small for journal device");
4308                 goto out_bdev;
4309         }
4310
4311         sb_block = EXT4_MIN_BLOCK_SIZE / blocksize;
4312         offset = EXT4_MIN_BLOCK_SIZE % blocksize;
4313         set_blocksize(bdev, blocksize);
4314         if (!(bh = __bread(bdev, sb_block, blocksize))) {
4315                 ext4_msg(sb, KERN_ERR, "couldn't read superblock of "
4316                        "external journal");
4317                 goto out_bdev;
4318         }
4319
4320         es = (struct ext4_super_block *) (bh->b_data + offset);
4321         if ((le16_to_cpu(es->s_magic) != EXT4_SUPER_MAGIC) ||
4322             !(le32_to_cpu(es->s_feature_incompat) &
4323               EXT4_FEATURE_INCOMPAT_JOURNAL_DEV)) {
4324                 ext4_msg(sb, KERN_ERR, "external journal has "
4325                                         "bad superblock");
4326                 brelse(bh);
4327                 goto out_bdev;
4328         }
4329
4330         if ((le32_to_cpu(es->s_feature_ro_compat) &
4331              EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) &&
4332             es->s_checksum != ext4_superblock_csum(sb, es)) {
4333                 ext4_msg(sb, KERN_ERR, "external journal has "
4334                                        "corrupt superblock");
4335                 brelse(bh);
4336                 goto out_bdev;
4337         }
4338
4339         if (memcmp(EXT4_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) {
4340                 ext4_msg(sb, KERN_ERR, "journal UUID does not match");
4341                 brelse(bh);
4342                 goto out_bdev;
4343         }
4344
4345         len = ext4_blocks_count(es);
4346         start = sb_block + 1;
4347         brelse(bh);     /* we're done with the superblock */
4348
4349         journal = jbd2_journal_init_dev(bdev, sb->s_bdev,
4350                                         start, len, blocksize);
4351         if (!journal) {
4352                 ext4_msg(sb, KERN_ERR, "failed to create device journal");
4353                 goto out_bdev;
4354         }
4355         journal->j_private = sb;
4356         ll_rw_block(REQ_OP_READ, REQ_META | REQ_PRIO, 1, &journal->j_sb_buffer);
4357         wait_on_buffer(journal->j_sb_buffer);
4358         if (!buffer_uptodate(journal->j_sb_buffer)) {
4359                 ext4_msg(sb, KERN_ERR, "I/O error on journal device");
4360                 goto out_journal;
4361         }
4362         if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) {
4363                 ext4_msg(sb, KERN_ERR, "External journal has more than one "
4364                                         "user (unsupported) - %d",
4365                         be32_to_cpu(journal->j_superblock->s_nr_users));
4366                 goto out_journal;
4367         }
4368         EXT4_SB(sb)->journal_bdev = bdev;
4369         ext4_init_journal_params(sb, journal);
4370         return journal;
4371
4372 out_journal:
4373         jbd2_journal_destroy(journal);
4374 out_bdev:
4375         ext4_blkdev_put(bdev);
4376         return NULL;
4377 }
4378
4379 static int ext4_load_journal(struct super_block *sb,
4380                              struct ext4_super_block *es,
4381                              unsigned long journal_devnum)
4382 {
4383         journal_t *journal;
4384         unsigned int journal_inum = le32_to_cpu(es->s_journal_inum);
4385         dev_t journal_dev;
4386         int err = 0;
4387         int really_read_only;
4388
4389         BUG_ON(!ext4_has_feature_journal(sb));
4390
4391         if (journal_devnum &&
4392             journal_devnum != le32_to_cpu(es->s_journal_dev)) {
4393                 ext4_msg(sb, KERN_INFO, "external journal device major/minor "
4394                         "numbers have changed");
4395                 journal_dev = new_decode_dev(journal_devnum);
4396         } else
4397                 journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev));
4398
4399         really_read_only = bdev_read_only(sb->s_bdev);
4400
4401         /*
4402          * Are we loading a blank journal or performing recovery after a
4403          * crash?  For recovery, we need to check in advance whether we
4404          * can get read-write access to the device.
4405          */
4406         if (ext4_has_feature_journal_needs_recovery(sb)) {
4407                 if (sb->s_flags & MS_RDONLY) {
4408                         ext4_msg(sb, KERN_INFO, "INFO: recovery "
4409                                         "required on readonly filesystem");
4410                         if (really_read_only) {
4411                                 ext4_msg(sb, KERN_ERR, "write access "
4412                                         "unavailable, cannot proceed");
4413                                 return -EROFS;
4414                         }
4415                         ext4_msg(sb, KERN_INFO, "write access will "
4416                                "be enabled during recovery");
4417                 }
4418         }
4419
4420         if (journal_inum && journal_dev) {
4421                 ext4_msg(sb, KERN_ERR, "filesystem has both journal "
4422                        "and inode journals!");
4423                 return -EINVAL;
4424         }
4425
4426         if (journal_inum) {
4427                 if (!(journal = ext4_get_journal(sb, journal_inum)))
4428                         return -EINVAL;
4429         } else {
4430                 if (!(journal = ext4_get_dev_journal(sb, journal_dev)))
4431                         return -EINVAL;
4432         }
4433
4434         if (!(journal->j_flags & JBD2_BARRIER))
4435                 ext4_msg(sb, KERN_INFO, "barriers disabled");
4436
4437         if (!ext4_has_feature_journal_needs_recovery(sb))
4438                 err = jbd2_journal_wipe(journal, !really_read_only);
4439         if (!err) {
4440                 char *save = kmalloc(EXT4_S_ERR_LEN, GFP_KERNEL);
4441                 if (save)
4442                         memcpy(save, ((char *) es) +
4443                                EXT4_S_ERR_START, EXT4_S_ERR_LEN);
4444                 err = jbd2_journal_load(journal);
4445                 if (save)
4446                         memcpy(((char *) es) + EXT4_S_ERR_START,
4447                                save, EXT4_S_ERR_LEN);
4448                 kfree(save);
4449         }
4450
4451         if (err) {
4452                 ext4_msg(sb, KERN_ERR, "error loading journal");
4453                 jbd2_journal_destroy(journal);
4454                 return err;
4455         }
4456
4457         EXT4_SB(sb)->s_journal = journal;
4458         ext4_clear_journal_err(sb, es);
4459
4460         if (!really_read_only && journal_devnum &&
4461             journal_devnum != le32_to_cpu(es->s_journal_dev)) {
4462                 es->s_journal_dev = cpu_to_le32(journal_devnum);
4463
4464                 /* Make sure we flush the recovery flag to disk. */
4465                 ext4_commit_super(sb, 1);
4466         }
4467
4468         return 0;
4469 }
4470
4471 static int ext4_commit_super(struct super_block *sb, int sync)
4472 {
4473         struct ext4_super_block *es = EXT4_SB(sb)->s_es;
4474         struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;
4475         int error = 0;
4476
4477         if (!sbh || block_device_ejected(sb))
4478                 return error;
4479         /*
4480          * If the file system is mounted read-only, don't update the
4481          * superblock write time.  This avoids updating the superblock
4482          * write time when we are mounting the root file system
4483          * read/only but we need to replay the journal; at that point,
4484          * for people who are east of GMT and who make their clock
4485          * tick in localtime for Windows bug-for-bug compatibility,
4486          * the clock is set in the future, and this will cause e2fsck
4487          * to complain and force a full file system check.
4488          */
4489         if (!(sb->s_flags & MS_RDONLY))
4490                 es->s_wtime = cpu_to_le32(get_seconds());
4491         if (sb->s_bdev->bd_part)
4492                 es->s_kbytes_written =
4493                         cpu_to_le64(EXT4_SB(sb)->s_kbytes_written +
4494                             ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
4495                               EXT4_SB(sb)->s_sectors_written_start) >> 1));
4496         else
4497                 es->s_kbytes_written =
4498                         cpu_to_le64(EXT4_SB(sb)->s_kbytes_written);
4499         if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeclusters_counter))
4500                 ext4_free_blocks_count_set(es,
4501                         EXT4_C2B(EXT4_SB(sb), percpu_counter_sum_positive(
4502                                 &EXT4_SB(sb)->s_freeclusters_counter)));
4503         if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeinodes_counter))
4504                 es->s_free_inodes_count =
4505                         cpu_to_le32(percpu_counter_sum_positive(
4506                                 &EXT4_SB(sb)->s_freeinodes_counter));
4507         BUFFER_TRACE(sbh, "marking dirty");
4508         ext4_superblock_csum_set(sb);
4509         lock_buffer(sbh);
4510         if (buffer_write_io_error(sbh)) {
4511                 /*
4512                  * Oh, dear.  A previous attempt to write the
4513                  * superblock failed.  This could happen because the
4514                  * USB device was yanked out.  Or it could happen to
4515                  * be a transient write error and maybe the block will
4516                  * be remapped.  Nothing we can do but to retry the
4517                  * write and hope for the best.
4518                  */
4519                 ext4_msg(sb, KERN_ERR, "previous I/O error to "
4520                        "superblock detected");
4521                 clear_buffer_write_io_error(sbh);
4522                 set_buffer_uptodate(sbh);
4523         }
4524         mark_buffer_dirty(sbh);
4525         unlock_buffer(sbh);
4526         if (sync) {
4527                 error = __sync_dirty_buffer(sbh,
4528                         test_opt(sb, BARRIER) ? WRITE_FUA : WRITE_SYNC);
4529                 if (error)
4530                         return error;
4531
4532                 error = buffer_write_io_error(sbh);
4533                 if (error) {
4534                         ext4_msg(sb, KERN_ERR, "I/O error while writing "
4535                                "superblock");
4536                         clear_buffer_write_io_error(sbh);
4537                         set_buffer_uptodate(sbh);
4538                 }
4539         }
4540         return error;
4541 }
4542
4543 /*
4544  * Have we just finished recovery?  If so, and if we are mounting (or
4545  * remounting) the filesystem readonly, then we will end up with a
4546  * consistent fs on disk.  Record that fact.
4547  */
4548 static void ext4_mark_recovery_complete(struct super_block *sb,
4549                                         struct ext4_super_block *es)
4550 {
4551         journal_t *journal = EXT4_SB(sb)->s_journal;
4552
4553         if (!ext4_has_feature_journal(sb)) {
4554                 BUG_ON(journal != NULL);
4555                 return;
4556         }
4557         jbd2_journal_lock_updates(journal);
4558         if (jbd2_journal_flush(journal) < 0)
4559                 goto out;
4560
4561         if (ext4_has_feature_journal_needs_recovery(sb) &&
4562             sb->s_flags & MS_RDONLY) {
4563                 ext4_clear_feature_journal_needs_recovery(sb);
4564                 ext4_commit_super(sb, 1);
4565         }
4566
4567 out:
4568         jbd2_journal_unlock_updates(journal);
4569 }
4570
4571 /*
4572  * If we are mounting (or read-write remounting) a filesystem whose journal
4573  * has recorded an error from a previous lifetime, move that error to the
4574  * main filesystem now.
4575  */
4576 static void ext4_clear_journal_err(struct super_block *sb,
4577                                    struct ext4_super_block *es)
4578 {
4579         journal_t *journal;
4580         int j_errno;
4581         const char *errstr;
4582
4583         BUG_ON(!ext4_has_feature_journal(sb));
4584
4585         journal = EXT4_SB(sb)->s_journal;
4586
4587         /*
4588          * Now check for any error status which may have been recorded in the
4589          * journal by a prior ext4_error() or ext4_abort()
4590          */
4591
4592         j_errno = jbd2_journal_errno(journal);
4593         if (j_errno) {
4594                 char nbuf[16];
4595
4596                 errstr = ext4_decode_error(sb, j_errno, nbuf);
4597                 ext4_warning(sb, "Filesystem error recorded "
4598                              "from previous mount: %s", errstr);
4599                 ext4_warning(sb, "Marking fs in need of filesystem check.");
4600
4601                 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
4602                 es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
4603                 ext4_commit_super(sb, 1);
4604
4605                 jbd2_journal_clear_err(journal);
4606                 jbd2_journal_update_sb_errno(journal);
4607         }
4608 }
4609
4610 /*
4611  * Force the running and committing transactions to commit,
4612  * and wait on the commit.
4613  */
4614 int ext4_force_commit(struct super_block *sb)
4615 {
4616         journal_t *journal;
4617
4618         if (sb->s_flags & MS_RDONLY)
4619                 return 0;
4620
4621         journal = EXT4_SB(sb)->s_journal;
4622         return ext4_journal_force_commit(journal);
4623 }
4624
4625 static int ext4_sync_fs(struct super_block *sb, int wait)
4626 {
4627         int ret = 0;
4628         tid_t target;
4629         bool needs_barrier = false;
4630         struct ext4_sb_info *sbi = EXT4_SB(sb);
4631
4632         trace_ext4_sync_fs(sb, wait);
4633         flush_workqueue(sbi->rsv_conversion_wq);
4634         /*
4635          * Writeback quota in non-journalled quota case - journalled quota has
4636          * no dirty dquots
4637          */
4638         dquot_writeback_dquots(sb, -1);
4639         /*
4640          * Data writeback is possible w/o journal transaction, so barrier must
4641          * being sent at the end of the function. But we can skip it if
4642          * transaction_commit will do it for us.
4643          */
4644         if (sbi->s_journal) {
4645                 target = jbd2_get_latest_transaction(sbi->s_journal);
4646                 if (wait && sbi->s_journal->j_flags & JBD2_BARRIER &&
4647                     !jbd2_trans_will_send_data_barrier(sbi->s_journal, target))
4648                         needs_barrier = true;
4649
4650                 if (jbd2_journal_start_commit(sbi->s_journal, &target)) {
4651                         if (wait)
4652                                 ret = jbd2_log_wait_commit(sbi->s_journal,
4653                                                            target);
4654                 }
4655         } else if (wait && test_opt(sb, BARRIER))
4656                 needs_barrier = true;
4657         if (needs_barrier) {
4658                 int err;
4659                 err = blkdev_issue_flush(sb->s_bdev, GFP_KERNEL, NULL);
4660                 if (!ret)
4661                         ret = err;
4662         }
4663
4664         return ret;
4665 }
4666
4667 /*
4668  * LVM calls this function before a (read-only) snapshot is created.  This
4669  * gives us a chance to flush the journal completely and mark the fs clean.
4670  *
4671  * Note that only this function cannot bring a filesystem to be in a clean
4672  * state independently. It relies on upper layer to stop all data & metadata
4673  * modifications.
4674  */
4675 static int ext4_freeze(struct super_block *sb)
4676 {
4677         int error = 0;
4678         journal_t *journal;
4679
4680         if (sb->s_flags & MS_RDONLY)
4681                 return 0;
4682
4683         journal = EXT4_SB(sb)->s_journal;
4684
4685         if (journal) {
4686                 /* Now we set up the journal barrier. */
4687                 jbd2_journal_lock_updates(journal);
4688
4689                 /*
4690                  * Don't clear the needs_recovery flag if we failed to
4691                  * flush the journal.
4692                  */
4693                 error = jbd2_journal_flush(journal);
4694                 if (error < 0)
4695                         goto out;
4696
4697                 /* Journal blocked and flushed, clear needs_recovery flag. */
4698                 ext4_clear_feature_journal_needs_recovery(sb);
4699         }
4700
4701         error = ext4_commit_super(sb, 1);
4702 out:
4703         if (journal)
4704                 /* we rely on upper layer to stop further updates */
4705                 jbd2_journal_unlock_updates(journal);
4706         return error;
4707 }
4708
4709 /*
4710  * Called by LVM after the snapshot is done.  We need to reset the RECOVER
4711  * flag here, even though the filesystem is not technically dirty yet.
4712  */
4713 static int ext4_unfreeze(struct super_block *sb)
4714 {
4715         if (sb->s_flags & MS_RDONLY)
4716                 return 0;
4717
4718         if (EXT4_SB(sb)->s_journal) {
4719                 /* Reset the needs_recovery flag before the fs is unlocked. */
4720                 ext4_set_feature_journal_needs_recovery(sb);
4721         }
4722
4723         ext4_commit_super(sb, 1);
4724         return 0;
4725 }
4726
4727 /*
4728  * Structure to save mount options for ext4_remount's benefit
4729  */
4730 struct ext4_mount_options {
4731         unsigned long s_mount_opt;
4732         unsigned long s_mount_opt2;
4733         kuid_t s_resuid;
4734         kgid_t s_resgid;
4735         unsigned long s_commit_interval;
4736         u32 s_min_batch_time, s_max_batch_time;
4737 #ifdef CONFIG_QUOTA
4738         int s_jquota_fmt;
4739         char *s_qf_names[EXT4_MAXQUOTAS];
4740 #endif
4741 };
4742
4743 static int ext4_remount(struct super_block *sb, int *flags, char *data)
4744 {
4745         struct ext4_super_block *es;
4746         struct ext4_sb_info *sbi = EXT4_SB(sb);
4747         unsigned long old_sb_flags;
4748         struct ext4_mount_options old_opts;
4749         int enable_quota = 0;
4750         ext4_group_t g;
4751         unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
4752         int err = 0;
4753 #ifdef CONFIG_QUOTA
4754         int i, j;
4755 #endif
4756         char *orig_data = kstrdup(data, GFP_KERNEL);
4757
4758         /* Store the original options */
4759         old_sb_flags = sb->s_flags;
4760         old_opts.s_mount_opt = sbi->s_mount_opt;
4761         old_opts.s_mount_opt2 = sbi->s_mount_opt2;
4762         old_opts.s_resuid = sbi->s_resuid;
4763         old_opts.s_resgid = sbi->s_resgid;
4764         old_opts.s_commit_interval = sbi->s_commit_interval;
4765         old_opts.s_min_batch_time = sbi->s_min_batch_time;
4766         old_opts.s_max_batch_time = sbi->s_max_batch_time;
4767 #ifdef CONFIG_QUOTA
4768         old_opts.s_jquota_fmt = sbi->s_jquota_fmt;
4769         for (i = 0; i < EXT4_MAXQUOTAS; i++)
4770                 if (sbi->s_qf_names[i]) {
4771                         old_opts.s_qf_names[i] = kstrdup(sbi->s_qf_names[i],
4772                                                          GFP_KERNEL);
4773                         if (!old_opts.s_qf_names[i]) {
4774                                 for (j = 0; j < i; j++)
4775                                         kfree(old_opts.s_qf_names[j]);
4776                                 kfree(orig_data);
4777                                 return -ENOMEM;
4778                         }
4779                 } else
4780                         old_opts.s_qf_names[i] = NULL;
4781 #endif
4782         if (sbi->s_journal && sbi->s_journal->j_task->io_context)
4783                 journal_ioprio = sbi->s_journal->j_task->io_context->ioprio;
4784
4785         if (!parse_options(data, sb, NULL, &journal_ioprio, 1)) {
4786                 err = -EINVAL;
4787                 goto restore_opts;
4788         }
4789
4790         if ((old_opts.s_mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) ^
4791             test_opt(sb, JOURNAL_CHECKSUM)) {
4792                 ext4_msg(sb, KERN_ERR, "changing journal_checksum "
4793                          "during remount not supported; ignoring");
4794                 sbi->s_mount_opt ^= EXT4_MOUNT_JOURNAL_CHECKSUM;
4795         }
4796
4797         if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
4798                 if (test_opt2(sb, EXPLICIT_DELALLOC)) {
4799                         ext4_msg(sb, KERN_ERR, "can't mount with "
4800                                  "both data=journal and delalloc");
4801                         err = -EINVAL;
4802                         goto restore_opts;
4803                 }
4804                 if (test_opt(sb, DIOREAD_NOLOCK)) {
4805                         ext4_msg(sb, KERN_ERR, "can't mount with "
4806                                  "both data=journal and dioread_nolock");
4807                         err = -EINVAL;
4808                         goto restore_opts;
4809                 }
4810                 if (test_opt(sb, DAX)) {
4811                         ext4_msg(sb, KERN_ERR, "can't mount with "
4812                                  "both data=journal and dax");
4813                         err = -EINVAL;
4814                         goto restore_opts;
4815                 }
4816         }
4817
4818         if ((sbi->s_mount_opt ^ old_opts.s_mount_opt) & EXT4_MOUNT_DAX) {
4819                 ext4_msg(sb, KERN_WARNING, "warning: refusing change of "
4820                         "dax flag with busy inodes while remounting");
4821                 sbi->s_mount_opt ^= EXT4_MOUNT_DAX;
4822         }
4823
4824         if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED)
4825                 ext4_abort(sb, "Abort forced by user");
4826
4827         sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
4828                 (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0);
4829
4830         es = sbi->s_es;
4831
4832         if (sbi->s_journal) {
4833                 ext4_init_journal_params(sb, sbi->s_journal);
4834                 set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
4835         }
4836
4837         if (*flags & MS_LAZYTIME)
4838                 sb->s_flags |= MS_LAZYTIME;
4839
4840         if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY)) {
4841                 if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) {
4842                         err = -EROFS;
4843                         goto restore_opts;
4844                 }
4845
4846                 if (*flags & MS_RDONLY) {
4847                         err = sync_filesystem(sb);
4848                         if (err < 0)
4849                                 goto restore_opts;
4850                         err = dquot_suspend(sb, -1);
4851                         if (err < 0)
4852                                 goto restore_opts;
4853
4854                         /*
4855                          * First of all, the unconditional stuff we have to do
4856                          * to disable replay of the journal when we next remount
4857                          */
4858                         sb->s_flags |= MS_RDONLY;
4859
4860                         /*
4861                          * OK, test if we are remounting a valid rw partition
4862                          * readonly, and if so set the rdonly flag and then
4863                          * mark the partition as valid again.
4864                          */
4865                         if (!(es->s_state & cpu_to_le16(EXT4_VALID_FS)) &&
4866                             (sbi->s_mount_state & EXT4_VALID_FS))
4867                                 es->s_state = cpu_to_le16(sbi->s_mount_state);
4868
4869                         if (sbi->s_journal)
4870                                 ext4_mark_recovery_complete(sb, es);
4871                 } else {
4872                         /* Make sure we can mount this feature set readwrite */
4873                         if (ext4_has_feature_readonly(sb) ||
4874                             !ext4_feature_set_ok(sb, 0)) {
4875                                 err = -EROFS;
4876                                 goto restore_opts;
4877                         }
4878                         /*
4879                          * Make sure the group descriptor checksums
4880                          * are sane.  If they aren't, refuse to remount r/w.
4881                          */
4882                         for (g = 0; g < sbi->s_groups_count; g++) {
4883                                 struct ext4_group_desc *gdp =
4884                                         ext4_get_group_desc(sb, g, NULL);
4885
4886                                 if (!ext4_group_desc_csum_verify(sb, g, gdp)) {
4887                                         ext4_msg(sb, KERN_ERR,
4888                "ext4_remount: Checksum for group %u failed (%u!=%u)",
4889                 g, le16_to_cpu(ext4_group_desc_csum(sb, g, gdp)),
4890                                                le16_to_cpu(gdp->bg_checksum));
4891                                         err = -EFSBADCRC;
4892                                         goto restore_opts;
4893                                 }
4894                         }
4895
4896                         /*
4897                          * If we have an unprocessed orphan list hanging
4898                          * around from a previously readonly bdev mount,
4899                          * require a full umount/remount for now.
4900                          */
4901                         if (es->s_last_orphan) {
4902                                 ext4_msg(sb, KERN_WARNING, "Couldn't "
4903                                        "remount RDWR because of unprocessed "
4904                                        "orphan inode list.  Please "
4905                                        "umount/remount instead");
4906                                 err = -EINVAL;
4907                                 goto restore_opts;
4908                         }
4909
4910                         /*
4911                          * Mounting a RDONLY partition read-write, so reread
4912                          * and store the current valid flag.  (It may have
4913                          * been changed by e2fsck since we originally mounted
4914                          * the partition.)
4915                          */
4916                         if (sbi->s_journal)
4917                                 ext4_clear_journal_err(sb, es);
4918                         sbi->s_mount_state = le16_to_cpu(es->s_state);
4919                         if (!ext4_setup_super(sb, es, 0))
4920                                 sb->s_flags &= ~MS_RDONLY;
4921                         if (ext4_has_feature_mmp(sb))
4922                                 if (ext4_multi_mount_protect(sb,
4923                                                 le64_to_cpu(es->s_mmp_block))) {
4924                                         err = -EROFS;
4925                                         goto restore_opts;
4926                                 }
4927                         enable_quota = 1;
4928                 }
4929         }
4930
4931         /*
4932          * Reinitialize lazy itable initialization thread based on
4933          * current settings
4934          */
4935         if ((sb->s_flags & MS_RDONLY) || !test_opt(sb, INIT_INODE_TABLE))
4936                 ext4_unregister_li_request(sb);
4937         else {
4938                 ext4_group_t first_not_zeroed;
4939                 first_not_zeroed = ext4_has_uninit_itable(sb);
4940                 ext4_register_li_request(sb, first_not_zeroed);
4941         }
4942
4943         ext4_setup_system_zone(sb);
4944         if (sbi->s_journal == NULL && !(old_sb_flags & MS_RDONLY))
4945                 ext4_commit_super(sb, 1);
4946
4947 #ifdef CONFIG_QUOTA
4948         /* Release old quota file names */
4949         for (i = 0; i < EXT4_MAXQUOTAS; i++)
4950                 kfree(old_opts.s_qf_names[i]);
4951         if (enable_quota) {
4952                 if (sb_any_quota_suspended(sb))
4953                         dquot_resume(sb, -1);
4954                 else if (ext4_has_feature_quota(sb)) {
4955                         err = ext4_enable_quotas(sb);
4956                         if (err)
4957                                 goto restore_opts;
4958                 }
4959         }
4960 #endif
4961
4962         *flags = (*flags & ~MS_LAZYTIME) | (sb->s_flags & MS_LAZYTIME);
4963         ext4_msg(sb, KERN_INFO, "re-mounted. Opts: %s", orig_data);
4964         kfree(orig_data);
4965         return 0;
4966
4967 restore_opts:
4968         sb->s_flags = old_sb_flags;
4969         sbi->s_mount_opt = old_opts.s_mount_opt;
4970         sbi->s_mount_opt2 = old_opts.s_mount_opt2;
4971         sbi->s_resuid = old_opts.s_resuid;
4972         sbi->s_resgid = old_opts.s_resgid;
4973         sbi->s_commit_interval = old_opts.s_commit_interval;
4974         sbi->s_min_batch_time = old_opts.s_min_batch_time;
4975         sbi->s_max_batch_time = old_opts.s_max_batch_time;
4976 #ifdef CONFIG_QUOTA
4977         sbi->s_jquota_fmt = old_opts.s_jquota_fmt;
4978         for (i = 0; i < EXT4_MAXQUOTAS; i++) {
4979                 kfree(sbi->s_qf_names[i]);
4980                 sbi->s_qf_names[i] = old_opts.s_qf_names[i];
4981         }
4982 #endif
4983         kfree(orig_data);
4984         return err;
4985 }
4986
4987 #ifdef CONFIG_QUOTA
4988 static int ext4_statfs_project(struct super_block *sb,
4989                                kprojid_t projid, struct kstatfs *buf)
4990 {
4991         struct kqid qid;
4992         struct dquot *dquot;
4993         u64 limit;
4994         u64 curblock;
4995
4996         qid = make_kqid_projid(projid);
4997         dquot = dqget(sb, qid);
4998         if (IS_ERR(dquot))
4999                 return PTR_ERR(dquot);
5000         spin_lock(&dq_data_lock);
5001
5002         limit = (dquot->dq_dqb.dqb_bsoftlimit ?
5003                  dquot->dq_dqb.dqb_bsoftlimit :
5004                  dquot->dq_dqb.dqb_bhardlimit) >> sb->s_blocksize_bits;
5005         if (limit && buf->f_blocks > limit) {
5006                 curblock = dquot->dq_dqb.dqb_curspace >> sb->s_blocksize_bits;
5007                 buf->f_blocks = limit;
5008                 buf->f_bfree = buf->f_bavail =
5009                         (buf->f_blocks > curblock) ?
5010                          (buf->f_blocks - curblock) : 0;
5011         }
5012
5013         limit = dquot->dq_dqb.dqb_isoftlimit ?
5014                 dquot->dq_dqb.dqb_isoftlimit :
5015                 dquot->dq_dqb.dqb_ihardlimit;
5016         if (limit && buf->f_files > limit) {
5017                 buf->f_files = limit;
5018                 buf->f_ffree =
5019                         (buf->f_files > dquot->dq_dqb.dqb_curinodes) ?
5020                          (buf->f_files - dquot->dq_dqb.dqb_curinodes) : 0;
5021         }
5022
5023         spin_unlock(&dq_data_lock);
5024         dqput(dquot);
5025         return 0;
5026 }
5027 #endif
5028
5029 static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
5030 {
5031         struct super_block *sb = dentry->d_sb;
5032         struct ext4_sb_info *sbi = EXT4_SB(sb);
5033         struct ext4_super_block *es = sbi->s_es;
5034         ext4_fsblk_t overhead = 0, resv_blocks;
5035         u64 fsid;
5036         s64 bfree;
5037         resv_blocks = EXT4_C2B(sbi, atomic64_read(&sbi->s_resv_clusters));
5038
5039         if (!test_opt(sb, MINIX_DF))
5040                 overhead = sbi->s_overhead;
5041
5042         buf->f_type = EXT4_SUPER_MAGIC;
5043         buf->f_bsize = sb->s_blocksize;
5044         buf->f_blocks = ext4_blocks_count(es) - EXT4_C2B(sbi, overhead);
5045         bfree = percpu_counter_sum_positive(&sbi->s_freeclusters_counter) -
5046                 percpu_counter_sum_positive(&sbi->s_dirtyclusters_counter);
5047         /* prevent underflow in case that few free space is available */
5048         buf->f_bfree = EXT4_C2B(sbi, max_t(s64, bfree, 0));
5049         buf->f_bavail = buf->f_bfree -
5050                         (ext4_r_blocks_count(es) + resv_blocks);
5051         if (buf->f_bfree < (ext4_r_blocks_count(es) + resv_blocks))
5052                 buf->f_bavail = 0;
5053         buf->f_files = le32_to_cpu(es->s_inodes_count);
5054         buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter);
5055         buf->f_namelen = EXT4_NAME_LEN;
5056         fsid = le64_to_cpup((void *)es->s_uuid) ^
5057                le64_to_cpup((void *)es->s_uuid + sizeof(u64));
5058         buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL;
5059         buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL;
5060
5061 #ifdef CONFIG_QUOTA
5062         if (ext4_test_inode_flag(dentry->d_inode, EXT4_INODE_PROJINHERIT) &&
5063             sb_has_quota_limits_enabled(sb, PRJQUOTA))
5064                 ext4_statfs_project(sb, EXT4_I(dentry->d_inode)->i_projid, buf);
5065 #endif
5066         return 0;
5067 }
5068
5069 /* Helper function for writing quotas on sync - we need to start transaction
5070  * before quota file is locked for write. Otherwise there are possible deadlocks:
5071  * Process 1                         Process 2
5072  * ext4_create()                     quota_sync()
5073  *   jbd2_journal_start()                  write_dquot()
5074  *   dquot_initialize()                         down(dqio_mutex)
5075  *     down(dqio_mutex)                    jbd2_journal_start()
5076  *
5077  */
5078
5079 #ifdef CONFIG_QUOTA
5080
5081 static inline struct inode *dquot_to_inode(struct dquot *dquot)
5082 {
5083         return sb_dqopt(dquot->dq_sb)->files[dquot->dq_id.type];
5084 }
5085
5086 static int ext4_write_dquot(struct dquot *dquot)
5087 {
5088         int ret, err;
5089         handle_t *handle;
5090         struct inode *inode;
5091
5092         inode = dquot_to_inode(dquot);
5093         handle = ext4_journal_start(inode, EXT4_HT_QUOTA,
5094                                     EXT4_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
5095         if (IS_ERR(handle))
5096                 return PTR_ERR(handle);
5097         ret = dquot_commit(dquot);
5098         err = ext4_journal_stop(handle);
5099         if (!ret)
5100                 ret = err;
5101         return ret;
5102 }
5103
5104 static int ext4_acquire_dquot(struct dquot *dquot)
5105 {
5106         int ret, err;
5107         handle_t *handle;
5108
5109         handle = ext4_journal_start(dquot_to_inode(dquot), EXT4_HT_QUOTA,
5110                                     EXT4_QUOTA_INIT_BLOCKS(dquot->dq_sb));
5111         if (IS_ERR(handle))
5112                 return PTR_ERR(handle);
5113         ret = dquot_acquire(dquot);
5114         err = ext4_journal_stop(handle);
5115         if (!ret)
5116                 ret = err;
5117         return ret;
5118 }
5119
5120 static int ext4_release_dquot(struct dquot *dquot)
5121 {
5122         int ret, err;
5123         handle_t *handle;
5124
5125         handle = ext4_journal_start(dquot_to_inode(dquot), EXT4_HT_QUOTA,
5126                                     EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb));
5127         if (IS_ERR(handle)) {
5128                 /* Release dquot anyway to avoid endless cycle in dqput() */
5129                 dquot_release(dquot);
5130                 return PTR_ERR(handle);
5131         }
5132         ret = dquot_release(dquot);
5133         err = ext4_journal_stop(handle);
5134         if (!ret)
5135                 ret = err;
5136         return ret;
5137 }
5138
5139 static int ext4_mark_dquot_dirty(struct dquot *dquot)
5140 {
5141         struct super_block *sb = dquot->dq_sb;
5142         struct ext4_sb_info *sbi = EXT4_SB(sb);
5143
5144         /* Are we journaling quotas? */
5145         if (ext4_has_feature_quota(sb) ||
5146             sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) {
5147                 dquot_mark_dquot_dirty(dquot);
5148                 return ext4_write_dquot(dquot);
5149         } else {
5150                 return dquot_mark_dquot_dirty(dquot);
5151         }
5152 }
5153
5154 static int ext4_write_info(struct super_block *sb, int type)
5155 {
5156         int ret, err;
5157         handle_t *handle;
5158
5159         /* Data block + inode block */
5160         handle = ext4_journal_start(d_inode(sb->s_root), EXT4_HT_QUOTA, 2);
5161         if (IS_ERR(handle))
5162                 return PTR_ERR(handle);
5163         ret = dquot_commit_info(sb, type);
5164         err = ext4_journal_stop(handle);
5165         if (!ret)
5166                 ret = err;
5167         return ret;
5168 }
5169
5170 /*
5171  * Turn on quotas during mount time - we need to find
5172  * the quota file and such...
5173  */
5174 static int ext4_quota_on_mount(struct super_block *sb, int type)
5175 {
5176         return dquot_quota_on_mount(sb, EXT4_SB(sb)->s_qf_names[type],
5177                                         EXT4_SB(sb)->s_jquota_fmt, type);
5178 }
5179
/*
 * Set the lockdep subclass of @inode's i_data_sem to @subclass.
 *
 * Callers in this file pass I_DATA_SEM_QUOTA before turning quota on for a
 * quota file and I_DATA_SEM_NORMAL to undo that when turning it on fails.
 */
static void lockdep_set_quota_inode(struct inode *inode, int subclass)
{
        struct ext4_inode_info *ei = EXT4_I(inode);

        /* The first argument of lockdep_set_subclass has to be
         * *exactly* the same as the argument to init_rwsem() --- in
         * this case, in init_once() --- or lockdep gets unhappy
         * because the name of the lock is set using the
         * stringification of the argument to init_rwsem().
         * Do not rename 'ei' or rewrite the expression below.
         */
        (void) ei;      /* shut up clang warning if !CONFIG_LOCKDEP */
        lockdep_set_subclass(&ei->i_data_sem, subclass);
}
5193
5194 /*
5195  * Standard function to be called on quota_on
5196  */
5197 static int ext4_quota_on(struct super_block *sb, int type, int format_id,
5198                          struct path *path)
5199 {
5200         int err;
5201
5202         if (!test_opt(sb, QUOTA))
5203                 return -EINVAL;
5204
5205         /* Quotafile not on the same filesystem? */
5206         if (path->dentry->d_sb != sb)
5207                 return -EXDEV;
5208         /* Journaling quota? */
5209         if (EXT4_SB(sb)->s_qf_names[type]) {
5210                 /* Quotafile not in fs root? */
5211                 if (path->dentry->d_parent != sb->s_root)
5212                         ext4_msg(sb, KERN_WARNING,
5213                                 "Quota file not on filesystem root. "
5214                                 "Journaled quota will not work");
5215         }
5216
5217         /*
5218          * When we journal data on quota file, we have to flush journal to see
5219          * all updates to the file when we bypass pagecache...
5220          */
5221         if (EXT4_SB(sb)->s_journal &&
5222             ext4_should_journal_data(d_inode(path->dentry))) {
5223                 /*
5224                  * We don't need to lock updates but journal_flush() could
5225                  * otherwise be livelocked...
5226                  */
5227                 jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
5228                 err = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
5229                 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
5230                 if (err)
5231                         return err;
5232         }
5233         lockdep_set_quota_inode(path->dentry->d_inode, I_DATA_SEM_QUOTA);
5234         err = dquot_quota_on(sb, type, format_id, path);
5235         if (err)
5236                 lockdep_set_quota_inode(path->dentry->d_inode,
5237                                              I_DATA_SEM_NORMAL);
5238         return err;
5239 }
5240
5241 static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
5242                              unsigned int flags)
5243 {
5244         int err;
5245         struct inode *qf_inode;
5246         unsigned long qf_inums[EXT4_MAXQUOTAS] = {
5247                 le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum),
5248                 le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum),
5249                 le32_to_cpu(EXT4_SB(sb)->s_es->s_prj_quota_inum)
5250         };
5251
5252         BUG_ON(!ext4_has_feature_quota(sb));
5253
5254         if (!qf_inums[type])
5255                 return -EPERM;
5256
5257         qf_inode = ext4_iget(sb, qf_inums[type]);
5258         if (IS_ERR(qf_inode)) {
5259                 ext4_error(sb, "Bad quota inode # %lu", qf_inums[type]);
5260                 return PTR_ERR(qf_inode);
5261         }
5262
5263         /* Don't account quota for quota files to avoid recursion */
5264         qf_inode->i_flags |= S_NOQUOTA;
5265         lockdep_set_quota_inode(qf_inode, I_DATA_SEM_QUOTA);
5266         err = dquot_enable(qf_inode, type, format_id, flags);
5267         iput(qf_inode);
5268         if (err)
5269                 lockdep_set_quota_inode(qf_inode, I_DATA_SEM_NORMAL);
5270
5271         return err;
5272 }
5273
5274 /* Enable usage tracking for all quota types. */
5275 static int ext4_enable_quotas(struct super_block *sb)
5276 {
5277         int type, err = 0;
5278         unsigned long qf_inums[EXT4_MAXQUOTAS] = {
5279                 le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum),
5280                 le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum),
5281                 le32_to_cpu(EXT4_SB(sb)->s_es->s_prj_quota_inum)
5282         };
5283         bool quota_mopt[EXT4_MAXQUOTAS] = {
5284                 test_opt(sb, USRQUOTA),
5285                 test_opt(sb, GRPQUOTA),
5286                 test_opt(sb, PRJQUOTA),
5287         };
5288
5289         sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE;
5290         for (type = 0; type < EXT4_MAXQUOTAS; type++) {
5291                 if (qf_inums[type]) {
5292                         err = ext4_quota_enable(sb, type, QFMT_VFS_V1,
5293                                 DQUOT_USAGE_ENABLED |
5294                                 (quota_mopt[type] ? DQUOT_LIMITS_ENABLED : 0));
5295                         if (err) {
5296                                 ext4_warning(sb,
5297                                         "Failed to enable quota tracking "
5298                                         "(type=%d, err=%d). Please run "
5299                                         "e2fsck to fix.", type, err);
5300                                 return err;
5301                         }
5302                 }
5303         }
5304         return 0;
5305 }
5306
5307 static int ext4_quota_off(struct super_block *sb, int type)
5308 {
5309         struct inode *inode = sb_dqopt(sb)->files[type];
5310         handle_t *handle;
5311
5312         /* Force all delayed allocation blocks to be allocated.
5313          * Caller already holds s_umount sem */
5314         if (test_opt(sb, DELALLOC))
5315                 sync_filesystem(sb);
5316
5317         if (!inode)
5318                 goto out;
5319
5320         /* Update modification times of quota files when userspace can
5321          * start looking at them */
5322         handle = ext4_journal_start(inode, EXT4_HT_QUOTA, 1);
5323         if (IS_ERR(handle))
5324                 goto out;
5325         inode->i_mtime = inode->i_ctime = CURRENT_TIME;
5326         ext4_mark_inode_dirty(handle, inode);
5327         ext4_journal_stop(handle);
5328
5329 out:
5330         return dquot_quota_off(sb, type);
5331 }
5332
5333 /* Read data from quotafile - avoid pagecache and such because we cannot afford
5334  * acquiring the locks... As quota files are never truncated and quota code
5335  * itself serializes the operations (and no one else should touch the files)
5336  * we don't have to be afraid of races */
5337 static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
5338                                size_t len, loff_t off)
5339 {
5340         struct inode *inode = sb_dqopt(sb)->files[type];
5341         ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
5342         int offset = off & (sb->s_blocksize - 1);
5343         int tocopy;
5344         size_t toread;
5345         struct buffer_head *bh;
5346         loff_t i_size = i_size_read(inode);
5347
5348         if (off > i_size)
5349                 return 0;
5350         if (off+len > i_size)
5351                 len = i_size-off;
5352         toread = len;
5353         while (toread > 0) {
5354                 tocopy = sb->s_blocksize - offset < toread ?
5355                                 sb->s_blocksize - offset : toread;
5356                 bh = ext4_bread(NULL, inode, blk, 0);
5357                 if (IS_ERR(bh))
5358                         return PTR_ERR(bh);
5359                 if (!bh)        /* A hole? */
5360                         memset(data, 0, tocopy);
5361                 else
5362                         memcpy(data, bh->b_data+offset, tocopy);
5363                 brelse(bh);
5364                 offset = 0;
5365                 toread -= tocopy;
5366                 data += tocopy;
5367                 blk++;
5368         }
5369         return len;
5370 }
5371
5372 /* Write to quotafile (we know the transaction is already started and has
5373  * enough credits) */
5374 static ssize_t ext4_quota_write(struct super_block *sb, int type,
5375                                 const char *data, size_t len, loff_t off)
5376 {
5377         struct inode *inode = sb_dqopt(sb)->files[type];
5378         ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
5379         int err, offset = off & (sb->s_blocksize - 1);
5380         int retries = 0;
5381         struct buffer_head *bh;
5382         handle_t *handle = journal_current_handle();
5383
5384         if (EXT4_SB(sb)->s_journal && !handle) {
5385                 ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)"
5386                         " cancelled because transaction is not started",
5387                         (unsigned long long)off, (unsigned long long)len);
5388                 return -EIO;
5389         }
5390         /*
5391          * Since we account only one data block in transaction credits,
5392          * then it is impossible to cross a block boundary.
5393          */
5394         if (sb->s_blocksize - offset < len) {
5395                 ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)"
5396                         " cancelled because not block aligned",
5397                         (unsigned long long)off, (unsigned long long)len);
5398                 return -EIO;
5399         }
5400
5401         do {
5402                 bh = ext4_bread(handle, inode, blk,
5403                                 EXT4_GET_BLOCKS_CREATE |
5404                                 EXT4_GET_BLOCKS_METADATA_NOFAIL);
5405         } while (IS_ERR(bh) && (PTR_ERR(bh) == -ENOSPC) &&
5406                  ext4_should_retry_alloc(inode->i_sb, &retries));
5407         if (IS_ERR(bh))
5408                 return PTR_ERR(bh);
5409         if (!bh)
5410                 goto out;
5411         BUFFER_TRACE(bh, "get write access");
5412         err = ext4_journal_get_write_access(handle, bh);
5413         if (err) {
5414                 brelse(bh);
5415                 return err;
5416         }
5417         lock_buffer(bh);
5418         memcpy(bh->b_data+offset, data, len);
5419         flush_dcache_page(bh->b_page);
5420         unlock_buffer(bh);
5421         err = ext4_handle_dirty_metadata(handle, NULL, bh);
5422         brelse(bh);
5423 out:
5424         if (inode->i_size < off + len) {
5425                 i_size_write(inode, off + len);
5426                 EXT4_I(inode)->i_disksize = inode->i_size;
5427                 ext4_mark_inode_dirty(handle, inode);
5428         }
5429         return len;
5430 }
5431
5432 static int ext4_get_next_id(struct super_block *sb, struct kqid *qid)
5433 {
5434         const struct quota_format_ops   *ops;
5435
5436         if (!sb_has_quota_loaded(sb, qid->type))
5437                 return -ESRCH;
5438         ops = sb_dqopt(sb)->ops[qid->type];
5439         if (!ops || !ops->get_next_id)
5440                 return -ENOSYS;
5441         return dquot_get_next_id(sb, qid);
5442 }
5443 #endif
5444
5445 static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,
5446                        const char *dev_name, void *data)
5447 {
5448         return mount_bdev(fs_type, flags, dev_name, data, ext4_fill_super);
5449 }
5450
#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT2)
/* Register ext2_fs_type; a failure is only logged, not propagated. */
static inline void register_as_ext2(void)
{
        int err;

        err = register_filesystem(&ext2_fs_type);
        if (!err)
                return;

        printk(KERN_WARNING
               "EXT4-fs: Unable to register as ext2 (%d)\n", err);
}

/* Undo register_as_ext2(). */
static inline void unregister_as_ext2(void)
{
        unregister_filesystem(&ext2_fs_type);
}

/*
 * Return 1 if @sb's feature set is acceptable for mounting as ext2, else 0.
 * Unknown incompat features always reject; unknown ro_compat features
 * reject only when the mount is not read-only.
 */
static inline int ext2_feature_set_ok(struct super_block *sb)
{
        if (ext4_has_unknown_ext2_incompat_features(sb))
                return 0;

        return (sb->s_flags & MS_RDONLY) ||
               !ext4_has_unknown_ext2_ro_compat_features(sb);
}
#else
/* ext4 is not used for ext2 in this configuration: no-op stubs. */
static inline void register_as_ext2(void)
{
}

static inline void unregister_as_ext2(void)
{
}

static inline int ext2_feature_set_ok(struct super_block *sb)
{
        return 0;
}
#endif
5480
5481 static inline void register_as_ext3(void)
5482 {
5483         int err = register_filesystem(&ext3_fs_type);
5484         if (err)
5485                 printk(KERN_WARNING
5486                        "EXT4-fs: Unable to register as ext3 (%d)\n", err);
5487 }
5488
/* Unregister ext3_fs_type; counterpart of register_as_ext3(). */
static inline void unregister_as_ext3(void)
{
        unregister_filesystem(&ext3_fs_type);
}
5493
5494 static inline int ext3_feature_set_ok(struct super_block *sb)
5495 {
5496         if (ext4_has_unknown_ext3_incompat_features(sb))
5497                 return 0;
5498         if (!ext4_has_feature_journal(sb))
5499                 return 0;
5500         if (sb->s_flags & MS_RDONLY)
5501                 return 1;
5502         if (ext4_has_unknown_ext3_ro_compat_features(sb))
5503                 return 0;
5504         return 1;
5505 }
5506
5507 static struct file_system_type ext4_fs_type = {
5508         .owner          = THIS_MODULE,
5509         .name           = "ext4",
5510         .mount          = ext4_mount,
5511         .kill_sb        = kill_block_super,
5512         .fs_flags       = FS_REQUIRES_DEV,
5513 };
5514 MODULE_ALIAS_FS("ext4");
5515
/*
 * Shared across all ext4 file systems.  Each of the EXT4_WQ_HASH_SZ wait
 * queue heads is initialised in ext4_init_fs().
 */
wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ];
5518
5519 static int __init ext4_init_fs(void)
5520 {
5521         int i, err;
5522
5523         ratelimit_state_init(&ext4_mount_msg_ratelimit, 30 * HZ, 64);
5524         ext4_li_info = NULL;
5525         mutex_init(&ext4_li_mtx);
5526
5527         /* Build-time check for flags consistency */
5528         ext4_check_flag_values();
5529
5530         for (i = 0; i < EXT4_WQ_HASH_SZ; i++)
5531                 init_waitqueue_head(&ext4__ioend_wq[i]);
5532
5533         err = ext4_init_es();
5534         if (err)
5535                 return err;
5536
5537         err = ext4_init_pageio();
5538         if (err)
5539                 goto out5;
5540
5541         err = ext4_init_system_zone();
5542         if (err)
5543                 goto out4;
5544
5545         err = ext4_init_sysfs();
5546         if (err)
5547                 goto out3;
5548
5549         err = ext4_init_mballoc();
5550         if (err)
5551                 goto out2;
5552         err = init_inodecache();
5553         if (err)
5554                 goto out1;
5555         register_as_ext3();
5556         register_as_ext2();
5557         err = register_filesystem(&ext4_fs_type);
5558         if (err)
5559                 goto out;
5560
5561         return 0;
5562 out:
5563         unregister_as_ext2();
5564         unregister_as_ext3();
5565         destroy_inodecache();
5566 out1:
5567         ext4_exit_mballoc();
5568 out2:
5569         ext4_exit_sysfs();
5570 out3:
5571         ext4_exit_system_zone();
5572 out4:
5573         ext4_exit_pageio();
5574 out5:
5575         ext4_exit_es();
5576
5577         return err;
5578 }
5579
/*
 * Module exit: destroy the lazy-init thread, unregister the ext2, ext3 and
 * ext4 filesystem types, then tear down the inode cache, mballoc, sysfs,
 * system zone, pageio and es subsystems, i.e. the reverse of the order in
 * which ext4_init_fs() initialises them.
 */
static void __exit ext4_exit_fs(void)
{
        ext4_destroy_lazyinit_thread();
        unregister_as_ext2();
        unregister_as_ext3();
        unregister_filesystem(&ext4_fs_type);
        destroy_inodecache();
        ext4_exit_mballoc();
        ext4_exit_sysfs();
        ext4_exit_system_zone();
        ext4_exit_pageio();
        ext4_exit_es();
}
5593
/* Module metadata, and hookup of ext4_init_fs()/ext4_exit_fs() as the module entry points. */
MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
MODULE_DESCRIPTION("Fourth Extended Filesystem");
MODULE_LICENSE("GPL");
module_init(ext4_init_fs)
module_exit(ext4_exit_fs)