fs/ext4/mmp.c

   1 #include <linux/fs.h>
   2 #include <linux/random.h>
   3 #include <linux/buffer_head.h>
   4 #include <linux/utsname.h>
   5 #include <linux/kthread.h>
   6
   7 #include "ext4.h"
   8
   9 /* Checksumming functions */
  10 static __le32 ext4_mmp_csum(struct super_block *sb, struct mmp_struct *mmp)
  11 {
  12         struct ext4_sb_info *sbi = EXT4_SB(sb);
  13         int offset = offsetof(struct mmp_struct, mmp_checksum);
  14         __u32 csum;
  15
  16         csum = ext4_chksum(sbi, sbi->s_csum_seed, (char *)mmp, offset);
  17
  18         return cpu_to_le32(csum);
  19 }
  20
  21 static int ext4_mmp_csum_verify(struct super_block *sb, struct mmp_struct *mmp)
  22 {
  23         if (!ext4_has_metadata_csum(sb))
  24                 return 1;
  25
  26         return mmp->mmp_checksum == ext4_mmp_csum(sb, mmp);
  27 }
  28
  29 static void ext4_mmp_csum_set(struct super_block *sb, struct mmp_struct *mmp)
  30 {
  31         if (!ext4_has_metadata_csum(sb))
  32                 return;
  33
  34         mmp->mmp_checksum = ext4_mmp_csum(sb, mmp);
  35 }
  36
  37 /*
  38  * Write the MMP block using WRITE_SYNC to try to get the block on-disk
  39  * faster.
  40  */
  41 static int write_mmp_block(struct super_block *sb, struct buffer_head *bh)
  42 {
  43         struct mmp_struct *mmp = (struct mmp_struct *)(bh->b_data);
  44
  45         /*
  46          * We protect against freezing so that we don't create dirty buffers
  47          * on frozen filesystem.
  48          */
  49         sb_start_write(sb);
  50         ext4_mmp_csum_set(sb, mmp);
  51         mark_buffer_dirty(bh);
  52         lock_buffer(bh);
  53         bh->b_end_io = end_buffer_write_sync;
  54         get_bh(bh);
  55         submit_bh(WRITE_SYNC | REQ_META | REQ_PRIO, bh);
  56         wait_on_buffer(bh);
  57         sb_end_write(sb);
  58         if (unlikely(!buffer_uptodate(bh)))
  59                 return 1;
  60
  61         return 0;
  62 }
  63
  64 /*
  65  * Read the MMP block. It _must_ be read from disk and hence we clear the
  66  * uptodate flag on the buffer.
  67  */
  68 static int read_mmp_block(struct super_block *sb, struct buffer_head **bh,
  69                           ext4_fsblk_t mmp_block)
  70 {
  71         struct mmp_struct *mmp;
  72         int ret;
  73
  74         if (*bh)
  75                 clear_buffer_uptodate(*bh);
  76
  77         /* This would be sb_bread(sb, mmp_block), except we need to be sure
  78          * that the MD RAID device cache has been bypassed, and that the read
  79          * is not blocked in the elevator. */
  80         if (!*bh) {
  81                 *bh = sb_getblk(sb, mmp_block);
  82                 if (!*bh) {
  83                         ret = -ENOMEM;
  84                         goto warn_exit;
  85                 }
  86         }
  87
  88         get_bh(*bh);
  89         lock_buffer(*bh);
  90         (*bh)->b_end_io = end_buffer_read_sync;
  91         submit_bh(READ_SYNC | REQ_META | REQ_PRIO, *bh);
  92         wait_on_buffer(*bh);
  93         if (!buffer_uptodate(*bh)) {
  94                 brelse(*bh);
  95                 *bh = NULL;
  96                 ret = -EIO;
  97                 goto warn_exit;
  98         }
  99
 100         mmp = (struct mmp_struct *)((*bh)->b_data);
 101         if (le32_to_cpu(mmp->mmp_magic) != EXT4_MMP_MAGIC)
 102                 ret = -EFSCORRUPTED;
 103         else if (!ext4_mmp_csum_verify(sb, mmp))
 104                 ret = -EFSBADCRC;
 105         else
 106                 return 0;
 107
 108 warn_exit:
 109         ext4_warning(sb, "Error %d while reading MMP block %llu",
 110                      ret, mmp_block);
 111         return ret;
 112 }
 113
 114 /*
 115  * Dump as much information as possible to help the admin.
 116  */
 117 void __dump_mmp_msg(struct super_block *sb, struct mmp_struct *mmp,
 118                     const char *function, unsigned int line, const char *msg)
 119 {
 120         __ext4_warning(sb, function, line, "%s", msg);
 121         __ext4_warning(sb, function, line,
 122                        "MMP failure info: last update time: %llu, last update "
 123                        "node: %s, last update device: %s\n",
 124                        (long long unsigned int) le64_to_cpu(mmp->mmp_time),
 125                        mmp->mmp_nodename, mmp->mmp_bdevname);
 126 }
 127
 128 /*
 129  * kmmpd will update the MMP sequence every s_mmp_update_interval seconds
 130  */
 131 static int kmmpd(void *data)
 132 {
 133         struct super_block *sb = ((struct mmpd_data *) data)->sb;
 134         struct buffer_head *bh = ((struct mmpd_data *) data)->bh;
 135         struct ext4_super_block *es = EXT4_SB(sb)->s_es;
 136         struct mmp_struct *mmp;
 137         ext4_fsblk_t mmp_block;
 138         u32 seq = 0;
 139         unsigned long failed_writes = 0;
 140         int mmp_update_interval = le16_to_cpu(es->s_mmp_update_interval);
 141         unsigned mmp_check_interval;
 142         unsigned long last_update_time;
 143         unsigned long diff;
 144         int retval;
 145
 146         mmp_block = le64_to_cpu(es->s_mmp_block);
 147         mmp = (struct mmp_struct *)(bh->b_data);
 148         mmp->mmp_time = cpu_to_le64(get_seconds());
 149         /*
 150          * Start with the higher mmp_check_interval and reduce it if
 151          * the MMP block is being updated on time.
 152          */
 153         mmp_check_interval = max(EXT4_MMP_CHECK_MULT * mmp_update_interval,
 154                                  EXT4_MMP_MIN_CHECK_INTERVAL);
 155         mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval);
 156         bdevname(bh->b_bdev, mmp->mmp_bdevname);
 157
 158         memcpy(mmp->mmp_nodename, init_utsname()->nodename,
 159                sizeof(mmp->mmp_nodename));
 160
 161         while (!kthread_should_stop()) {
 162                 if (++seq > EXT4_MMP_SEQ_MAX)
 163                         seq = 1;
 164
 165                 mmp->mmp_seq = cpu_to_le32(seq);
 166                 mmp->mmp_time = cpu_to_le64(get_seconds());
 167                 last_update_time = jiffies;
 168
 169                 retval = write_mmp_block(sb, bh);
 170                 /*
 171                  * Don't spew too many error messages. Print one every
 172                  * (s_mmp_update_interval * 60) seconds.
 173                  */
 174                 if (retval) {
 175                         if ((failed_writes % 60) == 0)
 176                                 ext4_error(sb, "Error writing to MMP block");
 177                         failed_writes++;
 178                 }
 179
 180                 if (!(le32_to_cpu(es->s_feature_incompat) &
 181                     EXT4_FEATURE_INCOMPAT_MMP)) {
 182                         ext4_warning(sb, "kmmpd being stopped since MMP feature"
 183                                      " has been disabled.");
 184                         EXT4_SB(sb)->s_mmp_tsk = NULL;
 185                         goto failed;
 186                 }
 187
 188                 if (sb->s_flags & MS_RDONLY) {
 189                         ext4_warning(sb, "kmmpd being stopped since filesystem "
 190                                      "has been remounted as readonly.");
 191                         EXT4_SB(sb)->s_mmp_tsk = NULL;
 192                         goto failed;
 193                 }
 194
 195                 diff = jiffies - last_update_time;
 196                 if (diff < mmp_update_interval * HZ)
 197                         schedule_timeout_interruptible(mmp_update_interval *
 198                                                        HZ - diff);
 199
 200                 /*
 201                  * We need to make sure that more than mmp_check_interval
 202                  * seconds have not passed since writing. If that has happened
 203                  * we need to check if the MMP block is as we left it.
 204                  */
 205                 diff = jiffies - last_update_time;
 206                 if (diff > mmp_check_interval * HZ) {
 207                         struct buffer_head *bh_check = NULL;
 208                         struct mmp_struct *mmp_check;
 209
 210                         retval = read_mmp_block(sb, &bh_check, mmp_block);
 211                         if (retval) {
 212                                 ext4_error(sb, "error reading MMP data: %d",
 213                                            retval);
 214
 215                                 EXT4_SB(sb)->s_mmp_tsk = NULL;
 216                                 goto failed;
 217                         }
 218
 219                         mmp_check = (struct mmp_struct *)(bh_check->b_data);
 220                         if (mmp->mmp_seq != mmp_check->mmp_seq ||
 221                             memcmp(mmp->mmp_nodename, mmp_check->mmp_nodename,
 222                                    sizeof(mmp->mmp_nodename))) {
 223                                 dump_mmp_msg(sb, mmp_check,
 224                                              "Error while updating MMP info. "
 225                                              "The filesystem seems to have been"
 226                                              " multiply mounted.");
 227                                 ext4_error(sb, "abort");
 228                                 goto failed;
 229                         }
 230                         put_bh(bh_check);
 231                 }
 232
 233                  /*
 234                  * Adjust the mmp_check_interval depending on how much time
 235                  * it took for the MMP block to be written.
 236                  */
 237                 mmp_check_interval = max(min(EXT4_MMP_CHECK_MULT * diff / HZ,
 238                                              EXT4_MMP_MAX_CHECK_INTERVAL),
 239                                          EXT4_MMP_MIN_CHECK_INTERVAL);
 240                 mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval);
 241         }
 242
 243         /*
 244          * Unmount seems to be clean.
 245          */
 246         mmp->mmp_seq = cpu_to_le32(EXT4_MMP_SEQ_CLEAN);
 247         mmp->mmp_time = cpu_to_le64(get_seconds());
 248
 249         retval = write_mmp_block(sb, bh);
 250
 251 failed:
 252         kfree(data);
 253         brelse(bh);
 254         return retval;
 255 }
 256
 257 /*
 258  * Get a random new sequence number but make sure it is not greater than
 259  * EXT4_MMP_SEQ_MAX.
 260  */
 261 static unsigned int mmp_new_seq(void)
 262 {
 263         u32 new_seq;
 264
 265         do {
 266                 new_seq = prandom_u32();
 267         } while (new_seq > EXT4_MMP_SEQ_MAX);
 268
 269         return new_seq;
 270 }
 271
 272 /*
 273  * Protect the filesystem from being mounted more than once.
 274  */
 275 int ext4_multi_mount_protect(struct super_block *sb,
 276                                     ext4_fsblk_t mmp_block)
 277 {
 278         struct ext4_super_block *es = EXT4_SB(sb)->s_es;
 279         struct buffer_head *bh = NULL;
 280         struct mmp_struct *mmp = NULL;
 281         struct mmpd_data *mmpd_data;
 282         u32 seq;
 283         unsigned int mmp_check_interval = le16_to_cpu(es->s_mmp_update_interval);
 284         unsigned int wait_time = 0;
 285         int retval;
 286
 287         if (mmp_block < le32_to_cpu(es->s_first_data_block) ||
 288             mmp_block >= ext4_blocks_count(es)) {
 289                 ext4_warning(sb, "Invalid MMP block in superblock");
 290                 goto failed;
 291         }
 292
 293         retval = read_mmp_block(sb, &bh, mmp_block);
 294         if (retval)
 295                 goto failed;
 296
 297         mmp = (struct mmp_struct *)(bh->b_data);
 298
 299         if (mmp_check_interval < EXT4_MMP_MIN_CHECK_INTERVAL)
 300                 mmp_check_interval = EXT4_MMP_MIN_CHECK_INTERVAL;
 301
 302         /*
 303          * If check_interval in MMP block is larger, use that instead of
 304          * update_interval from the superblock.
 305          */
 306         if (le16_to_cpu(mmp->mmp_check_interval) > mmp_check_interval)
 307                 mmp_check_interval = le16_to_cpu(mmp->mmp_check_interval);
 308
 309         seq = le32_to_cpu(mmp->mmp_seq);
 310         if (seq == EXT4_MMP_SEQ_CLEAN)
 311                 goto skip;
 312
 313         if (seq == EXT4_MMP_SEQ_FSCK) {
 314                 dump_mmp_msg(sb, mmp, "fsck is running on the filesystem");
 315                 goto failed;
 316         }
 317
 318         wait_time = min(mmp_check_interval * 2 + 1,
 319                         mmp_check_interval + 60);
 320
 321         /* Print MMP interval if more than 20 secs. */
 322         if (wait_time > EXT4_MMP_MIN_CHECK_INTERVAL * 4)
 323                 ext4_warning(sb, "MMP interval %u higher than expected, please"
 324                              " wait.\n", wait_time * 2);
 325
 326         if (schedule_timeout_interruptible(HZ * wait_time) != 0) {
 327                 ext4_warning(sb, "MMP startup interrupted, failing mount\n");
 328                 goto failed;
 329         }
 330
 331         retval = read_mmp_block(sb, &bh, mmp_block);
 332         if (retval)
 333                 goto failed;
 334         mmp = (struct mmp_struct *)(bh->b_data);
 335         if (seq != le32_to_cpu(mmp->mmp_seq)) {
 336                 dump_mmp_msg(sb, mmp,
 337                              "Device is already active on another node.");
 338                 goto failed;
 339         }
 340
 341 skip:
 342         /*
 343          * write a new random sequence number.
 344          */
 345         seq = mmp_new_seq();
 346         mmp->mmp_seq = cpu_to_le32(seq);
 347
 348         retval = write_mmp_block(sb, bh);
 349         if (retval)
 350                 goto failed;
 351
 352         /*
 353          * wait for MMP interval and check mmp_seq.
 354          */
 355         if (schedule_timeout_interruptible(HZ * wait_time) != 0) {
 356                 ext4_warning(sb, "MMP startup interrupted, failing mount\n");
 357                 goto failed;
 358         }
 359
 360         retval = read_mmp_block(sb, &bh, mmp_block);
 361         if (retval)
 362                 goto failed;
 363         mmp = (struct mmp_struct *)(bh->b_data);
 364         if (seq != le32_to_cpu(mmp->mmp_seq)) {
 365                 dump_mmp_msg(sb, mmp,
 366                              "Device is already active on another node.");
 367                 goto failed;
 368         }
 369
 370         mmpd_data = kmalloc(sizeof(struct mmpd_data), GFP_KERNEL);
 371         if (!mmpd_data) {
 372                 ext4_warning(sb, "not enough memory for mmpd_data");
 373                 goto failed;
 374         }
 375         mmpd_data->sb = sb;
 376         mmpd_data->bh = bh;
 377
 378         /*
 379          * Start a kernel thread to update the MMP block periodically.
 380          */
 381         EXT4_SB(sb)->s_mmp_tsk = kthread_run(kmmpd, mmpd_data, "kmmpd-%s",
 382                                              bdevname(bh->b_bdev,
 383                                                       mmp->mmp_bdevname));
 384         if (IS_ERR(EXT4_SB(sb)->s_mmp_tsk)) {
 385                 EXT4_SB(sb)->s_mmp_tsk = NULL;
 386                 kfree(mmpd_data);
 387                 ext4_warning(sb, "Unable to create kmmpd thread for %s.",
 388                              sb->s_id);
 389                 goto failed;
 390         }
 391
 392         return 0;
 393
 394 failed:
 395         brelse(bh);
 396         return 1;
 397 }
 398
 399