exportfs: BUG_ON in crazy corner case
[deliverable/linux.git] / fs / exportfs / expfs.c
1 /*
2 * Copyright (C) Neil Brown 2002
3 * Copyright (C) Christoph Hellwig 2007
4 *
5 * This file contains the code mapping from inodes to NFS file handles,
6 * and for mapping back from file handles to dentries.
7 *
8 * For details on why we do all the strange and hairy things in here
9 * take a look at Documentation/filesystems/nfs/Exporting.
10 */
11 #include <linux/exportfs.h>
12 #include <linux/fs.h>
13 #include <linux/file.h>
14 #include <linux/module.h>
15 #include <linux/mount.h>
16 #include <linux/namei.h>
17 #include <linux/sched.h>
18
/*
 * Debug tracing is compiled out in this file: dprintk() expands to an empty
 * statement, so its format string and arguments are never evaluated.
 */
#define dprintk(fmt, args...) do{}while(0)
20
21
/*
 * Forward declaration: the generic get_name() is defined further down in this
 * file but is already needed by exportfs_get_name().
 */
static int get_name(const struct path *path, char *name, struct dentry *child);
23
24
25 static int exportfs_get_name(struct vfsmount *mnt, struct dentry *dir,
26 char *name, struct dentry *child)
27 {
28 const struct export_operations *nop = dir->d_sb->s_export_op;
29 struct path path = {.mnt = mnt, .dentry = dir};
30
31 if (nop->get_name)
32 return nop->get_name(dir, name, child);
33 else
34 return get_name(&path, name, child);
35 }
36
37 /*
38 * Check if the dentry or any of it's aliases is acceptable.
39 */
40 static struct dentry *
41 find_acceptable_alias(struct dentry *result,
42 int (*acceptable)(void *context, struct dentry *dentry),
43 void *context)
44 {
45 struct dentry *dentry, *toput = NULL;
46 struct inode *inode;
47
48 if (acceptable(context, result))
49 return result;
50
51 inode = result->d_inode;
52 spin_lock(&inode->i_lock);
53 hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) {
54 dget(dentry);
55 spin_unlock(&inode->i_lock);
56 if (toput)
57 dput(toput);
58 if (dentry != result && acceptable(context, dentry)) {
59 dput(result);
60 return dentry;
61 }
62 spin_lock(&inode->i_lock);
63 toput = dentry;
64 }
65 spin_unlock(&inode->i_lock);
66
67 if (toput)
68 dput(toput);
69 return NULL;
70 }
71
72 /*
73 * Find root of a disconnected subtree and return a reference to it.
74 */
75 static struct dentry *
76 find_disconnected_root(struct dentry *dentry)
77 {
78 dget(dentry);
79 while (!IS_ROOT(dentry)) {
80 struct dentry *parent = dget_parent(dentry);
81
82 if (!(parent->d_flags & DCACHE_DISCONNECTED)) {
83 dput(parent);
84 break;
85 }
86
87 dput(dentry);
88 dentry = parent;
89 }
90 return dentry;
91 }
92
93 /*
94 * Make sure target_dir is fully connected to the dentry tree.
95 *
96 * It may already be, as the flag isn't always updated when connection happens.
97 */
98 static int
99 reconnect_path(struct vfsmount *mnt, struct dentry *target_dir, char *nbuf)
100 {
101 int noprogress = 0;
102 int err = -ESTALE;
103
104 /*
105 * It is possible that a confused file system might not let us complete
106 * the path to the root. For example, if get_parent returns a directory
107 * in which we cannot find a name for the child. While this implies a
108 * very sick filesystem we don't want it to cause knfsd to spin. Hence
109 * the noprogress counter. If we go through the loop 10 times (2 is
110 * probably enough) without getting anywhere, we just give up
111 */
112 while (target_dir->d_flags & DCACHE_DISCONNECTED && noprogress++ < 10) {
113 struct dentry *pd = find_disconnected_root(target_dir);
114
115 BUG_ON(pd == mnt->mnt_sb->s_root);
116
117 if (!IS_ROOT(pd)) {
118 /* must have found a connected parent - great */
119 spin_lock(&pd->d_lock);
120 pd->d_flags &= ~DCACHE_DISCONNECTED;
121 spin_unlock(&pd->d_lock);
122 noprogress = 0;
123 } else {
124 /*
125 * We have hit the top of a disconnected path, try to
126 * find parent and connect.
127 *
128 * Racing with some other process renaming a directory
129 * isn't much of a problem here. If someone renames
130 * the directory, it will end up properly connected,
131 * which is what we want
132 *
133 * Getting the parent can't be supported generically,
134 * the locking is too icky.
135 *
136 * Instead we just return EACCES. If server reboots
137 * or inodes get flushed, you lose
138 */
139 struct dentry *ppd = ERR_PTR(-EACCES);
140 struct dentry *npd;
141
142 mutex_lock(&pd->d_inode->i_mutex);
143 if (mnt->mnt_sb->s_export_op->get_parent)
144 ppd = mnt->mnt_sb->s_export_op->get_parent(pd);
145 mutex_unlock(&pd->d_inode->i_mutex);
146
147 if (IS_ERR(ppd)) {
148 err = PTR_ERR(ppd);
149 dprintk("%s: get_parent of %ld failed, err %d\n",
150 __func__, pd->d_inode->i_ino, err);
151 dput(pd);
152 break;
153 }
154
155 dprintk("%s: find name of %lu in %lu\n", __func__,
156 pd->d_inode->i_ino, ppd->d_inode->i_ino);
157 err = exportfs_get_name(mnt, ppd, nbuf, pd);
158 if (err) {
159 dput(ppd);
160 dput(pd);
161 if (err == -ENOENT)
162 /* some race between get_parent and
163 * get_name? just try again
164 */
165 continue;
166 break;
167 }
168 dprintk("%s: found name: %s\n", __func__, nbuf);
169 mutex_lock(&ppd->d_inode->i_mutex);
170 npd = lookup_one_len(nbuf, ppd, strlen(nbuf));
171 mutex_unlock(&ppd->d_inode->i_mutex);
172 if (IS_ERR(npd)) {
173 err = PTR_ERR(npd);
174 dprintk("%s: lookup failed: %d\n",
175 __func__, err);
176 dput(ppd);
177 dput(pd);
178 break;
179 }
180 /* we didn't really want npd, we really wanted
181 * a side-effect of the lookup.
182 * hopefully, npd == pd, though it isn't really
183 * a problem if it isn't
184 */
185 if (npd == pd)
186 noprogress = 0;
187 else
188 printk("%s: npd != pd\n", __func__);
189 dput(npd);
190 dput(ppd);
191 if (IS_ROOT(pd)) {
192 /* something went wrong, we have to give up */
193 dput(pd);
194 break;
195 }
196 }
197 dput(pd);
198 }
199
200 if (target_dir->d_flags & DCACHE_DISCONNECTED) {
201 /* something went wrong - oh-well */
202 if (!err)
203 err = -ESTALE;
204 return err;
205 }
206
207 return 0;
208 }
209
/*
 * State shared between get_name() and its directory-iteration actor
 * filldir_one().  get_name() passes &ctx to iterate_dir(), and filldir_one()
 * receives the structure back as its opaque first argument.
 */
struct getdents_callback {
	struct dir_context ctx;	/* carries the actor (filldir_one) */
	char *name;		/* where the found name is stored; it already
				   points to a buffer of NAME_MAX+1 bytes */
	u64 ino;		/* the inode number we are looking for */
	int found;		/* set to 1 by filldir_one() on a match */
	int sequence;		/* bumped for every entry filldir_one() sees;
				   lets get_name() notice a pass that produced
				   no entries */
};
218
219 /*
220 * A rather strange filldir function to capture
221 * the name matching the specified inode number.
222 */
223 static int filldir_one(void * __buf, const char * name, int len,
224 loff_t pos, u64 ino, unsigned int d_type)
225 {
226 struct getdents_callback *buf = __buf;
227 int result = 0;
228
229 buf->sequence++;
230 if (buf->ino == ino && len <= NAME_MAX) {
231 memcpy(buf->name, name, len);
232 buf->name[len] = '\0';
233 buf->found = 1;
234 result = -1;
235 }
236 return result;
237 }
238
239 /**
240 * get_name - default export_operations->get_name function
241 * @dentry: the directory in which to find a name
242 * @name: a pointer to a %NAME_MAX+1 char buffer to store the name
243 * @child: the dentry for the child directory.
244 *
245 * calls readdir on the parent until it finds an entry with
246 * the same inode number as the child, and returns that.
247 */
248 static int get_name(const struct path *path, char *name, struct dentry *child)
249 {
250 const struct cred *cred = current_cred();
251 struct inode *dir = path->dentry->d_inode;
252 int error;
253 struct file *file;
254 struct kstat stat;
255 struct path child_path = {
256 .mnt = path->mnt,
257 .dentry = child,
258 };
259 struct getdents_callback buffer = {
260 .ctx.actor = filldir_one,
261 .name = name,
262 };
263
264 error = -ENOTDIR;
265 if (!dir || !S_ISDIR(dir->i_mode))
266 goto out;
267 error = -EINVAL;
268 if (!dir->i_fop)
269 goto out;
270 /*
271 * inode->i_ino is unsigned long, kstat->ino is u64, so the
272 * former would be insufficient on 32-bit hosts when the
273 * filesystem supports 64-bit inode numbers. So we need to
274 * actually call ->getattr, not just read i_ino:
275 */
276 error = vfs_getattr_nosec(&child_path, &stat);
277 if (error)
278 return error;
279 buffer.ino = stat.ino;
280 /*
281 * Open the directory ...
282 */
283 file = dentry_open(path, O_RDONLY, cred);
284 error = PTR_ERR(file);
285 if (IS_ERR(file))
286 goto out;
287
288 error = -EINVAL;
289 if (!file->f_op->iterate)
290 goto out_close;
291
292 buffer.sequence = 0;
293 while (1) {
294 int old_seq = buffer.sequence;
295
296 error = iterate_dir(file, &buffer.ctx);
297 if (buffer.found) {
298 error = 0;
299 break;
300 }
301
302 if (error < 0)
303 break;
304
305 error = -ENOENT;
306 if (old_seq == buffer.sequence)
307 break;
308 }
309
310 out_close:
311 fput(file);
312 out:
313 return error;
314 }
315
316 /**
317 * export_encode_fh - default export_operations->encode_fh function
318 * @inode: the object to encode
319 * @fh: where to store the file handle fragment
320 * @max_len: maximum length to store there
321 * @parent: parent directory inode, if wanted
322 *
323 * This default encode_fh function assumes that the 32 inode number
324 * is suitable for locating an inode, and that the generation number
325 * can be used to check that it is still valid. It places them in the
326 * filehandle fragment where export_decode_fh expects to find them.
327 */
328 static int export_encode_fh(struct inode *inode, struct fid *fid,
329 int *max_len, struct inode *parent)
330 {
331 int len = *max_len;
332 int type = FILEID_INO32_GEN;
333
334 if (parent && (len < 4)) {
335 *max_len = 4;
336 return FILEID_INVALID;
337 } else if (len < 2) {
338 *max_len = 2;
339 return FILEID_INVALID;
340 }
341
342 len = 2;
343 fid->i32.ino = inode->i_ino;
344 fid->i32.gen = inode->i_generation;
345 if (parent) {
346 fid->i32.parent_ino = parent->i_ino;
347 fid->i32.parent_gen = parent->i_generation;
348 len = 4;
349 type = FILEID_INO32_GEN_PARENT;
350 }
351 *max_len = len;
352 return type;
353 }
354
355 int exportfs_encode_inode_fh(struct inode *inode, struct fid *fid,
356 int *max_len, struct inode *parent)
357 {
358 const struct export_operations *nop = inode->i_sb->s_export_op;
359
360 if (nop && nop->encode_fh)
361 return nop->encode_fh(inode, fid->raw, max_len, parent);
362
363 return export_encode_fh(inode, fid, max_len, parent);
364 }
365 EXPORT_SYMBOL_GPL(exportfs_encode_inode_fh);
366
367 int exportfs_encode_fh(struct dentry *dentry, struct fid *fid, int *max_len,
368 int connectable)
369 {
370 int error;
371 struct dentry *p = NULL;
372 struct inode *inode = dentry->d_inode, *parent = NULL;
373
374 if (connectable && !S_ISDIR(inode->i_mode)) {
375 p = dget_parent(dentry);
376 /*
377 * note that while p might've ceased to be our parent already,
378 * it's still pinned by and still positive.
379 */
380 parent = p->d_inode;
381 }
382
383 error = exportfs_encode_inode_fh(inode, fid, max_len, parent);
384 dput(p);
385
386 return error;
387 }
388 EXPORT_SYMBOL_GPL(exportfs_encode_fh);
389
390 struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid,
391 int fh_len, int fileid_type,
392 int (*acceptable)(void *, struct dentry *), void *context)
393 {
394 const struct export_operations *nop = mnt->mnt_sb->s_export_op;
395 struct dentry *result, *alias;
396 char nbuf[NAME_MAX+1];
397 int err;
398
399 /*
400 * Try to get any dentry for the given file handle from the filesystem.
401 */
402 if (!nop || !nop->fh_to_dentry)
403 return ERR_PTR(-ESTALE);
404 result = nop->fh_to_dentry(mnt->mnt_sb, fid, fh_len, fileid_type);
405 if (!result)
406 result = ERR_PTR(-ESTALE);
407 if (IS_ERR(result))
408 return result;
409
410 if (S_ISDIR(result->d_inode->i_mode)) {
411 /*
412 * This request is for a directory.
413 *
414 * On the positive side there is only one dentry for each
415 * directory inode. On the negative side this implies that we
416 * to ensure our dentry is connected all the way up to the
417 * filesystem root.
418 */
419 if (result->d_flags & DCACHE_DISCONNECTED) {
420 err = reconnect_path(mnt, result, nbuf);
421 if (err)
422 goto err_result;
423 }
424
425 if (!acceptable(context, result)) {
426 err = -EACCES;
427 goto err_result;
428 }
429
430 return result;
431 } else {
432 /*
433 * It's not a directory. Life is a little more complicated.
434 */
435 struct dentry *target_dir, *nresult;
436
437 /*
438 * See if either the dentry we just got from the filesystem
439 * or any alias for it is acceptable. This is always true
440 * if this filesystem is exported without the subtreecheck
441 * option. If the filesystem is exported with the subtree
442 * check option there's a fair chance we need to look at
443 * the parent directory in the file handle and make sure
444 * it's connected to the filesystem root.
445 */
446 alias = find_acceptable_alias(result, acceptable, context);
447 if (alias)
448 return alias;
449
450 /*
451 * Try to extract a dentry for the parent directory from the
452 * file handle. If this fails we'll have to give up.
453 */
454 err = -ESTALE;
455 if (!nop->fh_to_parent)
456 goto err_result;
457
458 target_dir = nop->fh_to_parent(mnt->mnt_sb, fid,
459 fh_len, fileid_type);
460 if (!target_dir)
461 goto err_result;
462 err = PTR_ERR(target_dir);
463 if (IS_ERR(target_dir))
464 goto err_result;
465
466 /*
467 * And as usual we need to make sure the parent directory is
468 * connected to the filesystem root. The VFS really doesn't
469 * like disconnected directories..
470 */
471 err = reconnect_path(mnt, target_dir, nbuf);
472 if (err) {
473 dput(target_dir);
474 goto err_result;
475 }
476
477 /*
478 * Now that we've got both a well-connected parent and a
479 * dentry for the inode we're after, make sure that our
480 * inode is actually connected to the parent.
481 */
482 err = exportfs_get_name(mnt, target_dir, nbuf, result);
483 if (!err) {
484 mutex_lock(&target_dir->d_inode->i_mutex);
485 nresult = lookup_one_len(nbuf, target_dir,
486 strlen(nbuf));
487 mutex_unlock(&target_dir->d_inode->i_mutex);
488 if (!IS_ERR(nresult)) {
489 if (nresult->d_inode) {
490 dput(result);
491 result = nresult;
492 } else
493 dput(nresult);
494 }
495 }
496
497 /*
498 * At this point we are done with the parent, but it's pinned
499 * by the child dentry anyway.
500 */
501 dput(target_dir);
502
503 /*
504 * And finally make sure the dentry is actually acceptable
505 * to NFSD.
506 */
507 alias = find_acceptable_alias(result, acceptable, context);
508 if (!alias) {
509 err = -EACCES;
510 goto err_result;
511 }
512
513 return alias;
514 }
515
516 err_result:
517 dput(result);
518 return ERR_PTR(err);
519 }
520 EXPORT_SYMBOL_GPL(exportfs_decode_fh);
521
522 MODULE_LICENSE("GPL");
This page took 0.042481 seconds and 5 git commands to generate.