Commit | Line | Data |
---|---|---|
4282d606 SRRH |
1 | /* |
2 | * inode.c - part of tracefs, a pseudo file system for activating tracing | |
3 | * | |
4 | * Based on debugfs by: Greg Kroah-Hartman <greg@kroah.com> | |
5 | * | |
6 | * Copyright (C) 2014 Red Hat Inc, author: Steven Rostedt <srostedt@redhat.com> | |
7 | * | |
8 | * This program is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU General Public License version | |
10 | * 2 as published by the Free Software Foundation. | |
11 | * | |
12 | * tracefs is the file system that is used by the tracing infrastructure. | |
13 | * | |
14 | */ | |
15 | ||
16 | #include <linux/module.h> | |
17 | #include <linux/fs.h> | |
18 | #include <linux/mount.h> | |
cc31004a | 19 | #include <linux/kobject.h> |
4282d606 SRRH |
20 | #include <linux/namei.h> |
21 | #include <linux/tracefs.h> | |
22 | #include <linux/fsnotify.h> | |
23 | #include <linux/seq_file.h> | |
24 | #include <linux/parser.h> | |
25 | #include <linux/magic.h> | |
26 | #include <linux/slab.h> | |
27 | ||
28 | #define TRACEFS_DEFAULT_MODE 0700 | |
29 | ||
30 | static struct vfsmount *tracefs_mount; | |
31 | static int tracefs_mount_count; | |
32 | static bool tracefs_registered; | |
33 | ||
34 | static ssize_t default_read_file(struct file *file, char __user *buf, | |
35 | size_t count, loff_t *ppos) | |
36 | { | |
37 | return 0; | |
38 | } | |
39 | ||
40 | static ssize_t default_write_file(struct file *file, const char __user *buf, | |
41 | size_t count, loff_t *ppos) | |
42 | { | |
43 | return count; | |
44 | } | |
45 | ||
46 | static const struct file_operations tracefs_file_operations = { | |
47 | .read = default_read_file, | |
48 | .write = default_write_file, | |
49 | .open = simple_open, | |
50 | .llseek = noop_llseek, | |
51 | }; | |
52 | ||
53 | static struct inode *tracefs_get_inode(struct super_block *sb) | |
54 | { | |
55 | struct inode *inode = new_inode(sb); | |
56 | if (inode) { | |
57 | inode->i_ino = get_next_ino(); | |
58 | inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; | |
59 | } | |
60 | return inode; | |
61 | } | |
62 | ||
63 | struct tracefs_mount_opts { | |
64 | kuid_t uid; | |
65 | kgid_t gid; | |
66 | umode_t mode; | |
67 | }; | |
68 | ||
/* Token ids returned by match_token() for the entries of tokens[]. */
enum {
	Opt_uid,
	Opt_gid,
	Opt_mode,
	Opt_err		/* table terminator; also what unrecognized options map to */
};
75 | ||
76 | static const match_table_t tokens = { | |
77 | {Opt_uid, "uid=%u"}, | |
78 | {Opt_gid, "gid=%u"}, | |
79 | {Opt_mode, "mode=%o"}, | |
80 | {Opt_err, NULL} | |
81 | }; | |
82 | ||
83 | struct tracefs_fs_info { | |
84 | struct tracefs_mount_opts mount_opts; | |
85 | }; | |
86 | ||
87 | static int tracefs_parse_options(char *data, struct tracefs_mount_opts *opts) | |
88 | { | |
89 | substring_t args[MAX_OPT_ARGS]; | |
90 | int option; | |
91 | int token; | |
92 | kuid_t uid; | |
93 | kgid_t gid; | |
94 | char *p; | |
95 | ||
96 | opts->mode = TRACEFS_DEFAULT_MODE; | |
97 | ||
98 | while ((p = strsep(&data, ",")) != NULL) { | |
99 | if (!*p) | |
100 | continue; | |
101 | ||
102 | token = match_token(p, tokens, args); | |
103 | switch (token) { | |
104 | case Opt_uid: | |
105 | if (match_int(&args[0], &option)) | |
106 | return -EINVAL; | |
107 | uid = make_kuid(current_user_ns(), option); | |
108 | if (!uid_valid(uid)) | |
109 | return -EINVAL; | |
110 | opts->uid = uid; | |
111 | break; | |
112 | case Opt_gid: | |
113 | if (match_int(&args[0], &option)) | |
114 | return -EINVAL; | |
115 | gid = make_kgid(current_user_ns(), option); | |
116 | if (!gid_valid(gid)) | |
117 | return -EINVAL; | |
118 | opts->gid = gid; | |
119 | break; | |
120 | case Opt_mode: | |
121 | if (match_octal(&args[0], &option)) | |
122 | return -EINVAL; | |
123 | opts->mode = option & S_IALLUGO; | |
124 | break; | |
125 | /* | |
126 | * We might like to report bad mount options here; | |
127 | * but traditionally tracefs has ignored all mount options | |
128 | */ | |
129 | } | |
130 | } | |
131 | ||
132 | return 0; | |
133 | } | |
134 | ||
135 | static int tracefs_apply_options(struct super_block *sb) | |
136 | { | |
137 | struct tracefs_fs_info *fsi = sb->s_fs_info; | |
138 | struct inode *inode = sb->s_root->d_inode; | |
139 | struct tracefs_mount_opts *opts = &fsi->mount_opts; | |
140 | ||
141 | inode->i_mode &= ~S_IALLUGO; | |
142 | inode->i_mode |= opts->mode; | |
143 | ||
144 | inode->i_uid = opts->uid; | |
145 | inode->i_gid = opts->gid; | |
146 | ||
147 | return 0; | |
148 | } | |
149 | ||
150 | static int tracefs_remount(struct super_block *sb, int *flags, char *data) | |
151 | { | |
152 | int err; | |
153 | struct tracefs_fs_info *fsi = sb->s_fs_info; | |
154 | ||
155 | sync_filesystem(sb); | |
156 | err = tracefs_parse_options(data, &fsi->mount_opts); | |
157 | if (err) | |
158 | goto fail; | |
159 | ||
160 | tracefs_apply_options(sb); | |
161 | ||
162 | fail: | |
163 | return err; | |
164 | } | |
165 | ||
166 | static int tracefs_show_options(struct seq_file *m, struct dentry *root) | |
167 | { | |
168 | struct tracefs_fs_info *fsi = root->d_sb->s_fs_info; | |
169 | struct tracefs_mount_opts *opts = &fsi->mount_opts; | |
170 | ||
171 | if (!uid_eq(opts->uid, GLOBAL_ROOT_UID)) | |
172 | seq_printf(m, ",uid=%u", | |
173 | from_kuid_munged(&init_user_ns, opts->uid)); | |
174 | if (!gid_eq(opts->gid, GLOBAL_ROOT_GID)) | |
175 | seq_printf(m, ",gid=%u", | |
176 | from_kgid_munged(&init_user_ns, opts->gid)); | |
177 | if (opts->mode != TRACEFS_DEFAULT_MODE) | |
178 | seq_printf(m, ",mode=%o", opts->mode); | |
179 | ||
180 | return 0; | |
181 | } | |
182 | ||
183 | static const struct super_operations tracefs_super_operations = { | |
184 | .statfs = simple_statfs, | |
185 | .remount_fs = tracefs_remount, | |
186 | .show_options = tracefs_show_options, | |
187 | }; | |
188 | ||
189 | static int trace_fill_super(struct super_block *sb, void *data, int silent) | |
190 | { | |
191 | static struct tree_descr trace_files[] = {{""}}; | |
192 | struct tracefs_fs_info *fsi; | |
193 | int err; | |
194 | ||
195 | save_mount_options(sb, data); | |
196 | ||
197 | fsi = kzalloc(sizeof(struct tracefs_fs_info), GFP_KERNEL); | |
198 | sb->s_fs_info = fsi; | |
199 | if (!fsi) { | |
200 | err = -ENOMEM; | |
201 | goto fail; | |
202 | } | |
203 | ||
204 | err = tracefs_parse_options(data, &fsi->mount_opts); | |
205 | if (err) | |
206 | goto fail; | |
207 | ||
208 | err = simple_fill_super(sb, TRACEFS_MAGIC, trace_files); | |
209 | if (err) | |
210 | goto fail; | |
211 | ||
212 | sb->s_op = &tracefs_super_operations; | |
213 | ||
214 | tracefs_apply_options(sb); | |
215 | ||
216 | return 0; | |
217 | ||
218 | fail: | |
219 | kfree(fsi); | |
220 | sb->s_fs_info = NULL; | |
221 | return err; | |
222 | } | |
223 | ||
224 | static struct dentry *trace_mount(struct file_system_type *fs_type, | |
225 | int flags, const char *dev_name, | |
226 | void *data) | |
227 | { | |
228 | return mount_single(fs_type, flags, data, trace_fill_super); | |
229 | } | |
230 | ||
231 | static struct file_system_type trace_fs_type = { | |
232 | .owner = THIS_MODULE, | |
233 | .name = "tracefs", | |
234 | .mount = trace_mount, | |
235 | .kill_sb = kill_litter_super, | |
236 | }; | |
237 | MODULE_ALIAS_FS("tracefs"); | |
238 | ||
239 | static struct dentry *start_creating(const char *name, struct dentry *parent) | |
240 | { | |
241 | struct dentry *dentry; | |
242 | int error; | |
243 | ||
244 | pr_debug("tracefs: creating file '%s'\n",name); | |
245 | ||
246 | error = simple_pin_fs(&trace_fs_type, &tracefs_mount, | |
247 | &tracefs_mount_count); | |
248 | if (error) | |
249 | return ERR_PTR(error); | |
250 | ||
251 | /* If the parent is not specified, we create it in the root. | |
252 | * We need the root dentry to do this, which is in the super | |
253 | * block. A pointer to that is in the struct vfsmount that we | |
254 | * have around. | |
255 | */ | |
256 | if (!parent) | |
257 | parent = tracefs_mount->mnt_root; | |
258 | ||
259 | mutex_lock(&parent->d_inode->i_mutex); | |
260 | dentry = lookup_one_len(name, parent, strlen(name)); | |
261 | if (!IS_ERR(dentry) && dentry->d_inode) { | |
262 | dput(dentry); | |
263 | dentry = ERR_PTR(-EEXIST); | |
264 | } | |
265 | if (IS_ERR(dentry)) | |
266 | mutex_unlock(&parent->d_inode->i_mutex); | |
267 | return dentry; | |
268 | } | |
269 | ||
270 | static struct dentry *failed_creating(struct dentry *dentry) | |
271 | { | |
272 | mutex_unlock(&dentry->d_parent->d_inode->i_mutex); | |
273 | dput(dentry); | |
274 | simple_release_fs(&tracefs_mount, &tracefs_mount_count); | |
275 | return NULL; | |
276 | } | |
277 | ||
278 | static struct dentry *end_creating(struct dentry *dentry) | |
279 | { | |
280 | mutex_unlock(&dentry->d_parent->d_inode->i_mutex); | |
281 | return dentry; | |
282 | } | |
283 | ||
284 | /** | |
285 | * tracefs_create_file - create a file in the tracefs filesystem | |
286 | * @name: a pointer to a string containing the name of the file to create. | |
287 | * @mode: the permission that the file should have. | |
288 | * @parent: a pointer to the parent dentry for this file. This should be a | |
289 | * directory dentry if set. If this parameter is NULL, then the | |
290 | * file will be created in the root of the tracefs filesystem. | |
291 | * @data: a pointer to something that the caller will want to get to later | |
292 | * on. The inode.i_private pointer will point to this value on | |
293 | * the open() call. | |
294 | * @fops: a pointer to a struct file_operations that should be used for | |
295 | * this file. | |
296 | * | |
297 | * This is the basic "create a file" function for tracefs. It allows for a | |
298 | * wide range of flexibility in creating a file, or a directory (if you want | |
299 | * to create a directory, the tracefs_create_dir() function is | |
300 | * recommended to be used instead.) | |
301 | * | |
302 | * This function will return a pointer to a dentry if it succeeds. This | |
303 | * pointer must be passed to the tracefs_remove() function when the file is | |
304 | * to be removed (no automatic cleanup happens if your module is unloaded, | |
305 | * you are responsible here.) If an error occurs, %NULL will be returned. | |
306 | * | |
307 | * If tracefs is not enabled in the kernel, the value -%ENODEV will be | |
308 | * returned. | |
309 | */ | |
310 | struct dentry *tracefs_create_file(const char *name, umode_t mode, | |
311 | struct dentry *parent, void *data, | |
312 | const struct file_operations *fops) | |
313 | { | |
314 | struct dentry *dentry; | |
315 | struct inode *inode; | |
316 | ||
317 | if (!(mode & S_IFMT)) | |
318 | mode |= S_IFREG; | |
319 | BUG_ON(!S_ISREG(mode)); | |
320 | dentry = start_creating(name, parent); | |
321 | ||
322 | if (IS_ERR(dentry)) | |
323 | return NULL; | |
324 | ||
325 | inode = tracefs_get_inode(dentry->d_sb); | |
326 | if (unlikely(!inode)) | |
327 | return failed_creating(dentry); | |
328 | ||
329 | inode->i_mode = mode; | |
330 | inode->i_fop = fops ? fops : &tracefs_file_operations; | |
331 | inode->i_private = data; | |
332 | d_instantiate(dentry, inode); | |
333 | fsnotify_create(dentry->d_parent->d_inode, dentry); | |
334 | return end_creating(dentry); | |
335 | } | |
336 | ||
337 | /** | |
338 | * tracefs_create_dir - create a directory in the tracefs filesystem | |
339 | * @name: a pointer to a string containing the name of the directory to | |
340 | * create. | |
341 | * @parent: a pointer to the parent dentry for this file. This should be a | |
342 | * directory dentry if set. If this parameter is NULL, then the | |
343 | * directory will be created in the root of the tracefs filesystem. | |
344 | * | |
345 | * This function creates a directory in tracefs with the given name. | |
346 | * | |
347 | * This function will return a pointer to a dentry if it succeeds. This | |
348 | * pointer must be passed to the tracefs_remove() function when the file is | |
349 | * to be removed. If an error occurs, %NULL will be returned. | |
350 | * | |
351 | * If tracing is not enabled in the kernel, the value -%ENODEV will be | |
352 | * returned. | |
353 | */ | |
354 | struct dentry *tracefs_create_dir(const char *name, struct dentry *parent) | |
355 | { | |
356 | struct dentry *dentry = start_creating(name, parent); | |
357 | struct inode *inode; | |
358 | ||
359 | if (IS_ERR(dentry)) | |
360 | return NULL; | |
361 | ||
362 | inode = tracefs_get_inode(dentry->d_sb); | |
363 | if (unlikely(!inode)) | |
364 | return failed_creating(dentry); | |
365 | ||
366 | inode->i_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO; | |
367 | inode->i_op = &simple_dir_inode_operations; | |
368 | inode->i_fop = &simple_dir_operations; | |
369 | ||
370 | /* directory inodes start off with i_nlink == 2 (for "." entry) */ | |
371 | inc_nlink(inode); | |
372 | d_instantiate(dentry, inode); | |
373 | inc_nlink(dentry->d_parent->d_inode); | |
374 | fsnotify_mkdir(dentry->d_parent->d_inode, dentry); | |
375 | return end_creating(dentry); | |
376 | } | |
377 | ||
378 | static inline int tracefs_positive(struct dentry *dentry) | |
379 | { | |
380 | return dentry->d_inode && !d_unhashed(dentry); | |
381 | } | |
382 | ||
383 | static int __tracefs_remove(struct dentry *dentry, struct dentry *parent) | |
384 | { | |
385 | int ret = 0; | |
386 | ||
387 | if (tracefs_positive(dentry)) { | |
388 | if (dentry->d_inode) { | |
389 | dget(dentry); | |
390 | switch (dentry->d_inode->i_mode & S_IFMT) { | |
391 | case S_IFDIR: | |
392 | ret = simple_rmdir(parent->d_inode, dentry); | |
393 | break; | |
394 | default: | |
395 | simple_unlink(parent->d_inode, dentry); | |
396 | break; | |
397 | } | |
398 | if (!ret) | |
399 | d_delete(dentry); | |
400 | dput(dentry); | |
401 | } | |
402 | } | |
403 | return ret; | |
404 | } | |
405 | ||
406 | /** | |
407 | * tracefs_remove - removes a file or directory from the tracefs filesystem | |
408 | * @dentry: a pointer to a the dentry of the file or directory to be | |
409 | * removed. | |
410 | * | |
411 | * This function removes a file or directory in tracefs that was previously | |
412 | * created with a call to another tracefs function (like | |
413 | * tracefs_create_file() or variants thereof.) | |
414 | */ | |
415 | void tracefs_remove(struct dentry *dentry) | |
416 | { | |
417 | struct dentry *parent; | |
418 | int ret; | |
419 | ||
420 | if (IS_ERR_OR_NULL(dentry)) | |
421 | return; | |
422 | ||
423 | parent = dentry->d_parent; | |
424 | if (!parent || !parent->d_inode) | |
425 | return; | |
426 | ||
427 | mutex_lock(&parent->d_inode->i_mutex); | |
428 | ret = __tracefs_remove(dentry, parent); | |
429 | mutex_unlock(&parent->d_inode->i_mutex); | |
430 | if (!ret) | |
431 | simple_release_fs(&tracefs_mount, &tracefs_mount_count); | |
432 | } | |
433 | ||
434 | /** | |
435 | * tracefs_remove_recursive - recursively removes a directory | |
436 | * @dentry: a pointer to a the dentry of the directory to be removed. | |
437 | * | |
438 | * This function recursively removes a directory tree in tracefs that | |
439 | * was previously created with a call to another tracefs function | |
440 | * (like tracefs_create_file() or variants thereof.) | |
441 | */ | |
442 | void tracefs_remove_recursive(struct dentry *dentry) | |
443 | { | |
444 | struct dentry *child, *parent; | |
445 | ||
446 | if (IS_ERR_OR_NULL(dentry)) | |
447 | return; | |
448 | ||
449 | parent = dentry->d_parent; | |
450 | if (!parent || !parent->d_inode) | |
451 | return; | |
452 | ||
453 | parent = dentry; | |
454 | down: | |
455 | mutex_lock(&parent->d_inode->i_mutex); | |
456 | loop: | |
457 | /* | |
458 | * The parent->d_subdirs is protected by the d_lock. Outside that | |
459 | * lock, the child can be unlinked and set to be freed which can | |
460 | * use the d_u.d_child as the rcu head and corrupt this list. | |
461 | */ | |
462 | spin_lock(&parent->d_lock); | |
463 | list_for_each_entry(child, &parent->d_subdirs, d_child) { | |
464 | if (!tracefs_positive(child)) | |
465 | continue; | |
466 | ||
467 | /* perhaps simple_empty(child) makes more sense */ | |
468 | if (!list_empty(&child->d_subdirs)) { | |
469 | spin_unlock(&parent->d_lock); | |
470 | mutex_unlock(&parent->d_inode->i_mutex); | |
471 | parent = child; | |
472 | goto down; | |
473 | } | |
474 | ||
475 | spin_unlock(&parent->d_lock); | |
476 | ||
477 | if (!__tracefs_remove(child, parent)) | |
478 | simple_release_fs(&tracefs_mount, &tracefs_mount_count); | |
479 | ||
480 | /* | |
481 | * The parent->d_lock protects agaist child from unlinking | |
482 | * from d_subdirs. When releasing the parent->d_lock we can | |
483 | * no longer trust that the next pointer is valid. | |
484 | * Restart the loop. We'll skip this one with the | |
485 | * tracefs_positive() check. | |
486 | */ | |
487 | goto loop; | |
488 | } | |
489 | spin_unlock(&parent->d_lock); | |
490 | ||
491 | mutex_unlock(&parent->d_inode->i_mutex); | |
492 | child = parent; | |
493 | parent = parent->d_parent; | |
494 | mutex_lock(&parent->d_inode->i_mutex); | |
495 | ||
496 | if (child != dentry) | |
497 | /* go up */ | |
498 | goto loop; | |
499 | ||
500 | if (!__tracefs_remove(child, parent)) | |
501 | simple_release_fs(&tracefs_mount, &tracefs_mount_count); | |
502 | mutex_unlock(&parent->d_inode->i_mutex); | |
503 | } | |
504 | ||
505 | /** | |
506 | * tracefs_initialized - Tells whether tracefs has been registered | |
507 | */ | |
508 | bool tracefs_initialized(void) | |
509 | { | |
510 | return tracefs_registered; | |
511 | } | |
512 | ||
cc31004a SRRH |
513 | static struct kobject *trace_kobj; |
514 | ||
4282d606 SRRH |
515 | static int __init tracefs_init(void) |
516 | { | |
517 | int retval; | |
518 | ||
cc31004a SRRH |
519 | trace_kobj = kobject_create_and_add("tracing", kernel_kobj); |
520 | if (!trace_kobj) | |
521 | return -EINVAL; | |
522 | ||
4282d606 SRRH |
523 | retval = register_filesystem(&trace_fs_type); |
524 | if (!retval) | |
525 | tracefs_registered = true; | |
526 | ||
527 | return retval; | |
528 | } | |
529 | core_initcall(tracefs_init); |