Commit | Line | Data |
---|---|---|
0eeca283 RL |
1 | /* |
2 | * fs/inotify.c - inode-based file event notifications | |
3 | * | |
4 | * Authors: | |
5 | * John McCutchan <ttb@tentacle.dhs.org> | |
6 | * Robert Love <rml@novell.com> | |
7 | * | |
2d9048e2 AG |
8 | * Kernel API added by: Amy Griffis <amy.griffis@hp.com> |
9 | * | |
0eeca283 | 10 | * Copyright (C) 2005 John McCutchan |
2d9048e2 | 11 | * Copyright 2006 Hewlett-Packard Development Company, L.P. |
0eeca283 RL |
12 | * |
13 | * This program is free software; you can redistribute it and/or modify it | |
14 | * under the terms of the GNU General Public License as published by the | |
15 | * Free Software Foundation; either version 2, or (at your option) any | |
16 | * later version. | |
17 | * | |
18 | * This program is distributed in the hope that it will be useful, but | |
19 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
20 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
21 | * General Public License for more details. | |
22 | */ | |
23 | ||
24 | #include <linux/module.h> | |
25 | #include <linux/kernel.h> | |
0eeca283 RL |
26 | #include <linux/spinlock.h> |
27 | #include <linux/idr.h> | |
28 | #include <linux/slab.h> | |
29 | #include <linux/fs.h> | |
914e2637 | 30 | #include <linux/sched.h> |
0eeca283 RL |
31 | #include <linux/init.h> |
32 | #include <linux/list.h> | |
33 | #include <linux/writeback.h> | |
34 | #include <linux/inotify.h> | |
0eeca283 RL |
35 | |
36 | static atomic_t inotify_cookie; | |
37 | ||
0eeca283 RL |
38 | /* |
39 | * Lock ordering: | |
40 | * | |
41 | * dentry->d_lock (used to keep d_move() away from dentry->d_parent) | |
f24075bd | 42 | * iprune_mutex (synchronize shrink_icache_memory()) |
0eeca283 | 43 | * inode_lock (protects the super_block->s_inodes list) |
d4f9af9d | 44 | * inode->inotify_mutex (protects inode->inotify_watches and watches->i_list) |
2d9048e2 AG |
45 | * inotify_handle->mutex (protects inotify_handle and watches->h_list) |
46 | * | |
47 | * The inode->inotify_mutex and inotify_handle->mutex are held during execution | |
48 | * of a caller's event handler. Thus, the caller must not hold any locks | |
49 | * taken in their event handler while calling any of the published inotify | |
50 | * interfaces. | |
0eeca283 RL |
51 | */ |
52 | ||
53 | /* | |
2d9048e2 | 54 | * Lifetimes of the three main data structures--inotify_handle, inode, and |
0eeca283 RL |
55 | * inotify_watch--are managed by reference count. |
56 | * | |
2d9048e2 AG |
57 | * inotify_handle: Lifetime is from inotify_init() to inotify_destroy(). |
58 | * Additional references can bump the count via get_inotify_handle() and drop | |
59 | * the count via put_inotify_handle(). | |
0eeca283 | 60 | * |
2d9048e2 AG |
61 | * inotify_watch: for inotify's purposes, lifetime is from inotify_add_watch() |
62 | * to remove_watch_no_event(). Additional references can bump the count via | |
63 | * get_inotify_watch() and drop the count via put_inotify_watch(). The caller | |
64 | * is responsible for the final put after receiving IN_IGNORED, or when using | |
65 | * IN_ONESHOT after receiving the first event. Inotify does the final put if | |
66 | * inotify_destroy() is called. | |
0eeca283 RL |
67 | * |
68 | * inode: Pinned so long as the inode is associated with a watch, from | |
2d9048e2 | 69 | * inotify_add_watch() to the final put_inotify_watch(). |
0eeca283 RL |
70 | */ |
71 | ||
/*
 * struct inotify_handle - represents an inotify instance
 *
 * This structure is protected by the mutex 'mutex'.  Its lifetime is
 * governed by 'count': get_inotify_handle() bumps it and
 * put_inotify_handle() frees the handle once it drops to zero.
 */
struct inotify_handle {
	struct idr		idr;		/* idr mapping wd -> watch */
	struct mutex		mutex;		/* protects this bad boy */
	struct list_head	watches;	/* list of watches (linked via watch->h_list) */
	atomic_t		count;		/* reference count */
	u32			last_wd;	/* the last wd allocated */
	const struct inotify_operations *in_ops; /* inotify caller operations */
};
85 | ||
/* get_inotify_handle - take an additional reference on @ih. */
static inline void get_inotify_handle(struct inotify_handle *ih)
{
	atomic_inc(&ih->count);
}
90 | ||
2d9048e2 | 91 | static inline void put_inotify_handle(struct inotify_handle *ih) |
0eeca283 | 92 | { |
2d9048e2 AG |
93 | if (atomic_dec_and_test(&ih->count)) { |
94 | idr_destroy(&ih->idr); | |
95 | kfree(ih); | |
0eeca283 RL |
96 | } |
97 | } | |
98 | ||
/**
 * get_inotify_watch - grab a reference to an inotify_watch
 * @watch: watch to grab
 *
 * Each reference taken here must be balanced by put_inotify_watch().
 */
void get_inotify_watch(struct inotify_watch *watch)
{
	atomic_inc(&watch->count);
}
EXPORT_SYMBOL_GPL(get_inotify_watch);
0eeca283 | 108 | |
8f7b0ba1 AV |
109 | int pin_inotify_watch(struct inotify_watch *watch) |
110 | { | |
111 | struct super_block *sb = watch->inode->i_sb; | |
112 | spin_lock(&sb_lock); | |
113 | if (sb->s_count >= S_BIAS) { | |
114 | atomic_inc(&sb->s_active); | |
115 | spin_unlock(&sb_lock); | |
116 | atomic_inc(&watch->count); | |
117 | return 1; | |
118 | } | |
119 | spin_unlock(&sb_lock); | |
120 | return 0; | |
121 | } | |
122 | ||
2d9048e2 | 123 | /** |
0eeca283 | 124 | * put_inotify_watch - decrements the ref count on a given watch. cleans up |
2d9048e2 AG |
125 | * watch references if the count reaches zero. inotify_watch is freed by |
126 | * inotify callers via the destroy_watch() op. | |
127 | * @watch: watch to release | |
0eeca283 | 128 | */ |
2d9048e2 | 129 | void put_inotify_watch(struct inotify_watch *watch) |
0eeca283 RL |
130 | { |
131 | if (atomic_dec_and_test(&watch->count)) { | |
2d9048e2 | 132 | struct inotify_handle *ih = watch->ih; |
0eeca283 | 133 | |
2d9048e2 AG |
134 | iput(watch->inode); |
135 | ih->in_ops->destroy_watch(watch); | |
136 | put_inotify_handle(ih); | |
0eeca283 RL |
137 | } |
138 | } | |
2d9048e2 | 139 | EXPORT_SYMBOL_GPL(put_inotify_watch); |
0eeca283 | 140 | |
/*
 * unpin_inotify_watch - undo a successful pin_inotify_watch()
 * @watch: watch that was pinned
 *
 * Drops the watch reference and then the active superblock reference.
 */
void unpin_inotify_watch(struct inotify_watch *watch)
{
	/*
	 * Fetch the superblock first: put_inotify_watch() may be the final
	 * put, after which @watch must not be dereferenced.
	 */
	struct super_block *sb = watch->inode->i_sb;
	put_inotify_watch(watch);
	deactivate_super(sb);
}
147 | ||
0eeca283 | 148 | /* |
2d9048e2 | 149 | * inotify_handle_get_wd - returns the next WD for use by the given handle |
0eeca283 | 150 | * |
2d9048e2 | 151 | * Callers must hold ih->mutex. This function can sleep. |
0eeca283 | 152 | */ |
2d9048e2 AG |
153 | static int inotify_handle_get_wd(struct inotify_handle *ih, |
154 | struct inotify_watch *watch) | |
0eeca283 RL |
155 | { |
156 | int ret; | |
157 | ||
158 | do { | |
2d9048e2 | 159 | if (unlikely(!idr_pre_get(&ih->idr, GFP_KERNEL))) |
0eeca283 | 160 | return -ENOSPC; |
2d9048e2 | 161 | ret = idr_get_new_above(&ih->idr, watch, ih->last_wd+1, &watch->wd); |
0eeca283 RL |
162 | } while (ret == -EAGAIN); |
163 | ||
2d9048e2 AG |
164 | if (likely(!ret)) |
165 | ih->last_wd = watch->wd; | |
0eeca283 | 166 | |
2d9048e2 | 167 | return ret; |
0eeca283 RL |
168 | } |
169 | ||
/*
 * inotify_inode_watched - returns nonzero if there are watches on this inode
 * and zero otherwise.  We call this lockless, we do not care if we race.
 *
 * The test is simply whether inode->inotify_watches is non-empty.
 */
static inline int inotify_inode_watched(struct inode *inode)
{
	return !list_empty(&inode->inotify_watches);
}
178 | ||
179 | /* | |
180 | * Get child dentry flag into synch with parent inode. | |
181 | * Flag should always be clear for negative dentrys. | |
182 | */ | |
183 | static void set_dentry_child_flags(struct inode *inode, int watched) | |
184 | { | |
185 | struct dentry *alias; | |
186 | ||
187 | spin_lock(&dcache_lock); | |
188 | list_for_each_entry(alias, &inode->i_dentry, d_alias) { | |
189 | struct dentry *child; | |
190 | ||
191 | list_for_each_entry(child, &alias->d_subdirs, d_u.d_child) { | |
0d71bd59 | 192 | if (!child->d_inode) |
c32ccd87 | 193 | continue; |
0d71bd59 | 194 | |
c32ccd87 | 195 | spin_lock(&child->d_lock); |
0d71bd59 | 196 | if (watched) |
c32ccd87 | 197 | child->d_flags |= DCACHE_INOTIFY_PARENT_WATCHED; |
0d71bd59 NP |
198 | else |
199 | child->d_flags &=~DCACHE_INOTIFY_PARENT_WATCHED; | |
c32ccd87 NP |
200 | spin_unlock(&child->d_lock); |
201 | } | |
202 | } | |
203 | spin_unlock(&dcache_lock); | |
204 | } | |
205 | ||
0eeca283 | 206 | /* |
2d9048e2 AG |
207 | * inotify_find_handle - find the watch associated with the given inode and |
208 | * handle | |
0eeca283 | 209 | * |
d4f9af9d | 210 | * Callers must hold inode->inotify_mutex. |
0eeca283 | 211 | */ |
2d9048e2 AG |
212 | static struct inotify_watch *inode_find_handle(struct inode *inode, |
213 | struct inotify_handle *ih) | |
0eeca283 RL |
214 | { |
215 | struct inotify_watch *watch; | |
216 | ||
217 | list_for_each_entry(watch, &inode->inotify_watches, i_list) { | |
2d9048e2 | 218 | if (watch->ih == ih) |
0eeca283 RL |
219 | return watch; |
220 | } | |
221 | ||
222 | return NULL; | |
223 | } | |
224 | ||
225 | /* | |
3ca10067 | 226 | * remove_watch_no_event - remove watch without the IN_IGNORED event. |
2d9048e2 AG |
227 | * |
228 | * Callers must hold both inode->inotify_mutex and ih->mutex. | |
0eeca283 RL |
229 | */ |
230 | static void remove_watch_no_event(struct inotify_watch *watch, | |
2d9048e2 | 231 | struct inotify_handle *ih) |
0eeca283 RL |
232 | { |
233 | list_del(&watch->i_list); | |
2d9048e2 | 234 | list_del(&watch->h_list); |
0eeca283 | 235 | |
c32ccd87 NP |
236 | if (!inotify_inode_watched(watch->inode)) |
237 | set_dentry_child_flags(watch->inode, 0); | |
238 | ||
2d9048e2 | 239 | idr_remove(&ih->idr, watch->wd); |
0eeca283 RL |
240 | } |
241 | ||
3ca10067 AG |
242 | /** |
243 | * inotify_remove_watch_locked - Remove a watch from both the handle and the | |
244 | * inode. Sends the IN_IGNORED event signifying that the inode is no longer | |
245 | * watched. May be invoked from a caller's event handler. | |
246 | * @ih: inotify handle associated with watch | |
247 | * @watch: watch to remove | |
0eeca283 | 248 | * |
2d9048e2 | 249 | * Callers must hold both inode->inotify_mutex and ih->mutex. |
0eeca283 | 250 | */ |
3ca10067 AG |
251 | void inotify_remove_watch_locked(struct inotify_handle *ih, |
252 | struct inotify_watch *watch) | |
0eeca283 | 253 | { |
2d9048e2 | 254 | remove_watch_no_event(watch, ih); |
7c297722 | 255 | ih->in_ops->handle_event(watch, watch->wd, IN_IGNORED, 0, NULL, NULL); |
0eeca283 | 256 | } |
3ca10067 | 257 | EXPORT_SYMBOL_GPL(inotify_remove_watch_locked); |
0eeca283 | 258 | |
2d9048e2 | 259 | /* Kernel API for producing events */ |
c32ccd87 | 260 | |
0eeca283 | 261 | /* |
c32ccd87 | 262 | * inotify_d_instantiate - instantiate dcache entry for inode |
0eeca283 | 263 | */ |
c32ccd87 | 264 | void inotify_d_instantiate(struct dentry *entry, struct inode *inode) |
0eeca283 | 265 | { |
c32ccd87 NP |
266 | struct dentry *parent; |
267 | ||
268 | if (!inode) | |
269 | return; | |
270 | ||
c32ccd87 NP |
271 | spin_lock(&entry->d_lock); |
272 | parent = entry->d_parent; | |
091e881d | 273 | if (parent->d_inode && inotify_inode_watched(parent->d_inode)) |
c32ccd87 NP |
274 | entry->d_flags |= DCACHE_INOTIFY_PARENT_WATCHED; |
275 | spin_unlock(&entry->d_lock); | |
0eeca283 RL |
276 | } |
277 | ||
c32ccd87 NP |
278 | /* |
279 | * inotify_d_move - dcache entry has been moved | |
280 | */ | |
281 | void inotify_d_move(struct dentry *entry) | |
282 | { | |
283 | struct dentry *parent; | |
284 | ||
285 | parent = entry->d_parent; | |
286 | if (inotify_inode_watched(parent->d_inode)) | |
287 | entry->d_flags |= DCACHE_INOTIFY_PARENT_WATCHED; | |
288 | else | |
289 | entry->d_flags &= ~DCACHE_INOTIFY_PARENT_WATCHED; | |
290 | } | |
0eeca283 RL |
291 | |
292 | /** | |
293 | * inotify_inode_queue_event - queue an event to all watches on this inode | |
294 | * @inode: inode event is originating from | |
295 | * @mask: event mask describing this event | |
296 | * @cookie: cookie for synchronization, or zero | |
297 | * @name: filename, if any | |
7c297722 | 298 | * @n_inode: inode associated with name |
0eeca283 RL |
299 | */ |
300 | void inotify_inode_queue_event(struct inode *inode, u32 mask, u32 cookie, | |
7c297722 | 301 | const char *name, struct inode *n_inode) |
0eeca283 RL |
302 | { |
303 | struct inotify_watch *watch, *next; | |
304 | ||
305 | if (!inotify_inode_watched(inode)) | |
306 | return; | |
307 | ||
d4f9af9d | 308 | mutex_lock(&inode->inotify_mutex); |
0eeca283 RL |
309 | list_for_each_entry_safe(watch, next, &inode->inotify_watches, i_list) { |
310 | u32 watch_mask = watch->mask; | |
311 | if (watch_mask & mask) { | |
2d9048e2 AG |
312 | struct inotify_handle *ih= watch->ih; |
313 | mutex_lock(&ih->mutex); | |
0eeca283 | 314 | if (watch_mask & IN_ONESHOT) |
2d9048e2 | 315 | remove_watch_no_event(watch, ih); |
7c297722 AG |
316 | ih->in_ops->handle_event(watch, watch->wd, mask, cookie, |
317 | name, n_inode); | |
2d9048e2 | 318 | mutex_unlock(&ih->mutex); |
0eeca283 RL |
319 | } |
320 | } | |
d4f9af9d | 321 | mutex_unlock(&inode->inotify_mutex); |
0eeca283 RL |
322 | } |
323 | EXPORT_SYMBOL_GPL(inotify_inode_queue_event); | |
324 | ||
325 | /** | |
326 | * inotify_dentry_parent_queue_event - queue an event to a dentry's parent | |
327 | * @dentry: the dentry in question, we queue against this dentry's parent | |
328 | * @mask: event mask describing this event | |
329 | * @cookie: cookie for synchronization, or zero | |
330 | * @name: filename, if any | |
331 | */ | |
332 | void inotify_dentry_parent_queue_event(struct dentry *dentry, u32 mask, | |
333 | u32 cookie, const char *name) | |
334 | { | |
335 | struct dentry *parent; | |
336 | struct inode *inode; | |
337 | ||
c32ccd87 | 338 | if (!(dentry->d_flags & DCACHE_INOTIFY_PARENT_WATCHED)) |
820249ba JM |
339 | return; |
340 | ||
0eeca283 RL |
341 | spin_lock(&dentry->d_lock); |
342 | parent = dentry->d_parent; | |
343 | inode = parent->d_inode; | |
344 | ||
345 | if (inotify_inode_watched(inode)) { | |
346 | dget(parent); | |
347 | spin_unlock(&dentry->d_lock); | |
7c297722 AG |
348 | inotify_inode_queue_event(inode, mask, cookie, name, |
349 | dentry->d_inode); | |
0eeca283 RL |
350 | dput(parent); |
351 | } else | |
352 | spin_unlock(&dentry->d_lock); | |
353 | } | |
354 | EXPORT_SYMBOL_GPL(inotify_dentry_parent_queue_event); | |
355 | ||
/**
 * inotify_get_cookie - return a unique cookie for use in synchronizing events.
 *
 * The cookie is the post-increment value of a file-global atomic counter.
 */
u32 inotify_get_cookie(void)
{
	return atomic_inc_return(&inotify_cookie);
}
EXPORT_SYMBOL_GPL(inotify_get_cookie);
364 | ||
365 | /** | |
366 | * inotify_unmount_inodes - an sb is unmounting. handle any watched inodes. | |
367 | * @list: list of inodes being unmounted (sb->s_inodes) | |
368 | * | |
369 | * Called with inode_lock held, protecting the unmounting super block's list | |
f24075bd | 370 | * of inodes, and with iprune_mutex held, keeping shrink_icache_memory() at bay. |
0eeca283 RL |
371 | * We temporarily drop inode_lock, however, and CAN block. |
372 | */ | |
373 | void inotify_unmount_inodes(struct list_head *list) | |
374 | { | |
375 | struct inode *inode, *next_i, *need_iput = NULL; | |
376 | ||
377 | list_for_each_entry_safe(inode, next_i, list, i_sb_list) { | |
378 | struct inotify_watch *watch, *next_w; | |
379 | struct inode *need_iput_tmp; | |
380 | struct list_head *watches; | |
381 | ||
382 | /* | |
383 | * If i_count is zero, the inode cannot have any watches and | |
384 | * doing an __iget/iput with MS_ACTIVE clear would actually | |
385 | * evict all inodes with zero i_count from icache which is | |
386 | * unnecessarily violent and may in fact be illegal to do. | |
387 | */ | |
388 | if (!atomic_read(&inode->i_count)) | |
389 | continue; | |
390 | ||
391 | /* | |
392 | * We cannot __iget() an inode in state I_CLEAR, I_FREEING, or | |
393 | * I_WILL_FREE which is fine because by that point the inode | |
394 | * cannot have any associated watches. | |
395 | */ | |
396 | if (inode->i_state & (I_CLEAR | I_FREEING | I_WILL_FREE)) | |
397 | continue; | |
398 | ||
399 | need_iput_tmp = need_iput; | |
400 | need_iput = NULL; | |
3ca10067 | 401 | /* In case inotify_remove_watch_locked() drops a reference. */ |
0eeca283 RL |
402 | if (inode != need_iput_tmp) |
403 | __iget(inode); | |
404 | else | |
405 | need_iput_tmp = NULL; | |
406 | /* In case the dropping of a reference would nuke next_i. */ | |
407 | if ((&next_i->i_sb_list != list) && | |
408 | atomic_read(&next_i->i_count) && | |
409 | !(next_i->i_state & (I_CLEAR | I_FREEING | | |
410 | I_WILL_FREE))) { | |
411 | __iget(next_i); | |
412 | need_iput = next_i; | |
413 | } | |
414 | ||
415 | /* | |
416 | * We can safely drop inode_lock here because we hold | |
417 | * references on both inode and next_i. Also no new inodes | |
418 | * will be added since the umount has begun. Finally, | |
f24075bd | 419 | * iprune_mutex keeps shrink_icache_memory() away. |
0eeca283 RL |
420 | */ |
421 | spin_unlock(&inode_lock); | |
422 | ||
423 | if (need_iput_tmp) | |
424 | iput(need_iput_tmp); | |
425 | ||
426 | /* for each watch, send IN_UNMOUNT and then remove it */ | |
d4f9af9d | 427 | mutex_lock(&inode->inotify_mutex); |
0eeca283 RL |
428 | watches = &inode->inotify_watches; |
429 | list_for_each_entry_safe(watch, next_w, watches, i_list) { | |
2d9048e2 | 430 | struct inotify_handle *ih= watch->ih; |
6ee5a399 | 431 | get_inotify_watch(watch); |
2d9048e2 AG |
432 | mutex_lock(&ih->mutex); |
433 | ih->in_ops->handle_event(watch, watch->wd, IN_UNMOUNT, 0, | |
7c297722 | 434 | NULL, NULL); |
3ca10067 | 435 | inotify_remove_watch_locked(ih, watch); |
2d9048e2 | 436 | mutex_unlock(&ih->mutex); |
6ee5a399 | 437 | put_inotify_watch(watch); |
0eeca283 | 438 | } |
d4f9af9d | 439 | mutex_unlock(&inode->inotify_mutex); |
0eeca283 RL |
440 | iput(inode); |
441 | ||
442 | spin_lock(&inode_lock); | |
443 | } | |
444 | } | |
445 | EXPORT_SYMBOL_GPL(inotify_unmount_inodes); | |
446 | ||
447 | /** | |
448 | * inotify_inode_is_dead - an inode has been deleted, cleanup any watches | |
449 | * @inode: inode that is about to be removed | |
450 | */ | |
451 | void inotify_inode_is_dead(struct inode *inode) | |
452 | { | |
453 | struct inotify_watch *watch, *next; | |
454 | ||
d4f9af9d | 455 | mutex_lock(&inode->inotify_mutex); |
0eeca283 | 456 | list_for_each_entry_safe(watch, next, &inode->inotify_watches, i_list) { |
2d9048e2 AG |
457 | struct inotify_handle *ih = watch->ih; |
458 | mutex_lock(&ih->mutex); | |
3ca10067 | 459 | inotify_remove_watch_locked(ih, watch); |
2d9048e2 | 460 | mutex_unlock(&ih->mutex); |
0eeca283 | 461 | } |
d4f9af9d | 462 | mutex_unlock(&inode->inotify_mutex); |
0eeca283 RL |
463 | } |
464 | EXPORT_SYMBOL_GPL(inotify_inode_is_dead); | |
465 | ||
2d9048e2 | 466 | /* Kernel Consumer API */ |
0eeca283 | 467 | |
2d9048e2 AG |
468 | /** |
469 | * inotify_init - allocate and initialize an inotify instance | |
470 | * @ops: caller's inotify operations | |
471 | */ | |
472 | struct inotify_handle *inotify_init(const struct inotify_operations *ops) | |
0eeca283 | 473 | { |
2d9048e2 | 474 | struct inotify_handle *ih; |
0eeca283 | 475 | |
2d9048e2 AG |
476 | ih = kmalloc(sizeof(struct inotify_handle), GFP_KERNEL); |
477 | if (unlikely(!ih)) | |
478 | return ERR_PTR(-ENOMEM); | |
0eeca283 | 479 | |
2d9048e2 AG |
480 | idr_init(&ih->idr); |
481 | INIT_LIST_HEAD(&ih->watches); | |
482 | mutex_init(&ih->mutex); | |
483 | ih->last_wd = 0; | |
484 | ih->in_ops = ops; | |
485 | atomic_set(&ih->count, 0); | |
486 | get_inotify_handle(ih); | |
0eeca283 | 487 | |
2d9048e2 | 488 | return ih; |
0eeca283 | 489 | } |
2d9048e2 | 490 | EXPORT_SYMBOL_GPL(inotify_init); |
0eeca283 | 491 | |
a9dc971d AG |
492 | /** |
493 | * inotify_init_watch - initialize an inotify watch | |
494 | * @watch: watch to initialize | |
495 | */ | |
496 | void inotify_init_watch(struct inotify_watch *watch) | |
497 | { | |
498 | INIT_LIST_HEAD(&watch->h_list); | |
499 | INIT_LIST_HEAD(&watch->i_list); | |
500 | atomic_set(&watch->count, 0); | |
501 | get_inotify_watch(watch); /* initial get */ | |
502 | } | |
503 | EXPORT_SYMBOL_GPL(inotify_init_watch); | |
504 | ||
8f7b0ba1 AV |
505 | /* |
506 | * Watch removals suck violently. To kick the watch out we need (in this | |
507 | * order) inode->inotify_mutex and ih->mutex. That's fine if we have | |
508 | * a hold on inode; however, for all other cases we need to make damn sure | |
509 | * we don't race with umount. We can *NOT* just grab a reference to a | |
510 | * watch - inotify_unmount_inodes() will happily sail past it and we'll end | |
511 | * with reference to inode potentially outliving its superblock. Ideally | |
512 | * we just want to grab an active reference to superblock if we can; that | |
513 | * will make sure we won't go into inotify_unmount_inodes() until we are | |
514 | * done. Cleanup is just deactivate_super(). However, that leaves a messy | |
515 | * case - what if we *are* racing with umount() and active references to | |
516 | * superblock can't be acquired anymore? We can bump ->s_count, grab | |
517 | * ->s_umount, which will almost certainly wait until the superblock is shut | |
518 | * down and the watch in question is pining for fjords. That's fine, but | |
519 | * there is a problem - we might have hit the window between ->s_active | |
520 | * getting to 0 / ->s_count - below S_BIAS (i.e. the moment when superblock | |
521 | * is past the point of no return and is heading for shutdown) and the | |
522 | * moment when deactivate_super() acquires ->s_umount. We could just do | |
523 | * drop_super() yield() and retry, but that's rather antisocial and this | |
524 | * stuff is luser-triggerable. OTOH, having grabbed ->s_umount and having | |
525 | * found that we'd got there first (i.e. that ->s_root is non-NULL) we know | |
526 | * that we won't race with inotify_unmount_inodes(). So we could grab a | |
527 | * reference to watch and do the rest as above, just with drop_super() instead | |
528 | * of deactivate_super(), right? Wrong. We had to drop ih->mutex before we | |
529 | * could grab ->s_umount. So the watch could've been gone already. | |
530 | * | |
531 | * That still can be dealt with - we need to save watch->wd, do idr_find() | |
532 | * and compare its result with our pointer. If they match, we either have | |
533 | * the damn thing still alive or we'd lost not one but two races at once, | |
534 | * the watch had been killed and a new one got created with the same ->wd | |
535 | * at the same address. That couldn't have happened in inotify_destroy(), | |
536 | * but inotify_rm_wd() could run into that. Still, "new one got created" | |
537 | * is not a problem - we have every right to kill it or leave it alone, | |
538 | * whatever's more convenient. | |
539 | * | |
540 | * So we can use idr_find(...) == watch && watch->inode->i_sb == sb as | |
541 | * "grab it and kill it" check. If it's been our original watch, we are | |
542 | * fine, if it's a newcomer - nevermind, just pretend that we'd won the | |
543 | * race and kill the fscker anyway; we are safe since we know that its | |
544 | * superblock won't be going away. | |
545 | * | |
546 | * And yes, this is far beyond mere "not very pretty"; so's the entire | |
547 | * concept of inotify to start with. | |
548 | */ | |
549 | ||
550 | /** | |
551 | * pin_to_kill - pin the watch down for removal | |
552 | * @ih: inotify handle | |
553 | * @watch: watch to kill | |
554 | * | |
555 | * Called with ih->mutex held, drops it. Possible return values: | |
556 | * 0 - nothing to do, it has died | |
557 | * 1 - remove it, drop the reference and deactivate_super() | |
558 | * 2 - remove it, drop the reference and drop_super(); we tried hard to avoid | |
559 | * that variant, since it involved a lot of PITA, but that's the best that | |
560 | * could've been done. | |
561 | */ | |
562 | static int pin_to_kill(struct inotify_handle *ih, struct inotify_watch *watch) | |
563 | { | |
564 | struct super_block *sb = watch->inode->i_sb; | |
565 | s32 wd = watch->wd; | |
566 | ||
567 | spin_lock(&sb_lock); | |
568 | if (sb->s_count >= S_BIAS) { | |
569 | atomic_inc(&sb->s_active); | |
570 | spin_unlock(&sb_lock); | |
571 | get_inotify_watch(watch); | |
572 | mutex_unlock(&ih->mutex); | |
573 | return 1; /* the best outcome */ | |
574 | } | |
575 | sb->s_count++; | |
576 | spin_unlock(&sb_lock); | |
577 | mutex_unlock(&ih->mutex); /* can't grab ->s_umount under it */ | |
578 | down_read(&sb->s_umount); | |
579 | if (likely(!sb->s_root)) { | |
580 | /* fs is already shut down; the watch is dead */ | |
581 | drop_super(sb); | |
582 | return 0; | |
583 | } | |
584 | /* raced with the final deactivate_super() */ | |
585 | mutex_lock(&ih->mutex); | |
586 | if (idr_find(&ih->idr, wd) != watch || watch->inode->i_sb != sb) { | |
587 | /* the watch is dead */ | |
588 | mutex_unlock(&ih->mutex); | |
589 | drop_super(sb); | |
590 | return 0; | |
591 | } | |
592 | /* still alive or freed and reused with the same sb and wd; kill */ | |
593 | get_inotify_watch(watch); | |
594 | mutex_unlock(&ih->mutex); | |
595 | return 2; | |
596 | } | |
597 | ||
598 | static void unpin_and_kill(struct inotify_watch *watch, int how) | |
599 | { | |
600 | struct super_block *sb = watch->inode->i_sb; | |
601 | put_inotify_watch(watch); | |
602 | switch (how) { | |
603 | case 1: | |
604 | deactivate_super(sb); | |
605 | break; | |
606 | case 2: | |
607 | drop_super(sb); | |
608 | } | |
609 | } | |
610 | ||
2d9048e2 AG |
611 | /** |
612 | * inotify_destroy - clean up and destroy an inotify instance | |
613 | * @ih: inotify handle | |
614 | */ | |
615 | void inotify_destroy(struct inotify_handle *ih) | |
0eeca283 | 616 | { |
0eeca283 | 617 | /* |
2d9048e2 | 618 | * Destroy all of the watches for this handle. Unfortunately, not very |
0eeca283 RL |
619 | * pretty. We cannot do a simple iteration over the list, because we |
620 | * do not know the inode until we iterate to the watch. But we need to | |
2d9048e2 | 621 | * hold inode->inotify_mutex before ih->mutex. The following works. |
8f7b0ba1 AV |
622 | * |
623 | * AV: it had to become even uglier to start working ;-/ | |
0eeca283 RL |
624 | */ |
625 | while (1) { | |
626 | struct inotify_watch *watch; | |
627 | struct list_head *watches; | |
8f7b0ba1 | 628 | struct super_block *sb; |
0eeca283 | 629 | struct inode *inode; |
8f7b0ba1 | 630 | int how; |
0eeca283 | 631 | |
2d9048e2 AG |
632 | mutex_lock(&ih->mutex); |
633 | watches = &ih->watches; | |
0eeca283 | 634 | if (list_empty(watches)) { |
2d9048e2 | 635 | mutex_unlock(&ih->mutex); |
0eeca283 RL |
636 | break; |
637 | } | |
b5e61818 | 638 | watch = list_first_entry(watches, struct inotify_watch, h_list); |
8f7b0ba1 AV |
639 | sb = watch->inode->i_sb; |
640 | how = pin_to_kill(ih, watch); | |
641 | if (!how) | |
642 | continue; | |
0eeca283 RL |
643 | |
644 | inode = watch->inode; | |
d4f9af9d | 645 | mutex_lock(&inode->inotify_mutex); |
2d9048e2 | 646 | mutex_lock(&ih->mutex); |
66055a4e AG |
647 | |
648 | /* make sure we didn't race with another list removal */ | |
2d9048e2 AG |
649 | if (likely(idr_find(&ih->idr, watch->wd))) { |
650 | remove_watch_no_event(watch, ih); | |
651 | put_inotify_watch(watch); | |
652 | } | |
66055a4e | 653 | |
2d9048e2 | 654 | mutex_unlock(&ih->mutex); |
d4f9af9d | 655 | mutex_unlock(&inode->inotify_mutex); |
8f7b0ba1 | 656 | unpin_and_kill(watch, how); |
0eeca283 RL |
657 | } |
658 | ||
2d9048e2 AG |
659 | /* free this handle: the put matching the get in inotify_init() */ |
660 | put_inotify_handle(ih); | |
0eeca283 | 661 | } |
2d9048e2 | 662 | EXPORT_SYMBOL_GPL(inotify_destroy); |
0eeca283 | 663 | |
a9dc971d AG |
664 | /** |
665 | * inotify_find_watch - find an existing watch for an (ih,inode) pair | |
666 | * @ih: inotify handle | |
667 | * @inode: inode to watch | |
668 | * @watchp: pointer to existing inotify_watch | |
669 | * | |
670 | * Caller must pin given inode (via nameidata). | |
671 | */ | |
672 | s32 inotify_find_watch(struct inotify_handle *ih, struct inode *inode, | |
673 | struct inotify_watch **watchp) | |
674 | { | |
675 | struct inotify_watch *old; | |
676 | int ret = -ENOENT; | |
677 | ||
678 | mutex_lock(&inode->inotify_mutex); | |
679 | mutex_lock(&ih->mutex); | |
680 | ||
681 | old = inode_find_handle(inode, ih); | |
682 | if (unlikely(old)) { | |
683 | get_inotify_watch(old); /* caller must put watch */ | |
684 | *watchp = old; | |
685 | ret = old->wd; | |
686 | } | |
687 | ||
688 | mutex_unlock(&ih->mutex); | |
689 | mutex_unlock(&inode->inotify_mutex); | |
690 | ||
691 | return ret; | |
692 | } | |
693 | EXPORT_SYMBOL_GPL(inotify_find_watch); | |
694 | ||
2d9048e2 AG |
695 | /** |
696 | * inotify_find_update_watch - find and update the mask of an existing watch | |
697 | * @ih: inotify handle | |
698 | * @inode: inode's watch to update | |
699 | * @mask: mask of events to watch | |
0eeca283 | 700 | * |
2d9048e2 | 701 | * Caller must pin given inode (via nameidata). |
0eeca283 | 702 | */ |
2d9048e2 AG |
703 | s32 inotify_find_update_watch(struct inotify_handle *ih, struct inode *inode, |
704 | u32 mask) | |
0eeca283 | 705 | { |
2d9048e2 AG |
706 | struct inotify_watch *old; |
707 | int mask_add = 0; | |
708 | int ret; | |
0eeca283 | 709 | |
2d9048e2 AG |
710 | if (mask & IN_MASK_ADD) |
711 | mask_add = 1; | |
712 | ||
713 | /* don't allow invalid bits: we don't want flags set */ | |
714 | mask &= IN_ALL_EVENTS | IN_ONESHOT; | |
715 | if (unlikely(!mask)) | |
0eeca283 | 716 | return -EINVAL; |
0eeca283 | 717 | |
d4f9af9d | 718 | mutex_lock(&inode->inotify_mutex); |
2d9048e2 | 719 | mutex_lock(&ih->mutex); |
0eeca283 | 720 | |
2d9048e2 AG |
721 | /* |
722 | * Handle the case of re-adding a watch on an (inode,ih) pair that we | |
723 | * are already watching. We just update the mask and return its wd. | |
724 | */ | |
725 | old = inode_find_handle(inode, ih); | |
726 | if (unlikely(!old)) { | |
727 | ret = -ENOENT; | |
728 | goto out; | |
0eeca283 RL |
729 | } |
730 | ||
2d9048e2 AG |
731 | if (mask_add) |
732 | old->mask |= mask; | |
733 | else | |
734 | old->mask = mask; | |
735 | ret = old->wd; | |
736 | out: | |
737 | mutex_unlock(&ih->mutex); | |
738 | mutex_unlock(&inode->inotify_mutex); | |
0eeca283 RL |
739 | return ret; |
740 | } | |
2d9048e2 | 741 | EXPORT_SYMBOL_GPL(inotify_find_update_watch); |
0eeca283 | 742 | |
2d9048e2 AG |
743 | /** |
744 | * inotify_add_watch - add a watch to an inotify instance | |
745 | * @ih: inotify handle | |
746 | * @watch: caller allocated watch structure | |
747 | * @inode: inode to watch | |
748 | * @mask: mask of events to watch | |
749 | * | |
750 | * Caller must pin given inode (via nameidata). | |
751 | * Caller must ensure it only calls inotify_add_watch() once per watch. | |
752 | * Calls inotify_handle_get_wd() so may sleep. | |
753 | */ | |
754 | s32 inotify_add_watch(struct inotify_handle *ih, struct inotify_watch *watch, | |
755 | struct inode *inode, u32 mask) | |
0eeca283 | 756 | { |
2d9048e2 | 757 | int ret = 0; |
d599e36a | 758 | int newly_watched; |
0eeca283 | 759 | |
2d9048e2 AG |
760 | /* don't allow invalid bits: we don't want flags set */ |
761 | mask &= IN_ALL_EVENTS | IN_ONESHOT; | |
762 | if (unlikely(!mask)) | |
763 | return -EINVAL; | |
764 | watch->mask = mask; | |
783bc29b | 765 | |
2d9048e2 AG |
766 | mutex_lock(&inode->inotify_mutex); |
767 | mutex_lock(&ih->mutex); | |
8140a500 | 768 | |
2d9048e2 AG |
769 | /* Initialize a new watch */ |
770 | ret = inotify_handle_get_wd(ih, watch); | |
b680716e | 771 | if (unlikely(ret)) |
2d9048e2 AG |
772 | goto out; |
773 | ret = watch->wd; | |
0eeca283 | 774 | |
2d9048e2 AG |
775 | /* save a reference to handle and bump the count to make it official */ |
776 | get_inotify_handle(ih); | |
777 | watch->ih = ih; | |
0eeca283 RL |
778 | |
779 | /* | |
2d9048e2 AG |
780 | * Save a reference to the inode and bump the ref count to make it |
781 | * official. We hold a reference to nameidata, which makes this safe. | |
0eeca283 | 782 | */ |
2d9048e2 | 783 | watch->inode = igrab(inode); |
0eeca283 | 784 | |
2d9048e2 | 785 | /* Add the watch to the handle's and the inode's list */ |
d599e36a | 786 | newly_watched = !inotify_inode_watched(inode); |
2d9048e2 | 787 | list_add(&watch->h_list, &ih->watches); |
0eeca283 | 788 | list_add(&watch->i_list, &inode->inotify_watches); |
d599e36a NP |
789 | /* |
790 | * Set child flags _after_ adding the watch, so there is no race | |
791 | * windows where newly instantiated children could miss their parent's | |
792 | * watched flag. | |
793 | */ | |
794 | if (newly_watched) | |
795 | set_dentry_child_flags(inode, 1); | |
796 | ||
0eeca283 | 797 | out: |
2d9048e2 | 798 | mutex_unlock(&ih->mutex); |
d4f9af9d | 799 | mutex_unlock(&inode->inotify_mutex); |
0eeca283 RL |
800 | return ret; |
801 | } | |
2d9048e2 | 802 | EXPORT_SYMBOL_GPL(inotify_add_watch); |
0eeca283 | 803 | |
b9efe8a2 AV |
804 | /** |
805 | * inotify_clone_watch - put the watch next to existing one | |
806 | * @old: already installed watch | |
807 | * @new: new watch | |
808 | * | |
809 | * Caller must hold the inotify_mutex of inode we are dealing with; | |
810 | * it is expected to remove the old watch before unlocking the inode. | |
811 | */ | |
812 | s32 inotify_clone_watch(struct inotify_watch *old, struct inotify_watch *new) | |
813 | { | |
814 | struct inotify_handle *ih = old->ih; | |
815 | int ret = 0; | |
816 | ||
817 | new->mask = old->mask; | |
818 | new->ih = ih; | |
819 | ||
820 | mutex_lock(&ih->mutex); | |
821 | ||
822 | /* Initialize a new watch */ | |
823 | ret = inotify_handle_get_wd(ih, new); | |
824 | if (unlikely(ret)) | |
825 | goto out; | |
826 | ret = new->wd; | |
827 | ||
828 | get_inotify_handle(ih); | |
829 | ||
830 | new->inode = igrab(old->inode); | |
831 | ||
832 | list_add(&new->h_list, &ih->watches); | |
833 | list_add(&new->i_list, &old->inode->inotify_watches); | |
834 | out: | |
835 | mutex_unlock(&ih->mutex); | |
836 | return ret; | |
837 | } | |
838 | ||
455434d4 AV |
839 | void inotify_evict_watch(struct inotify_watch *watch) |
840 | { | |
841 | get_inotify_watch(watch); | |
842 | mutex_lock(&watch->ih->mutex); | |
843 | inotify_remove_watch_locked(watch->ih, watch); | |
844 | mutex_unlock(&watch->ih->mutex); | |
845 | } | |
846 | ||
2d9048e2 AG |
847 | /** |
848 | * inotify_rm_wd - remove a watch from an inotify instance | |
849 | * @ih: inotify handle | |
850 | * @wd: watch descriptor to remove | |
851 | * | |
852 | * Can sleep. | |
853 | */ | |
854 | int inotify_rm_wd(struct inotify_handle *ih, u32 wd) | |
0eeca283 | 855 | { |
2d9048e2 | 856 | struct inotify_watch *watch; |
8f7b0ba1 | 857 | struct super_block *sb; |
2d9048e2 | 858 | struct inode *inode; |
8f7b0ba1 | 859 | int how; |
783bc29b | 860 | |
2d9048e2 AG |
861 | mutex_lock(&ih->mutex); |
862 | watch = idr_find(&ih->idr, wd); | |
863 | if (unlikely(!watch)) { | |
864 | mutex_unlock(&ih->mutex); | |
865 | return -EINVAL; | |
783bc29b | 866 | } |
8f7b0ba1 AV |
867 | sb = watch->inode->i_sb; |
868 | how = pin_to_kill(ih, watch); | |
869 | if (!how) | |
870 | return 0; | |
871 | ||
2d9048e2 | 872 | inode = watch->inode; |
783bc29b | 873 | |
2d9048e2 AG |
874 | mutex_lock(&inode->inotify_mutex); |
875 | mutex_lock(&ih->mutex); | |
9a556e89 | 876 | |
2d9048e2 AG |
877 | /* make sure that we did not race */ |
878 | if (likely(idr_find(&ih->idr, wd) == watch)) | |
3ca10067 | 879 | inotify_remove_watch_locked(ih, watch); |
0eeca283 | 880 | |
2d9048e2 AG |
881 | mutex_unlock(&ih->mutex); |
882 | mutex_unlock(&inode->inotify_mutex); | |
8f7b0ba1 | 883 | unpin_and_kill(watch, how); |
0eeca283 | 884 | |
2d9048e2 AG |
885 | return 0; |
886 | } | |
887 | EXPORT_SYMBOL_GPL(inotify_rm_wd); | |
0eeca283 | 888 | |
a9dc971d AG |
889 | /** |
890 | * inotify_rm_watch - remove a watch from an inotify instance | |
891 | * @ih: inotify handle | |
892 | * @watch: watch to remove | |
893 | * | |
894 | * Can sleep. | |
895 | */ | |
896 | int inotify_rm_watch(struct inotify_handle *ih, | |
897 | struct inotify_watch *watch) | |
898 | { | |
899 | return inotify_rm_wd(ih, watch->wd); | |
900 | } | |
901 | EXPORT_SYMBOL_GPL(inotify_rm_watch); | |
902 | ||
0eeca283 | 903 | /* |
2d9048e2 | 904 | * inotify_setup - core initialization function |
0eeca283 | 905 | */ |
b680716e | 906 | static int __init inotify_setup(void) |
0eeca283 | 907 | { |
0eeca283 RL |
908 | atomic_set(&inotify_cookie, 0); |
909 | ||
0eeca283 RL |
910 | return 0; |
911 | } | |
912 | ||
b680716e | 913 | module_init(inotify_setup); |