Commit | Line | Data |
---|---|---|
334f485d MS |
1 | /* |
2 | FUSE: Filesystem in Userspace | |
3 | Copyright (C) 2001-2005 Miklos Szeredi <miklos@szeredi.hu> | |
4 | ||
5 | This program can be distributed under the terms of the GNU GPL. | |
6 | See the file COPYING. | |
7 | */ | |
8 | ||
9 | #include "fuse_i.h" | |
10 | ||
11 | #include <linux/init.h> | |
12 | #include <linux/module.h> | |
13 | #include <linux/poll.h> | |
14 | #include <linux/uio.h> | |
15 | #include <linux/miscdevice.h> | |
16 | #include <linux/pagemap.h> | |
17 | #include <linux/file.h> | |
18 | #include <linux/slab.h> | |
19 | ||
20 | MODULE_ALIAS_MISCDEV(FUSE_MINOR); | |
21 | ||
22 | static kmem_cache_t *fuse_req_cachep; | |
23 | ||
8bfc016d | 24 | static struct fuse_conn *fuse_get_conn(struct file *file) |
334f485d MS |
25 | { |
26 | struct fuse_conn *fc; | |
27 | spin_lock(&fuse_lock); | |
28 | fc = file->private_data; | |
9ba7cbba | 29 | if (fc && !fc->connected) |
334f485d MS |
30 | fc = NULL; |
31 | spin_unlock(&fuse_lock); | |
32 | return fc; | |
33 | } | |
34 | ||
8bfc016d | 35 | static void fuse_request_init(struct fuse_req *req) |
334f485d MS |
36 | { |
37 | memset(req, 0, sizeof(*req)); | |
38 | INIT_LIST_HEAD(&req->list); | |
39 | init_waitqueue_head(&req->waitq); | |
40 | atomic_set(&req->count, 1); | |
41 | } | |
42 | ||
43 | struct fuse_req *fuse_request_alloc(void) | |
44 | { | |
45 | struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, SLAB_KERNEL); | |
46 | if (req) | |
47 | fuse_request_init(req); | |
48 | return req; | |
49 | } | |
50 | ||
51 | void fuse_request_free(struct fuse_req *req) | |
52 | { | |
53 | kmem_cache_free(fuse_req_cachep, req); | |
54 | } | |
55 | ||
8bfc016d | 56 | static void block_sigs(sigset_t *oldset) |
334f485d MS |
57 | { |
58 | sigset_t mask; | |
59 | ||
60 | siginitsetinv(&mask, sigmask(SIGKILL)); | |
61 | sigprocmask(SIG_BLOCK, &mask, oldset); | |
62 | } | |
63 | ||
8bfc016d | 64 | static void restore_sigs(sigset_t *oldset) |
334f485d MS |
65 | { |
66 | sigprocmask(SIG_SETMASK, oldset, NULL); | |
67 | } | |
68 | ||
77e7f250 MS |
69 | /* |
70 | * Reset request, so that it can be reused | |
71 | * | |
72 | * The caller must be _very_ careful to make sure, that it is holding | |
73 | * the only reference to req | |
74 | */ | |
334f485d MS |
75 | void fuse_reset_request(struct fuse_req *req) |
76 | { | |
77 | int preallocated = req->preallocated; | |
78 | BUG_ON(atomic_read(&req->count) != 1); | |
79 | fuse_request_init(req); | |
80 | req->preallocated = preallocated; | |
81 | } | |
82 | ||
83 | static void __fuse_get_request(struct fuse_req *req) | |
84 | { | |
85 | atomic_inc(&req->count); | |
86 | } | |
87 | ||
88 | /* Must be called with > 1 refcount */ | |
89 | static void __fuse_put_request(struct fuse_req *req) | |
90 | { | |
91 | BUG_ON(atomic_read(&req->count) < 2); | |
92 | atomic_dec(&req->count); | |
93 | } | |
94 | ||
95 | static struct fuse_req *do_get_request(struct fuse_conn *fc) | |
96 | { | |
97 | struct fuse_req *req; | |
98 | ||
99 | spin_lock(&fuse_lock); | |
100 | BUG_ON(list_empty(&fc->unused_list)); | |
101 | req = list_entry(fc->unused_list.next, struct fuse_req, list); | |
102 | list_del_init(&req->list); | |
103 | spin_unlock(&fuse_lock); | |
104 | fuse_request_init(req); | |
105 | req->preallocated = 1; | |
106 | req->in.h.uid = current->fsuid; | |
107 | req->in.h.gid = current->fsgid; | |
108 | req->in.h.pid = current->pid; | |
109 | return req; | |
110 | } | |
111 | ||
7c352bdf | 112 | /* This can return NULL, but only in case it's interrupted by a SIGKILL */ |
334f485d | 113 | struct fuse_req *fuse_get_request(struct fuse_conn *fc) |
334f485d MS |
114 | { |
115 | int intr; | |
116 | sigset_t oldset; | |
117 | ||
0cd5b885 | 118 | atomic_inc(&fc->num_waiting); |
334f485d MS |
119 | block_sigs(&oldset); |
120 | intr = down_interruptible(&fc->outstanding_sem); | |
121 | restore_sigs(&oldset); | |
0cd5b885 MS |
122 | if (intr) { |
123 | atomic_dec(&fc->num_waiting); | |
124 | return NULL; | |
125 | } | |
126 | return do_get_request(fc); | |
334f485d MS |
127 | } |
128 | ||
7128ec2a | 129 | /* Must be called with fuse_lock held */ |
334f485d MS |
130 | static void fuse_putback_request(struct fuse_conn *fc, struct fuse_req *req) |
131 | { | |
0cd5b885 MS |
132 | if (req->preallocated) { |
133 | atomic_dec(&fc->num_waiting); | |
334f485d | 134 | list_add(&req->list, &fc->unused_list); |
0cd5b885 | 135 | } else |
334f485d MS |
136 | fuse_request_free(req); |
137 | ||
138 | /* If we are in debt decrease that first */ | |
139 | if (fc->outstanding_debt) | |
140 | fc->outstanding_debt--; | |
141 | else | |
142 | up(&fc->outstanding_sem); | |
334f485d MS |
143 | } |
144 | ||
145 | void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req) | |
7128ec2a MS |
146 | { |
147 | if (atomic_dec_and_test(&req->count)) { | |
148 | spin_lock(&fuse_lock); | |
149 | fuse_putback_request(fc, req); | |
150 | spin_unlock(&fuse_lock); | |
151 | } | |
152 | } | |
153 | ||
154 | static void fuse_put_request_locked(struct fuse_conn *fc, struct fuse_req *req) | |
334f485d MS |
155 | { |
156 | if (atomic_dec_and_test(&req->count)) | |
157 | fuse_putback_request(fc, req); | |
158 | } | |
159 | ||
1e9a4ed9 MS |
160 | void fuse_release_background(struct fuse_req *req) |
161 | { | |
162 | iput(req->inode); | |
163 | iput(req->inode2); | |
164 | if (req->file) | |
165 | fput(req->file); | |
166 | spin_lock(&fuse_lock); | |
167 | list_del(&req->bg_entry); | |
168 | spin_unlock(&fuse_lock); | |
169 | } | |
170 | ||
334f485d MS |
171 | /* |
172 | * This function is called when a request is finished. Either a reply | |
173 | * has arrived or it was interrupted (and not yet sent) or some error | |
f43b155a MS |
174 | * occurred during communication with userspace, or the device file |
175 | * was closed. In case of a background request the reference to the | |
176 | * stored objects are released. The requester thread is woken up (if | |
64c6d8ed MS |
177 | * still waiting), the 'end' callback is called if given, else the |
178 | * reference to the request is released | |
334f485d | 179 | * |
7128ec2a MS |
180 | * Releasing extra reference for foreground requests must be done |
181 | * within the same locked region as setting state to finished. This | |
182 | * is because fuse_reset_request() may be called after request is | |
183 | * finished and it must be the sole possessor. If request is | |
184 | * interrupted and put in the background, it will return with an error | |
185 | * and hence never be reset and reused. | |
186 | * | |
334f485d MS |
187 | * Called with fuse_lock, unlocks it |
188 | */ | |
189 | static void request_end(struct fuse_conn *fc, struct fuse_req *req) | |
190 | { | |
d77a1d5b | 191 | list_del(&req->list); |
83cfd493 | 192 | req->state = FUSE_REQ_FINISHED; |
7128ec2a MS |
193 | if (!req->background) { |
194 | wake_up(&req->waitq); | |
195 | fuse_put_request_locked(fc, req); | |
196 | spin_unlock(&fuse_lock); | |
197 | } else { | |
198 | void (*end) (struct fuse_conn *, struct fuse_req *) = req->end; | |
199 | req->end = NULL; | |
200 | spin_unlock(&fuse_lock); | |
1e9a4ed9 MS |
201 | down_read(&fc->sbput_sem); |
202 | if (fc->mounted) | |
203 | fuse_release_background(req); | |
204 | up_read(&fc->sbput_sem); | |
7128ec2a MS |
205 | if (end) |
206 | end(fc, req); | |
207 | else | |
208 | fuse_put_request(fc, req); | |
334f485d | 209 | } |
334f485d MS |
210 | } |
211 | ||
1e9a4ed9 MS |
212 | /* |
213 | * Unfortunately request interruption not just solves the deadlock | |
214 | * problem, it causes problems too. These stem from the fact, that an | |
215 | * interrupted request is continued to be processed in userspace, | |
216 | * while all the locks and object references (inode and file) held | |
217 | * during the operation are released. | |
218 | * | |
219 | * To release the locks is exactly why there's a need to interrupt the | |
220 | * request, so there's not a lot that can be done about this, except | |
221 | * introduce additional locking in userspace. | |
222 | * | |
223 | * More important is to keep inode and file references until userspace | |
224 | * has replied, otherwise FORGET and RELEASE could be sent while the | |
225 | * inode/file is still used by the filesystem. | |
226 | * | |
227 | * For this reason the concept of "background" request is introduced. | |
228 | * An interrupted request is backgrounded if it has been already sent | |
229 | * to userspace. Backgrounding involves getting an extra reference to | |
230 | * inode(s) or file used in the request, and adding the request to | |
231 | * fc->background list. When a reply is received for a background | |
232 | * request, the object references are released, and the request is | |
233 | * removed from the list. If the filesystem is unmounted while there | |
234 | * are still background requests, the list is walked and references | |
235 | * are released as if a reply was received. | |
236 | * | |
237 | * There's one more use for a background request. The RELEASE message is | |
238 | * always sent as background, since it doesn't return an error or | |
239 | * data. | |
240 | */ | |
241 | static void background_request(struct fuse_conn *fc, struct fuse_req *req) | |
334f485d | 242 | { |
334f485d | 243 | req->background = 1; |
1e9a4ed9 | 244 | list_add(&req->bg_entry, &fc->background); |
334f485d MS |
245 | if (req->inode) |
246 | req->inode = igrab(req->inode); | |
247 | if (req->inode2) | |
248 | req->inode2 = igrab(req->inode2); | |
249 | if (req->file) | |
250 | get_file(req->file); | |
251 | } | |
252 | ||
334f485d | 253 | /* Called with fuse_lock held. Releases, and then reacquires it. */ |
7c352bdf | 254 | static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req) |
334f485d | 255 | { |
7c352bdf | 256 | sigset_t oldset; |
334f485d MS |
257 | |
258 | spin_unlock(&fuse_lock); | |
7c352bdf | 259 | block_sigs(&oldset); |
83cfd493 | 260 | wait_event_interruptible(req->waitq, req->state == FUSE_REQ_FINISHED); |
7c352bdf | 261 | restore_sigs(&oldset); |
334f485d | 262 | spin_lock(&fuse_lock); |
69a53bf2 | 263 | if (req->state == FUSE_REQ_FINISHED && !req->interrupted) |
334f485d MS |
264 | return; |
265 | ||
69a53bf2 MS |
266 | if (!req->interrupted) { |
267 | req->out.h.error = -EINTR; | |
268 | req->interrupted = 1; | |
269 | } | |
334f485d MS |
270 | if (req->locked) { |
271 | /* This is uninterruptible sleep, because data is | |
272 | being copied to/from the buffers of req. During | |
273 | locked state, there mustn't be any filesystem | |
274 | operation (e.g. page fault), since that could lead | |
275 | to deadlock */ | |
276 | spin_unlock(&fuse_lock); | |
277 | wait_event(req->waitq, !req->locked); | |
278 | spin_lock(&fuse_lock); | |
279 | } | |
83cfd493 | 280 | if (req->state == FUSE_REQ_PENDING) { |
334f485d MS |
281 | list_del(&req->list); |
282 | __fuse_put_request(req); | |
83cfd493 | 283 | } else if (req->state == FUSE_REQ_SENT) |
1e9a4ed9 | 284 | background_request(fc, req); |
334f485d MS |
285 | } |
286 | ||
287 | static unsigned len_args(unsigned numargs, struct fuse_arg *args) | |
288 | { | |
289 | unsigned nbytes = 0; | |
290 | unsigned i; | |
291 | ||
292 | for (i = 0; i < numargs; i++) | |
293 | nbytes += args[i].size; | |
294 | ||
295 | return nbytes; | |
296 | } | |
297 | ||
298 | static void queue_request(struct fuse_conn *fc, struct fuse_req *req) | |
299 | { | |
300 | fc->reqctr++; | |
301 | /* zero is special */ | |
302 | if (fc->reqctr == 0) | |
303 | fc->reqctr = 1; | |
304 | req->in.h.unique = fc->reqctr; | |
305 | req->in.h.len = sizeof(struct fuse_in_header) + | |
306 | len_args(req->in.numargs, (struct fuse_arg *) req->in.args); | |
307 | if (!req->preallocated) { | |
308 | /* If request is not preallocated (either FORGET or | |
309 | RELEASE), then still decrease outstanding_sem, so | |
310 | user can't open infinite number of files while not | |
311 | processing the RELEASE requests. However for | |
312 | efficiency do it without blocking, so if down() | |
313 | would block, just increase the debt instead */ | |
314 | if (down_trylock(&fc->outstanding_sem)) | |
315 | fc->outstanding_debt++; | |
316 | } | |
317 | list_add_tail(&req->list, &fc->pending); | |
83cfd493 | 318 | req->state = FUSE_REQ_PENDING; |
334f485d | 319 | wake_up(&fc->waitq); |
385a17bf | 320 | kill_fasync(&fc->fasync, SIGIO, POLL_IN); |
334f485d MS |
321 | } |
322 | ||
/*
 * Send a request that expects a reply and wait for the answer.  If the
 * connection is not connected, or is in the conn_error state, only the
 * error is stored in req->out.h.error and nothing is queued.
 *
 * The wait can only be interrupted by a SIGKILL.
 */
void request_send(struct fuse_conn *fc, struct fuse_req *req)
{
	req->isreply = 1;

	spin_lock(&fuse_lock);
	if (!fc->connected) {
		req->out.h.error = -ENOTCONN;
	} else if (fc->conn_error) {
		req->out.h.error = -ECONNREFUSED;
	} else {
		queue_request(fc, req);
		/* Extra reference: the request is still needed after request_end() */
		__fuse_get_request(req);

		request_wait_answer(fc, req);
	}
	spin_unlock(&fuse_lock);
}
344 | ||
334f485d MS |
345 | static void request_send_nowait(struct fuse_conn *fc, struct fuse_req *req) |
346 | { | |
347 | spin_lock(&fuse_lock); | |
1e9a4ed9 | 348 | if (fc->connected) { |
334f485d MS |
349 | queue_request(fc, req); |
350 | spin_unlock(&fuse_lock); | |
351 | } else { | |
352 | req->out.h.error = -ENOTCONN; | |
353 | request_end(fc, req); | |
354 | } | |
355 | } | |
356 | ||
357 | void request_send_noreply(struct fuse_conn *fc, struct fuse_req *req) | |
358 | { | |
359 | req->isreply = 0; | |
360 | request_send_nowait(fc, req); | |
361 | } | |
362 | ||
363 | void request_send_background(struct fuse_conn *fc, struct fuse_req *req) | |
364 | { | |
365 | req->isreply = 1; | |
1e9a4ed9 MS |
366 | spin_lock(&fuse_lock); |
367 | background_request(fc, req); | |
368 | spin_unlock(&fuse_lock); | |
334f485d MS |
369 | request_send_nowait(fc, req); |
370 | } | |
371 | ||
334f485d MS |
372 | /* |
373 | * Lock the request. Up to the next unlock_request() there mustn't be | |
374 | * anything that could cause a page-fault. If the request was already | |
375 | * interrupted bail out. | |
376 | */ | |
8bfc016d | 377 | static int lock_request(struct fuse_req *req) |
334f485d MS |
378 | { |
379 | int err = 0; | |
380 | if (req) { | |
381 | spin_lock(&fuse_lock); | |
382 | if (req->interrupted) | |
383 | err = -ENOENT; | |
384 | else | |
385 | req->locked = 1; | |
386 | spin_unlock(&fuse_lock); | |
387 | } | |
388 | return err; | |
389 | } | |
390 | ||
391 | /* | |
392 | * Unlock request. If it was interrupted during being locked, the | |
393 | * requester thread is currently waiting for it to be unlocked, so | |
394 | * wake it up. | |
395 | */ | |
8bfc016d | 396 | static void unlock_request(struct fuse_req *req) |
334f485d MS |
397 | { |
398 | if (req) { | |
399 | spin_lock(&fuse_lock); | |
400 | req->locked = 0; | |
401 | if (req->interrupted) | |
402 | wake_up(&req->waitq); | |
403 | spin_unlock(&fuse_lock); | |
404 | } | |
405 | } | |
406 | ||
/* Cursor used while copying a request to or from a userspace iovec */
struct fuse_copy_state {
	int write;			/* non-zero: copy into the user buffer */
	struct fuse_req *req;		/* request to lock around copies, may be NULL */
	const struct iovec *iov;	/* next iovec segment not yet started */
	unsigned long nr_segs;		/* number of segments left at iov */
	unsigned long seglen;		/* bytes left in the current segment */
	unsigned long addr;		/* user address of the next byte to map */
	struct page *pg;		/* user page obtained by get_user_pages() */
	void *mapaddr;			/* kernel mapping of pg, NULL when unmapped */
	void *buf;			/* current position inside the mapping */
	unsigned len;			/* bytes left at buf */
};
419 | ||
420 | static void fuse_copy_init(struct fuse_copy_state *cs, int write, | |
421 | struct fuse_req *req, const struct iovec *iov, | |
422 | unsigned long nr_segs) | |
423 | { | |
424 | memset(cs, 0, sizeof(*cs)); | |
425 | cs->write = write; | |
426 | cs->req = req; | |
427 | cs->iov = iov; | |
428 | cs->nr_segs = nr_segs; | |
429 | } | |
430 | ||
431 | /* Unmap and put previous page of userspace buffer */ | |
8bfc016d | 432 | static void fuse_copy_finish(struct fuse_copy_state *cs) |
334f485d MS |
433 | { |
434 | if (cs->mapaddr) { | |
435 | kunmap_atomic(cs->mapaddr, KM_USER0); | |
436 | if (cs->write) { | |
437 | flush_dcache_page(cs->pg); | |
438 | set_page_dirty_lock(cs->pg); | |
439 | } | |
440 | put_page(cs->pg); | |
441 | cs->mapaddr = NULL; | |
442 | } | |
443 | } | |
444 | ||
445 | /* | |
446 | * Get another pagefull of userspace buffer, and map it to kernel | |
447 | * address space, and lock request | |
448 | */ | |
449 | static int fuse_copy_fill(struct fuse_copy_state *cs) | |
450 | { | |
451 | unsigned long offset; | |
452 | int err; | |
453 | ||
454 | unlock_request(cs->req); | |
455 | fuse_copy_finish(cs); | |
456 | if (!cs->seglen) { | |
457 | BUG_ON(!cs->nr_segs); | |
458 | cs->seglen = cs->iov[0].iov_len; | |
459 | cs->addr = (unsigned long) cs->iov[0].iov_base; | |
460 | cs->iov ++; | |
461 | cs->nr_segs --; | |
462 | } | |
463 | down_read(¤t->mm->mmap_sem); | |
464 | err = get_user_pages(current, current->mm, cs->addr, 1, cs->write, 0, | |
465 | &cs->pg, NULL); | |
466 | up_read(¤t->mm->mmap_sem); | |
467 | if (err < 0) | |
468 | return err; | |
469 | BUG_ON(err != 1); | |
470 | offset = cs->addr % PAGE_SIZE; | |
471 | cs->mapaddr = kmap_atomic(cs->pg, KM_USER0); | |
472 | cs->buf = cs->mapaddr + offset; | |
473 | cs->len = min(PAGE_SIZE - offset, cs->seglen); | |
474 | cs->seglen -= cs->len; | |
475 | cs->addr += cs->len; | |
476 | ||
477 | return lock_request(cs->req); | |
478 | } | |
479 | ||
480 | /* Do as much copy to/from userspace buffer as we can */ | |
8bfc016d | 481 | static int fuse_copy_do(struct fuse_copy_state *cs, void **val, unsigned *size) |
334f485d MS |
482 | { |
483 | unsigned ncpy = min(*size, cs->len); | |
484 | if (val) { | |
485 | if (cs->write) | |
486 | memcpy(cs->buf, *val, ncpy); | |
487 | else | |
488 | memcpy(*val, cs->buf, ncpy); | |
489 | *val += ncpy; | |
490 | } | |
491 | *size -= ncpy; | |
492 | cs->len -= ncpy; | |
493 | cs->buf += ncpy; | |
494 | return ncpy; | |
495 | } | |
496 | ||
497 | /* | |
498 | * Copy a page in the request to/from the userspace buffer. Must be | |
499 | * done atomically | |
500 | */ | |
8bfc016d MS |
501 | static int fuse_copy_page(struct fuse_copy_state *cs, struct page *page, |
502 | unsigned offset, unsigned count, int zeroing) | |
334f485d MS |
503 | { |
504 | if (page && zeroing && count < PAGE_SIZE) { | |
505 | void *mapaddr = kmap_atomic(page, KM_USER1); | |
506 | memset(mapaddr, 0, PAGE_SIZE); | |
507 | kunmap_atomic(mapaddr, KM_USER1); | |
508 | } | |
509 | while (count) { | |
510 | int err; | |
511 | if (!cs->len && (err = fuse_copy_fill(cs))) | |
512 | return err; | |
513 | if (page) { | |
514 | void *mapaddr = kmap_atomic(page, KM_USER1); | |
515 | void *buf = mapaddr + offset; | |
516 | offset += fuse_copy_do(cs, &buf, &count); | |
517 | kunmap_atomic(mapaddr, KM_USER1); | |
518 | } else | |
519 | offset += fuse_copy_do(cs, NULL, &count); | |
520 | } | |
521 | if (page && !cs->write) | |
522 | flush_dcache_page(page); | |
523 | return 0; | |
524 | } | |
525 | ||
526 | /* Copy pages in the request to/from userspace buffer */ | |
527 | static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes, | |
528 | int zeroing) | |
529 | { | |
530 | unsigned i; | |
531 | struct fuse_req *req = cs->req; | |
532 | unsigned offset = req->page_offset; | |
533 | unsigned count = min(nbytes, (unsigned) PAGE_SIZE - offset); | |
534 | ||
535 | for (i = 0; i < req->num_pages && (nbytes || zeroing); i++) { | |
536 | struct page *page = req->pages[i]; | |
537 | int err = fuse_copy_page(cs, page, offset, count, zeroing); | |
538 | if (err) | |
539 | return err; | |
540 | ||
541 | nbytes -= count; | |
542 | count = min(nbytes, (unsigned) PAGE_SIZE); | |
543 | offset = 0; | |
544 | } | |
545 | return 0; | |
546 | } | |
547 | ||
548 | /* Copy a single argument in the request to/from userspace buffer */ | |
549 | static int fuse_copy_one(struct fuse_copy_state *cs, void *val, unsigned size) | |
550 | { | |
551 | while (size) { | |
552 | int err; | |
553 | if (!cs->len && (err = fuse_copy_fill(cs))) | |
554 | return err; | |
555 | fuse_copy_do(cs, &val, &size); | |
556 | } | |
557 | return 0; | |
558 | } | |
559 | ||
560 | /* Copy request arguments to/from userspace buffer */ | |
561 | static int fuse_copy_args(struct fuse_copy_state *cs, unsigned numargs, | |
562 | unsigned argpages, struct fuse_arg *args, | |
563 | int zeroing) | |
564 | { | |
565 | int err = 0; | |
566 | unsigned i; | |
567 | ||
568 | for (i = 0; !err && i < numargs; i++) { | |
569 | struct fuse_arg *arg = &args[i]; | |
570 | if (i == numargs - 1 && argpages) | |
571 | err = fuse_copy_pages(cs, arg->size, zeroing); | |
572 | else | |
573 | err = fuse_copy_one(cs, arg->value, arg->size); | |
574 | } | |
575 | return err; | |
576 | } | |
577 | ||
578 | /* Wait until a request is available on the pending list */ | |
579 | static void request_wait(struct fuse_conn *fc) | |
580 | { | |
581 | DECLARE_WAITQUEUE(wait, current); | |
582 | ||
583 | add_wait_queue_exclusive(&fc->waitq, &wait); | |
9ba7cbba | 584 | while (fc->connected && list_empty(&fc->pending)) { |
334f485d MS |
585 | set_current_state(TASK_INTERRUPTIBLE); |
586 | if (signal_pending(current)) | |
587 | break; | |
588 | ||
589 | spin_unlock(&fuse_lock); | |
590 | schedule(); | |
591 | spin_lock(&fuse_lock); | |
592 | } | |
593 | set_current_state(TASK_RUNNING); | |
594 | remove_wait_queue(&fc->waitq, &wait); | |
595 | } | |
596 | ||
597 | /* | |
598 | * Read a single request into the userspace filesystem's buffer. This | |
599 | * function waits until a request is available, then removes it from | |
600 | * the pending list and copies request data to userspace buffer. If | |
601 | * no reply is needed (FORGET) or request has been interrupted or | |
602 | * there was an error during the copying then it's finished by calling | |
603 | * request_end(). Otherwise add it to the processing list, and set | |
604 | * the 'sent' flag. | |
605 | */ | |
606 | static ssize_t fuse_dev_readv(struct file *file, const struct iovec *iov, | |
607 | unsigned long nr_segs, loff_t *off) | |
608 | { | |
609 | int err; | |
610 | struct fuse_conn *fc; | |
611 | struct fuse_req *req; | |
612 | struct fuse_in *in; | |
613 | struct fuse_copy_state cs; | |
614 | unsigned reqsize; | |
615 | ||
1d3d752b | 616 | restart: |
334f485d MS |
617 | spin_lock(&fuse_lock); |
618 | fc = file->private_data; | |
619 | err = -EPERM; | |
620 | if (!fc) | |
621 | goto err_unlock; | |
e5ac1d1e JD |
622 | |
623 | err = -EAGAIN; | |
624 | if ((file->f_flags & O_NONBLOCK) && fc->connected && | |
625 | list_empty(&fc->pending)) | |
626 | goto err_unlock; | |
627 | ||
334f485d MS |
628 | request_wait(fc); |
629 | err = -ENODEV; | |
9ba7cbba | 630 | if (!fc->connected) |
334f485d MS |
631 | goto err_unlock; |
632 | err = -ERESTARTSYS; | |
633 | if (list_empty(&fc->pending)) | |
634 | goto err_unlock; | |
635 | ||
636 | req = list_entry(fc->pending.next, struct fuse_req, list); | |
83cfd493 | 637 | req->state = FUSE_REQ_READING; |
d77a1d5b | 638 | list_move(&req->list, &fc->io); |
334f485d MS |
639 | |
640 | in = &req->in; | |
1d3d752b MS |
641 | reqsize = in->h.len; |
642 | /* If request is too large, reply with an error and restart the read */ | |
643 | if (iov_length(iov, nr_segs) < reqsize) { | |
644 | req->out.h.error = -EIO; | |
645 | /* SETXATTR is special, since it may contain too large data */ | |
646 | if (in->h.opcode == FUSE_SETXATTR) | |
647 | req->out.h.error = -E2BIG; | |
648 | request_end(fc, req); | |
649 | goto restart; | |
334f485d | 650 | } |
1d3d752b MS |
651 | spin_unlock(&fuse_lock); |
652 | fuse_copy_init(&cs, 1, req, iov, nr_segs); | |
653 | err = fuse_copy_one(&cs, &in->h, sizeof(in->h)); | |
654 | if (!err) | |
655 | err = fuse_copy_args(&cs, in->numargs, in->argpages, | |
656 | (struct fuse_arg *) in->args, 0); | |
334f485d | 657 | fuse_copy_finish(&cs); |
334f485d MS |
658 | spin_lock(&fuse_lock); |
659 | req->locked = 0; | |
660 | if (!err && req->interrupted) | |
661 | err = -ENOENT; | |
662 | if (err) { | |
663 | if (!req->interrupted) | |
664 | req->out.h.error = -EIO; | |
665 | request_end(fc, req); | |
666 | return err; | |
667 | } | |
668 | if (!req->isreply) | |
669 | request_end(fc, req); | |
670 | else { | |
83cfd493 | 671 | req->state = FUSE_REQ_SENT; |
d77a1d5b | 672 | list_move_tail(&req->list, &fc->processing); |
334f485d MS |
673 | spin_unlock(&fuse_lock); |
674 | } | |
675 | return reqsize; | |
676 | ||
677 | err_unlock: | |
678 | spin_unlock(&fuse_lock); | |
679 | return err; | |
680 | } | |
681 | ||
682 | static ssize_t fuse_dev_read(struct file *file, char __user *buf, | |
683 | size_t nbytes, loff_t *off) | |
684 | { | |
685 | struct iovec iov; | |
686 | iov.iov_len = nbytes; | |
687 | iov.iov_base = buf; | |
688 | return fuse_dev_readv(file, &iov, 1, off); | |
689 | } | |
690 | ||
691 | /* Look up request on processing list by unique ID */ | |
692 | static struct fuse_req *request_find(struct fuse_conn *fc, u64 unique) | |
693 | { | |
694 | struct list_head *entry; | |
695 | ||
696 | list_for_each(entry, &fc->processing) { | |
697 | struct fuse_req *req; | |
698 | req = list_entry(entry, struct fuse_req, list); | |
699 | if (req->in.h.unique == unique) | |
700 | return req; | |
701 | } | |
702 | return NULL; | |
703 | } | |
704 | ||
705 | static int copy_out_args(struct fuse_copy_state *cs, struct fuse_out *out, | |
706 | unsigned nbytes) | |
707 | { | |
708 | unsigned reqsize = sizeof(struct fuse_out_header); | |
709 | ||
710 | if (out->h.error) | |
711 | return nbytes != reqsize ? -EINVAL : 0; | |
712 | ||
713 | reqsize += len_args(out->numargs, out->args); | |
714 | ||
715 | if (reqsize < nbytes || (reqsize > nbytes && !out->argvar)) | |
716 | return -EINVAL; | |
717 | else if (reqsize > nbytes) { | |
718 | struct fuse_arg *lastarg = &out->args[out->numargs-1]; | |
719 | unsigned diffsize = reqsize - nbytes; | |
720 | if (diffsize > lastarg->size) | |
721 | return -EINVAL; | |
722 | lastarg->size -= diffsize; | |
723 | } | |
724 | return fuse_copy_args(cs, out->numargs, out->argpages, out->args, | |
725 | out->page_zeroing); | |
726 | } | |
727 | ||
728 | /* | |
729 | * Write a single reply to a request. First the header is copied from | |
730 | * the write buffer. The request is then searched on the processing | |
731 | * list by the unique ID found in the header. If found, then remove | |
732 | * it from the list and copy the rest of the buffer to the request. | |
733 | * The request is finished by calling request_end() | |
734 | */ | |
735 | static ssize_t fuse_dev_writev(struct file *file, const struct iovec *iov, | |
736 | unsigned long nr_segs, loff_t *off) | |
737 | { | |
738 | int err; | |
739 | unsigned nbytes = iov_length(iov, nr_segs); | |
740 | struct fuse_req *req; | |
741 | struct fuse_out_header oh; | |
742 | struct fuse_copy_state cs; | |
743 | struct fuse_conn *fc = fuse_get_conn(file); | |
744 | if (!fc) | |
745 | return -ENODEV; | |
746 | ||
747 | fuse_copy_init(&cs, 0, NULL, iov, nr_segs); | |
748 | if (nbytes < sizeof(struct fuse_out_header)) | |
749 | return -EINVAL; | |
750 | ||
751 | err = fuse_copy_one(&cs, &oh, sizeof(oh)); | |
752 | if (err) | |
753 | goto err_finish; | |
754 | err = -EINVAL; | |
755 | if (!oh.unique || oh.error <= -1000 || oh.error > 0 || | |
756 | oh.len != nbytes) | |
757 | goto err_finish; | |
758 | ||
759 | spin_lock(&fuse_lock); | |
69a53bf2 MS |
760 | err = -ENOENT; |
761 | if (!fc->connected) | |
762 | goto err_unlock; | |
763 | ||
334f485d MS |
764 | req = request_find(fc, oh.unique); |
765 | err = -EINVAL; | |
766 | if (!req) | |
767 | goto err_unlock; | |
768 | ||
334f485d | 769 | if (req->interrupted) { |
222f1d69 | 770 | spin_unlock(&fuse_lock); |
334f485d | 771 | fuse_copy_finish(&cs); |
222f1d69 MS |
772 | spin_lock(&fuse_lock); |
773 | request_end(fc, req); | |
334f485d MS |
774 | return -ENOENT; |
775 | } | |
d77a1d5b | 776 | list_move(&req->list, &fc->io); |
334f485d MS |
777 | req->out.h = oh; |
778 | req->locked = 1; | |
779 | cs.req = req; | |
780 | spin_unlock(&fuse_lock); | |
781 | ||
782 | err = copy_out_args(&cs, &req->out, nbytes); | |
783 | fuse_copy_finish(&cs); | |
784 | ||
785 | spin_lock(&fuse_lock); | |
786 | req->locked = 0; | |
787 | if (!err) { | |
788 | if (req->interrupted) | |
789 | err = -ENOENT; | |
790 | } else if (!req->interrupted) | |
791 | req->out.h.error = -EIO; | |
792 | request_end(fc, req); | |
793 | ||
794 | return err ? err : nbytes; | |
795 | ||
796 | err_unlock: | |
797 | spin_unlock(&fuse_lock); | |
798 | err_finish: | |
799 | fuse_copy_finish(&cs); | |
800 | return err; | |
801 | } | |
802 | ||
803 | static ssize_t fuse_dev_write(struct file *file, const char __user *buf, | |
804 | size_t nbytes, loff_t *off) | |
805 | { | |
806 | struct iovec iov; | |
807 | iov.iov_len = nbytes; | |
808 | iov.iov_base = (char __user *) buf; | |
809 | return fuse_dev_writev(file, &iov, 1, off); | |
810 | } | |
811 | ||
812 | static unsigned fuse_dev_poll(struct file *file, poll_table *wait) | |
813 | { | |
334f485d | 814 | unsigned mask = POLLOUT | POLLWRNORM; |
7025d9ad | 815 | struct fuse_conn *fc = fuse_get_conn(file); |
334f485d | 816 | if (!fc) |
7025d9ad | 817 | return POLLERR; |
334f485d MS |
818 | |
819 | poll_wait(file, &fc->waitq, wait); | |
820 | ||
821 | spin_lock(&fuse_lock); | |
7025d9ad MS |
822 | if (!fc->connected) |
823 | mask = POLLERR; | |
824 | else if (!list_empty(&fc->pending)) | |
825 | mask |= POLLIN | POLLRDNORM; | |
334f485d MS |
826 | spin_unlock(&fuse_lock); |
827 | ||
828 | return mask; | |
829 | } | |
830 | ||
69a53bf2 MS |
831 | /* |
832 | * Abort all requests on the given list (pending or processing) | |
833 | * | |
834 | * This function releases and reacquires fuse_lock | |
835 | */ | |
334f485d MS |
836 | static void end_requests(struct fuse_conn *fc, struct list_head *head) |
837 | { | |
838 | while (!list_empty(head)) { | |
839 | struct fuse_req *req; | |
840 | req = list_entry(head->next, struct fuse_req, list); | |
334f485d MS |
841 | req->out.h.error = -ECONNABORTED; |
842 | request_end(fc, req); | |
843 | spin_lock(&fuse_lock); | |
844 | } | |
845 | } | |
846 | ||
69a53bf2 MS |
847 | /* |
848 | * Abort requests under I/O | |
849 | * | |
850 | * The requests are set to interrupted and finished, and the request | |
851 | * waiter is woken up. This will make request_wait_answer() wait | |
852 | * until the request is unlocked and then return. | |
64c6d8ed MS |
853 | * |
854 | * If the request is asynchronous, then the end function needs to be | |
855 | * called after waiting for the request to be unlocked (if it was | |
856 | * locked). | |
69a53bf2 MS |
857 | */ |
858 | static void end_io_requests(struct fuse_conn *fc) | |
859 | { | |
860 | while (!list_empty(&fc->io)) { | |
64c6d8ed MS |
861 | struct fuse_req *req = |
862 | list_entry(fc->io.next, struct fuse_req, list); | |
863 | void (*end) (struct fuse_conn *, struct fuse_req *) = req->end; | |
864 | ||
69a53bf2 MS |
865 | req->interrupted = 1; |
866 | req->out.h.error = -ECONNABORTED; | |
867 | req->state = FUSE_REQ_FINISHED; | |
868 | list_del_init(&req->list); | |
869 | wake_up(&req->waitq); | |
64c6d8ed MS |
870 | if (end) { |
871 | req->end = NULL; | |
872 | /* The end function will consume this reference */ | |
873 | __fuse_get_request(req); | |
874 | spin_unlock(&fuse_lock); | |
875 | wait_event(req->waitq, !req->locked); | |
876 | end(fc, req); | |
877 | spin_lock(&fuse_lock); | |
878 | } | |
69a53bf2 MS |
879 | } |
880 | } | |
881 | ||
882 | /* | |
883 | * Abort all requests. | |
884 | * | |
885 | * Emergency exit in case of a malicious or accidental deadlock, or | |
886 | * just a hung filesystem. | |
887 | * | |
888 | * The same effect is usually achievable through killing the | |
889 | * filesystem daemon and all users of the filesystem. The exception | |
890 | * is the combination of an asynchronous request and the tricky | |
891 | * deadlock (see Documentation/filesystems/fuse.txt). | |
892 | * | |
893 | * During the aborting, progression of requests from the pending and | |
894 | * processing lists onto the io list, and progression of new requests | |
895 | * onto the pending list is prevented by req->connected being false. | |
896 | * | |
897 | * Progression of requests under I/O to the processing list is | |
898 | * prevented by the req->interrupted flag being true for these | |
899 | * requests. For this reason requests on the io list must be aborted | |
900 | * first. | |
901 | */ | |
902 | void fuse_abort_conn(struct fuse_conn *fc) | |
903 | { | |
904 | spin_lock(&fuse_lock); | |
905 | if (fc->connected) { | |
906 | fc->connected = 0; | |
907 | end_io_requests(fc); | |
908 | end_requests(fc, &fc->pending); | |
909 | end_requests(fc, &fc->processing); | |
910 | wake_up_all(&fc->waitq); | |
385a17bf | 911 | kill_fasync(&fc->fasync, SIGIO, POLL_IN); |
69a53bf2 MS |
912 | } |
913 | spin_unlock(&fuse_lock); | |
914 | } | |
915 | ||
334f485d MS |
916 | static int fuse_dev_release(struct inode *inode, struct file *file) |
917 | { | |
918 | struct fuse_conn *fc; | |
919 | ||
920 | spin_lock(&fuse_lock); | |
921 | fc = file->private_data; | |
922 | if (fc) { | |
1e9a4ed9 | 923 | fc->connected = 0; |
334f485d MS |
924 | end_requests(fc, &fc->pending); |
925 | end_requests(fc, &fc->processing); | |
334f485d MS |
926 | } |
927 | spin_unlock(&fuse_lock); | |
385a17bf JD |
928 | if (fc) { |
929 | fasync_helper(-1, file, 0, &fc->fasync); | |
f543f253 | 930 | kobject_put(&fc->kobj); |
385a17bf | 931 | } |
f543f253 | 932 | |
334f485d MS |
933 | return 0; |
934 | } | |
935 | ||
385a17bf JD |
936 | static int fuse_dev_fasync(int fd, struct file *file, int on) |
937 | { | |
938 | struct fuse_conn *fc = fuse_get_conn(file); | |
939 | if (!fc) | |
940 | return -ENODEV; | |
941 | ||
942 | /* No locking - fasync_helper does its own locking */ | |
943 | return fasync_helper(fd, file, on, &fc->fasync); | |
944 | } | |
945 | ||
4b6f5d20 | 946 | const struct file_operations fuse_dev_operations = { |
334f485d MS |
947 | .owner = THIS_MODULE, |
948 | .llseek = no_llseek, | |
949 | .read = fuse_dev_read, | |
950 | .readv = fuse_dev_readv, | |
951 | .write = fuse_dev_write, | |
952 | .writev = fuse_dev_writev, | |
953 | .poll = fuse_dev_poll, | |
954 | .release = fuse_dev_release, | |
385a17bf | 955 | .fasync = fuse_dev_fasync, |
334f485d MS |
956 | }; |
957 | ||
958 | static struct miscdevice fuse_miscdevice = { | |
959 | .minor = FUSE_MINOR, | |
960 | .name = "fuse", | |
961 | .fops = &fuse_dev_operations, | |
962 | }; | |
963 | ||
964 | int __init fuse_dev_init(void) | |
965 | { | |
966 | int err = -ENOMEM; | |
967 | fuse_req_cachep = kmem_cache_create("fuse_request", | |
968 | sizeof(struct fuse_req), | |
969 | 0, 0, NULL, NULL); | |
970 | if (!fuse_req_cachep) | |
971 | goto out; | |
972 | ||
973 | err = misc_register(&fuse_miscdevice); | |
974 | if (err) | |
975 | goto out_cache_clean; | |
976 | ||
977 | return 0; | |
978 | ||
979 | out_cache_clean: | |
980 | kmem_cache_destroy(fuse_req_cachep); | |
981 | out: | |
982 | return err; | |
983 | } | |
984 | ||
985 | void fuse_dev_cleanup(void) | |
986 | { | |
987 | misc_deregister(&fuse_miscdevice); | |
988 | kmem_cache_destroy(fuse_req_cachep); | |
989 | } |