[PATCH] splice: improve writeback and clean up page stealing
[deliverable/linux.git] / fs / pipe.c
1 /*
2 * linux/fs/pipe.c
3 *
4 * Copyright (C) 1991, 1992, 1999 Linus Torvalds
5 */
6
7 #include <linux/mm.h>
8 #include <linux/file.h>
9 #include <linux/poll.h>
10 #include <linux/slab.h>
11 #include <linux/module.h>
12 #include <linux/init.h>
13 #include <linux/fs.h>
14 #include <linux/mount.h>
15 #include <linux/pipe_fs_i.h>
16 #include <linux/uio.h>
17 #include <linux/highmem.h>
18 #include <linux/pagemap.h>
19
20 #include <asm/uaccess.h>
21 #include <asm/ioctls.h>
22
23 /*
24 * We use a start+len construction, which provides full use of the
25 * allocated memory.
26 * -- Florian Coosmann (FGC)
27 *
28 * Reads with count = 0 should always return 0.
29 * -- Julian Bradfield 1999-06-07.
30 *
31 * FIFOs and Pipes now generate SIGIO for both readers and writers.
32 * -- Jeremy Elson <jelson@circlemud.org> 2001-08-16
33 *
34 * pipe_read & write cleanup
35 * -- Manfred Spraul <manfred@colorfullife.com> 2002-05-09
36 */
37
/*
 * Drop the inode semaphore and wait for a pipe event, atomically.
 * Must be called with PIPE_MUTEX held; it is held again on return.
 */
void pipe_wait(struct inode * inode)
{
	DEFINE_WAIT(wait);

	/*
	 * Pipes are system-local resources, so sleeping on them
	 * is considered a noninteractive wait:
	 */
	prepare_to_wait(PIPE_WAIT(*inode), &wait, TASK_INTERRUPTIBLE|TASK_NONINTERACTIVE);
	/* Unlock only after we are queued on the wait queue, so a
	 * concurrent wakeup between unlock and schedule() is not lost. */
	mutex_unlock(PIPE_MUTEX(*inode));
	schedule();
	finish_wait(PIPE_WAIT(*inode), &wait);
	/* Re-acquire before returning; callers expect the mutex held. */
	mutex_lock(PIPE_MUTEX(*inode));
}
53
54 static int
55 pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len)
56 {
57 unsigned long copy;
58
59 while (len > 0) {
60 while (!iov->iov_len)
61 iov++;
62 copy = min_t(unsigned long, len, iov->iov_len);
63
64 if (copy_from_user(to, iov->iov_base, copy))
65 return -EFAULT;
66 to += copy;
67 len -= copy;
68 iov->iov_base += copy;
69 iov->iov_len -= copy;
70 }
71 return 0;
72 }
73
74 static int
75 pipe_iov_copy_to_user(struct iovec *iov, const void *from, unsigned long len)
76 {
77 unsigned long copy;
78
79 while (len > 0) {
80 while (!iov->iov_len)
81 iov++;
82 copy = min_t(unsigned long, len, iov->iov_len);
83
84 if (copy_to_user(iov->iov_base, from, copy))
85 return -EFAULT;
86 from += copy;
87 len -= copy;
88 iov->iov_base += copy;
89 iov->iov_len -= copy;
90 }
91 return 0;
92 }
93
94 static void anon_pipe_buf_release(struct pipe_inode_info *info, struct pipe_buffer *buf)
95 {
96 struct page *page = buf->page;
97
98 /*
99 * If nobody else uses this page, and we don't already have a
100 * temporary page, let's keep track of it as a one-deep
101 * allocation cache
102 */
103 if (page_count(page) == 1 && !info->tmp_page) {
104 info->tmp_page = page;
105 return;
106 }
107
108 /*
109 * Otherwise just release our reference to it
110 */
111 page_cache_release(page);
112 }
113
/* Map the buffer's page into kernel address space.  Cannot fail for
 * anonymous buffers; the pipe_buf_operations contract allows ERR_PTR. */
static void *anon_pipe_buf_map(struct file *file, struct pipe_inode_info *info, struct pipe_buffer *buf)
{
	return kmap(buf->page);
}
118
/* Undo anon_pipe_buf_map(). */
static void anon_pipe_buf_unmap(struct pipe_inode_info *info, struct pipe_buffer *buf)
{
	kunmap(buf->page);
}
123
/* Attempt to steal the page backing this buffer.  Anonymous pipe
 * pages are private, so stealing always succeeds (returns 0). */
static int anon_pipe_buf_steal(struct pipe_inode_info *info,
			       struct pipe_buffer *buf)
{
	return 0;
}
129
/* Operations for ordinary (anonymous) pipe buffers.  can_merge lets
 * pipe_writev() append small writes to the last partially-filled page. */
static struct pipe_buf_operations anon_pipe_buf_ops = {
	.can_merge = 1,
	.map = anon_pipe_buf_map,
	.unmap = anon_pipe_buf_unmap,
	.release = anon_pipe_buf_release,
	.steal = anon_pipe_buf_steal,
};
137
/*
 * Vectored read from a pipe.  Drains queued pipe buffers into the
 * user iovec, releasing each buffer as it empties, and sleeps
 * (unless O_NONBLOCK) while the pipe is empty and writers remain.
 * Returns bytes read, 0 at EOF (no writers), or a negative errno.
 * Takes and releases PIPE_MUTEX internally.
 */
static ssize_t
pipe_readv(struct file *filp, const struct iovec *_iov,
	   unsigned long nr_segs, loff_t *ppos)
{
	struct inode *inode = filp->f_dentry->d_inode;
	struct pipe_inode_info *info;
	int do_wakeup;
	ssize_t ret;
	struct iovec *iov = (struct iovec *)_iov;
	size_t total_len;

	total_len = iov_length(iov, nr_segs);
	/* Null read succeeds. */
	if (unlikely(total_len == 0))
		return 0;

	do_wakeup = 0;
	ret = 0;
	mutex_lock(PIPE_MUTEX(*inode));
	info = inode->i_pipe;
	for (;;) {
		int bufs = info->nrbufs;
		if (bufs) {
			int curbuf = info->curbuf;
			struct pipe_buffer *buf = info->bufs + curbuf;
			struct pipe_buf_operations *ops = buf->ops;
			void *addr;
			size_t chars = buf->len;
			int error;

			if (chars > total_len)
				chars = total_len;

			/* Map the buffer's page; ->map may return an
			 * ERR_PTR for non-anonymous (spliced) buffers. */
			addr = ops->map(filp, info, buf);
			if (IS_ERR(addr)) {
				if (!ret)
					ret = PTR_ERR(addr);
				break;
			}
			error = pipe_iov_copy_to_user(iov, addr + buf->offset, chars);
			ops->unmap(info, buf);
			if (unlikely(error)) {
				/* Only report -EFAULT if nothing was read. */
				if (!ret) ret = -EFAULT;
				break;
			}
			ret += chars;
			buf->offset += chars;
			buf->len -= chars;
			if (!buf->len) {
				/* Buffer fully drained: release it and
				 * advance the circular-buffer head. */
				buf->ops = NULL;
				ops->release(info, buf);
				curbuf = (curbuf + 1) & (PIPE_BUFFERS-1);
				info->curbuf = curbuf;
				info->nrbufs = --bufs;
				do_wakeup = 1;
			}
			total_len -= chars;
			if (!total_len)
				break;	/* common path: read succeeded */
		}
		if (bufs)	/* More to do? */
			continue;
		/* Pipe empty and no writers left: EOF. */
		if (!PIPE_WRITERS(*inode))
			break;
		if (!PIPE_WAITING_WRITERS(*inode)) {
			/* syscall merging: Usually we must not sleep
			 * if O_NONBLOCK is set, or if we got some data.
			 * But if a writer sleeps in kernel space, then
			 * we can wait for that data without violating POSIX.
			 */
			if (ret)
				break;
			if (filp->f_flags & O_NONBLOCK) {
				ret = -EAGAIN;
				break;
			}
		}
		if (signal_pending(current)) {
			if (!ret) ret = -ERESTARTSYS;
			break;
		}
		if (do_wakeup) {
			/* Wake writers before we sleep so they can refill. */
			wake_up_interruptible_sync(PIPE_WAIT(*inode));
			kill_fasync(PIPE_FASYNC_WRITERS(*inode), SIGIO, POLL_OUT);
		}
		pipe_wait(inode);
	}
	mutex_unlock(PIPE_MUTEX(*inode));
	/* Signal writers asynchronously that there is more room. */
	if (do_wakeup) {
		wake_up_interruptible(PIPE_WAIT(*inode));
		kill_fasync(PIPE_FASYNC_WRITERS(*inode), SIGIO, POLL_OUT);
	}
	if (ret > 0)
		file_accessed(filp);
	return ret;
}
235
236 static ssize_t
237 pipe_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
238 {
239 struct iovec iov = { .iov_base = buf, .iov_len = count };
240 return pipe_readv(filp, &iov, 1, ppos);
241 }
242
243 static ssize_t
244 pipe_writev(struct file *filp, const struct iovec *_iov,
245 unsigned long nr_segs, loff_t *ppos)
246 {
247 struct inode *inode = filp->f_dentry->d_inode;
248 struct pipe_inode_info *info;
249 ssize_t ret;
250 int do_wakeup;
251 struct iovec *iov = (struct iovec *)_iov;
252 size_t total_len;
253 ssize_t chars;
254
255 total_len = iov_length(iov, nr_segs);
256 /* Null write succeeds. */
257 if (unlikely(total_len == 0))
258 return 0;
259
260 do_wakeup = 0;
261 ret = 0;
262 mutex_lock(PIPE_MUTEX(*inode));
263 info = inode->i_pipe;
264
265 if (!PIPE_READERS(*inode)) {
266 send_sig(SIGPIPE, current, 0);
267 ret = -EPIPE;
268 goto out;
269 }
270
271 /* We try to merge small writes */
272 chars = total_len & (PAGE_SIZE-1); /* size of the last buffer */
273 if (info->nrbufs && chars != 0) {
274 int lastbuf = (info->curbuf + info->nrbufs - 1) & (PIPE_BUFFERS-1);
275 struct pipe_buffer *buf = info->bufs + lastbuf;
276 struct pipe_buf_operations *ops = buf->ops;
277 int offset = buf->offset + buf->len;
278 if (ops->can_merge && offset + chars <= PAGE_SIZE) {
279 void *addr;
280 int error;
281
282 addr = ops->map(filp, info, buf);
283 if (IS_ERR(addr)) {
284 error = PTR_ERR(addr);
285 goto out;
286 }
287 error = pipe_iov_copy_from_user(offset + addr, iov,
288 chars);
289 ops->unmap(info, buf);
290 ret = error;
291 do_wakeup = 1;
292 if (error)
293 goto out;
294 buf->len += chars;
295 total_len -= chars;
296 ret = chars;
297 if (!total_len)
298 goto out;
299 }
300 }
301
302 for (;;) {
303 int bufs;
304 if (!PIPE_READERS(*inode)) {
305 send_sig(SIGPIPE, current, 0);
306 if (!ret) ret = -EPIPE;
307 break;
308 }
309 bufs = info->nrbufs;
310 if (bufs < PIPE_BUFFERS) {
311 int newbuf = (info->curbuf + bufs) & (PIPE_BUFFERS-1);
312 struct pipe_buffer *buf = info->bufs + newbuf;
313 struct page *page = info->tmp_page;
314 int error;
315
316 if (!page) {
317 page = alloc_page(GFP_HIGHUSER);
318 if (unlikely(!page)) {
319 ret = ret ? : -ENOMEM;
320 break;
321 }
322 info->tmp_page = page;
323 }
324 /* Always wakeup, even if the copy fails. Otherwise
325 * we lock up (O_NONBLOCK-)readers that sleep due to
326 * syscall merging.
327 * FIXME! Is this really true?
328 */
329 do_wakeup = 1;
330 chars = PAGE_SIZE;
331 if (chars > total_len)
332 chars = total_len;
333
334 error = pipe_iov_copy_from_user(kmap(page), iov, chars);
335 kunmap(page);
336 if (unlikely(error)) {
337 if (!ret) ret = -EFAULT;
338 break;
339 }
340 ret += chars;
341
342 /* Insert it into the buffer array */
343 buf->page = page;
344 buf->ops = &anon_pipe_buf_ops;
345 buf->offset = 0;
346 buf->len = chars;
347 info->nrbufs = ++bufs;
348 info->tmp_page = NULL;
349
350 total_len -= chars;
351 if (!total_len)
352 break;
353 }
354 if (bufs < PIPE_BUFFERS)
355 continue;
356 if (filp->f_flags & O_NONBLOCK) {
357 if (!ret) ret = -EAGAIN;
358 break;
359 }
360 if (signal_pending(current)) {
361 if (!ret) ret = -ERESTARTSYS;
362 break;
363 }
364 if (do_wakeup) {
365 wake_up_interruptible_sync(PIPE_WAIT(*inode));
366 kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, POLL_IN);
367 do_wakeup = 0;
368 }
369 PIPE_WAITING_WRITERS(*inode)++;
370 pipe_wait(inode);
371 PIPE_WAITING_WRITERS(*inode)--;
372 }
373 out:
374 mutex_unlock(PIPE_MUTEX(*inode));
375 if (do_wakeup) {
376 wake_up_interruptible(PIPE_WAIT(*inode));
377 kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, POLL_IN);
378 }
379 if (ret > 0)
380 file_update_time(filp);
381 return ret;
382 }
383
384 static ssize_t
385 pipe_write(struct file *filp, const char __user *buf,
386 size_t count, loff_t *ppos)
387 {
388 struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count };
389 return pipe_writev(filp, &iov, 1, ppos);
390 }
391
/* Read on the write-only end of a FIFO/pipe: always -EBADF. */
static ssize_t
bad_pipe_r(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
{
	return -EBADF;
}
397
/* Write on the read-only end of a FIFO/pipe: always -EBADF. */
static ssize_t
bad_pipe_w(struct file *filp, const char __user *buf, size_t count, loff_t *ppos)
{
	return -EBADF;
}
403
404 static int
405 pipe_ioctl(struct inode *pino, struct file *filp,
406 unsigned int cmd, unsigned long arg)
407 {
408 struct inode *inode = filp->f_dentry->d_inode;
409 struct pipe_inode_info *info;
410 int count, buf, nrbufs;
411
412 switch (cmd) {
413 case FIONREAD:
414 mutex_lock(PIPE_MUTEX(*inode));
415 info = inode->i_pipe;
416 count = 0;
417 buf = info->curbuf;
418 nrbufs = info->nrbufs;
419 while (--nrbufs >= 0) {
420 count += info->bufs[buf].len;
421 buf = (buf+1) & (PIPE_BUFFERS-1);
422 }
423 mutex_unlock(PIPE_MUTEX(*inode));
424 return put_user(count, (int __user *)arg);
425 default:
426 return -EINVAL;
427 }
428 }
429
/* No kernel lock held - fine */
/*
 * poll/select support.  nrbufs is read without the mutex; a single
 * racy int read is acceptable here since poll results are inherently
 * transient and re-checked by the caller.
 */
static unsigned int
pipe_poll(struct file *filp, poll_table *wait)
{
	unsigned int mask;
	struct inode *inode = filp->f_dentry->d_inode;
	struct pipe_inode_info *info = inode->i_pipe;
	int nrbufs;

	poll_wait(filp, PIPE_WAIT(*inode), wait);

	/* Reading only -- no need for acquiring the semaphore. */
	nrbufs = info->nrbufs;
	mask = 0;
	if (filp->f_mode & FMODE_READ) {
		mask = (nrbufs > 0) ? POLLIN | POLLRDNORM : 0;
		/* POLLHUP only once all writers that existed since our
		 * open (tracked via f_version/WCOUNTER) have gone. */
		if (!PIPE_WRITERS(*inode) && filp->f_version != PIPE_WCOUNTER(*inode))
			mask |= POLLHUP;
	}

	if (filp->f_mode & FMODE_WRITE) {
		mask |= (nrbufs < PIPE_BUFFERS) ? POLLOUT | POLLWRNORM : 0;
		/*
		 * Most Unices do not set POLLERR for FIFOs but on Linux they
		 * behave exactly like pipes for poll().
		 */
		if (!PIPE_READERS(*inode))
			mask |= POLLERR;
	}

	return mask;
}
462
/*
 * Common release path.  Decrements the reader/writer counts by the
 * given amounts; frees the pipe state when both reach zero,
 * otherwise wakes the remaining side so it can observe the hangup.
 */
static int
pipe_release(struct inode *inode, int decr, int decw)
{
	mutex_lock(PIPE_MUTEX(*inode));
	PIPE_READERS(*inode) -= decr;
	PIPE_WRITERS(*inode) -= decw;
	if (!PIPE_READERS(*inode) && !PIPE_WRITERS(*inode)) {
		free_pipe_info(inode);
	} else {
		wake_up_interruptible(PIPE_WAIT(*inode));
		kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, POLL_IN);
		kill_fasync(PIPE_FASYNC_WRITERS(*inode), SIGIO, POLL_OUT);
	}
	mutex_unlock(PIPE_MUTEX(*inode));

	return 0;
}
480
481 static int
482 pipe_read_fasync(int fd, struct file *filp, int on)
483 {
484 struct inode *inode = filp->f_dentry->d_inode;
485 int retval;
486
487 mutex_lock(PIPE_MUTEX(*inode));
488 retval = fasync_helper(fd, filp, on, PIPE_FASYNC_READERS(*inode));
489 mutex_unlock(PIPE_MUTEX(*inode));
490
491 if (retval < 0)
492 return retval;
493
494 return 0;
495 }
496
497
498 static int
499 pipe_write_fasync(int fd, struct file *filp, int on)
500 {
501 struct inode *inode = filp->f_dentry->d_inode;
502 int retval;
503
504 mutex_lock(PIPE_MUTEX(*inode));
505 retval = fasync_helper(fd, filp, on, PIPE_FASYNC_WRITERS(*inode));
506 mutex_unlock(PIPE_MUTEX(*inode));
507
508 if (retval < 0)
509 return retval;
510
511 return 0;
512 }
513
514
515 static int
516 pipe_rdwr_fasync(int fd, struct file *filp, int on)
517 {
518 struct inode *inode = filp->f_dentry->d_inode;
519 int retval;
520
521 mutex_lock(PIPE_MUTEX(*inode));
522
523 retval = fasync_helper(fd, filp, on, PIPE_FASYNC_READERS(*inode));
524
525 if (retval >= 0)
526 retval = fasync_helper(fd, filp, on, PIPE_FASYNC_WRITERS(*inode));
527
528 mutex_unlock(PIPE_MUTEX(*inode));
529
530 if (retval < 0)
531 return retval;
532
533 return 0;
534 }
535
536
/* Release the read-only end: drop any fasync registration, then one reader. */
static int
pipe_read_release(struct inode *inode, struct file *filp)
{
	pipe_read_fasync(-1, filp, 0);
	return pipe_release(inode, 1, 0);
}
543
/* Release the write-only end: drop any fasync registration, then one writer. */
static int
pipe_write_release(struct inode *inode, struct file *filp)
{
	pipe_write_fasync(-1, filp, 0);
	return pipe_release(inode, 0, 1);
}
550
551 static int
552 pipe_rdwr_release(struct inode *inode, struct file *filp)
553 {
554 int decr, decw;
555
556 pipe_rdwr_fasync(-1, filp, 0);
557 decr = (filp->f_mode & FMODE_READ) != 0;
558 decw = (filp->f_mode & FMODE_WRITE) != 0;
559 return pipe_release(inode, decr, decw);
560 }
561
/* Open the read end: count one more reader under the pipe mutex. */
static int
pipe_read_open(struct inode *inode, struct file *filp)
{
	/* We could have perhaps used atomic_t, but this and friends
	   below are the only places. So it doesn't seem worthwhile. */
	mutex_lock(PIPE_MUTEX(*inode));
	PIPE_READERS(*inode)++;
	mutex_unlock(PIPE_MUTEX(*inode));

	return 0;
}
573
/* Open the write end: count one more writer under the pipe mutex. */
static int
pipe_write_open(struct inode *inode, struct file *filp)
{
	mutex_lock(PIPE_MUTEX(*inode));
	PIPE_WRITERS(*inode)++;
	mutex_unlock(PIPE_MUTEX(*inode));

	return 0;
}
583
/* Open for read/write: bump whichever counts the open mode implies. */
static int
pipe_rdwr_open(struct inode *inode, struct file *filp)
{
	mutex_lock(PIPE_MUTEX(*inode));
	if (filp->f_mode & FMODE_READ)
		PIPE_READERS(*inode)++;
	if (filp->f_mode & FMODE_WRITE)
		PIPE_WRITERS(*inode)++;
	mutex_unlock(PIPE_MUTEX(*inode));

	return 0;
}
596
/*
 * The file_operations structs are not static because they
 * are also used in linux/fs/fifo.c to do operations on FIFOs.
 */
/* FIFO opened O_RDONLY: writes are rejected with -EBADF. */
const struct file_operations read_fifo_fops = {
	.llseek		= no_llseek,
	.read		= pipe_read,
	.readv		= pipe_readv,
	.write		= bad_pipe_w,
	.poll		= pipe_poll,
	.ioctl		= pipe_ioctl,
	.open		= pipe_read_open,
	.release	= pipe_read_release,
	.fasync		= pipe_read_fasync,
};

/* FIFO opened O_WRONLY: reads are rejected with -EBADF. */
const struct file_operations write_fifo_fops = {
	.llseek		= no_llseek,
	.read		= bad_pipe_r,
	.write		= pipe_write,
	.writev		= pipe_writev,
	.poll		= pipe_poll,
	.ioctl		= pipe_ioctl,
	.open		= pipe_write_open,
	.release	= pipe_write_release,
	.fasync		= pipe_write_fasync,
};

/* FIFO opened O_RDWR: both directions enabled. */
const struct file_operations rdwr_fifo_fops = {
	.llseek		= no_llseek,
	.read		= pipe_read,
	.readv		= pipe_readv,
	.write		= pipe_write,
	.writev		= pipe_writev,
	.poll		= pipe_poll,
	.ioctl		= pipe_ioctl,
	.open		= pipe_rdwr_open,
	.release	= pipe_rdwr_release,
	.fasync		= pipe_rdwr_fasync,
};
637
/* Anonymous-pipe counterparts of the FIFO fops above; file-local
 * since pipe(2) file descriptors are created only in this file. */
static struct file_operations read_pipe_fops = {
	.llseek		= no_llseek,
	.read		= pipe_read,
	.readv		= pipe_readv,
	.write		= bad_pipe_w,
	.poll		= pipe_poll,
	.ioctl		= pipe_ioctl,
	.open		= pipe_read_open,
	.release	= pipe_read_release,
	.fasync		= pipe_read_fasync,
};

static struct file_operations write_pipe_fops = {
	.llseek		= no_llseek,
	.read		= bad_pipe_r,
	.write		= pipe_write,
	.writev		= pipe_writev,
	.poll		= pipe_poll,
	.ioctl		= pipe_ioctl,
	.open		= pipe_write_open,
	.release	= pipe_write_release,
	.fasync		= pipe_write_fasync,
};

static struct file_operations rdwr_pipe_fops = {
	.llseek		= no_llseek,
	.read		= pipe_read,
	.readv		= pipe_readv,
	.write		= pipe_write,
	.writev		= pipe_writev,
	.poll		= pipe_poll,
	.ioctl		= pipe_ioctl,
	.open		= pipe_rdwr_open,
	.release	= pipe_rdwr_release,
	.fasync		= pipe_rdwr_fasync,
};
674
/*
 * Tear down the pipe state attached to an inode: release every
 * occupied buffer via its ops, free the cached spare page, and free
 * the pipe_inode_info itself.  Caller must hold PIPE_MUTEX (called
 * from pipe_release()) or otherwise have exclusive access.
 */
void free_pipe_info(struct inode *inode)
{
	int i;
	struct pipe_inode_info *info = inode->i_pipe;

	/* Detach first so no one finds half-torn-down state. */
	inode->i_pipe = NULL;
	for (i = 0; i < PIPE_BUFFERS; i++) {
		struct pipe_buffer *buf = info->bufs + i;
		if (buf->ops)
			buf->ops->release(info, buf);
	}
	if (info->tmp_page)
		__free_page(info->tmp_page);
	kfree(info);
}
690
691 struct inode* pipe_new(struct inode* inode)
692 {
693 struct pipe_inode_info *info;
694
695 info = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL);
696 if (!info)
697 goto fail_page;
698 inode->i_pipe = info;
699
700 init_waitqueue_head(PIPE_WAIT(*inode));
701 PIPE_RCOUNTER(*inode) = PIPE_WCOUNTER(*inode) = 1;
702
703 return inode;
704 fail_page:
705 return NULL;
706 }
707
/* Internal mount of pipefs; all pipe inodes live on it. */
static struct vfsmount *pipe_mnt __read_mostly;

/* Pipe dentries are never reused: delete on last dput. */
static int pipefs_delete_dentry(struct dentry *dentry)
{
	return 1;
}

static struct dentry_operations pipefs_dentry_operations = {
	.d_delete	= pipefs_delete_dentry,
};
716
/*
 * Allocate a pipefs inode with pipe state attached, configured with
 * one reader and one writer (for the pipe(2) fd pair).  Returns the
 * inode or NULL on failure.
 */
static struct inode * get_pipe_inode(void)
{
	struct inode *inode = new_inode(pipe_mnt->mnt_sb);

	if (!inode)
		goto fail_inode;

	if(!pipe_new(inode))
		goto fail_iput;
	PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 1;
	inode->i_fop = &rdwr_pipe_fops;

	/*
	 * Mark the inode dirty from the very beginning,
	 * that way it will never be moved to the dirty
	 * list because "mark_inode_dirty()" will think
	 * that it already _is_ on the dirty list.
	 */
	inode->i_state = I_DIRTY;
	inode->i_mode = S_IFIFO | S_IRUSR | S_IWUSR;
	inode->i_uid = current->fsuid;
	inode->i_gid = current->fsgid;
	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
	inode->i_blksize = PAGE_SIZE;
	return inode;

fail_iput:
	iput(inode);
fail_inode:
	return NULL;
}
748
/*
 * Implement pipe(2): create the shared pipe inode, two struct files
 * (read end and write end), and two fds.  On success fd[0] is the
 * read end and fd[1] the write end; returns 0.  On failure every
 * partially acquired resource is unwound in reverse order and a
 * negative errno is returned.
 */
int do_pipe(int *fd)
{
	struct qstr this;
	char name[32];
	struct dentry *dentry;
	struct inode * inode;
	struct file *f1, *f2;
	int error;
	int i,j;

	error = -ENFILE;
	f1 = get_empty_filp();
	if (!f1)
		goto no_files;

	f2 = get_empty_filp();
	if (!f2)
		goto close_f1;

	inode = get_pipe_inode();
	if (!inode)
		goto close_f12;

	error = get_unused_fd();
	if (error < 0)
		goto close_f12_inode;
	i = error;

	error = get_unused_fd();
	if (error < 0)
		goto close_f12_inode_i;
	j = error;

	error = -ENOMEM;
	sprintf(name, "[%lu]", inode->i_ino);
	this.name = name;
	this.len = strlen(name);
	this.hash = inode->i_ino; /* will go */
	dentry = d_alloc(pipe_mnt->mnt_sb->s_root, &this);
	if (!dentry)
		goto close_f12_inode_i_j;
	dentry->d_op = &pipefs_dentry_operations;
	d_add(dentry, inode);
	/* Two mount references (one per file); the dentry gets a second
	 * reference via dget() to match d_alloc()'s initial one. */
	f1->f_vfsmnt = f2->f_vfsmnt = mntget(mntget(pipe_mnt));
	f1->f_dentry = f2->f_dentry = dget(dentry);
	f1->f_mapping = f2->f_mapping = inode->i_mapping;

	/* read file */
	f1->f_pos = f2->f_pos = 0;
	f1->f_flags = O_RDONLY;
	f1->f_op = &read_pipe_fops;
	f1->f_mode = FMODE_READ;
	f1->f_version = 0;

	/* write file */
	f2->f_flags = O_WRONLY;
	f2->f_op = &write_pipe_fops;
	f2->f_mode = FMODE_WRITE;
	f2->f_version = 0;

	/* Publish the fds only when nothing can fail any more. */
	fd_install(i, f1);
	fd_install(j, f2);
	fd[0] = i;
	fd[1] = j;
	return 0;

close_f12_inode_i_j:
	put_unused_fd(j);
close_f12_inode_i:
	put_unused_fd(i);
close_f12_inode:
	free_pipe_info(inode);
	iput(inode);
close_f12:
	put_filp(f2);
close_f1:
	put_filp(f1);
no_files:
	return error;
}
829
/*
 * pipefs should _never_ be mounted by userland - too much of security hassle,
 * no real gain from having the whole whorehouse mounted. So we don't need
 * any operations on the root directory. However, we need a non-trivial
 * d_name - pipe: will go nicely and kill the special-casing in procfs.
 */

/* Build the kernel-internal pseudo superblock for pipefs. */
static struct super_block *pipefs_get_sb(struct file_system_type *fs_type,
					 int flags, const char *dev_name, void *data)
{
	return get_sb_pseudo(fs_type, "pipe:", NULL, PIPEFS_MAGIC);
}
842
/* Filesystem type for the internal pipefs mount. */
static struct file_system_type pipe_fs_type = {
	.name		= "pipefs",
	.get_sb		= pipefs_get_sb,
	.kill_sb	= kill_anon_super,
};
848
849 static int __init init_pipe_fs(void)
850 {
851 int err = register_filesystem(&pipe_fs_type);
852 if (!err) {
853 pipe_mnt = kern_mount(&pipe_fs_type);
854 if (IS_ERR(pipe_mnt)) {
855 err = PTR_ERR(pipe_mnt);
856 unregister_filesystem(&pipe_fs_type);
857 }
858 }
859 return err;
860 }
861
/* Module teardown: unregister pipefs and drop the internal mount. */
static void __exit exit_pipe_fs(void)
{
	unregister_filesystem(&pipe_fs_type);
	mntput(pipe_mnt);
}
867
868 fs_initcall(init_pipe_fs);
869 module_exit(exit_pipe_fs);
This page took 0.049029 seconds and 6 git commands to generate.