Orangefs: Don't wait the old-fashioned way.
[deliverable/linux.git] / fs / orangefs / devorangefs-req.c
CommitLineData
5db11c21
MM
1/*
2 * (C) 2001 Clemson University and The University of Chicago
3 *
4 * Changes by Acxiom Corporation to add protocol version to kernel
5 * communication, Copyright Acxiom Corporation, 2005.
6 *
7 * See COPYING in top-level directory.
8 */
9
10#include "protocol.h"
575e9461
MM
11#include "orangefs-kernel.h"
12#include "orangefs-dev-proto.h"
13#include "orangefs-bufmap.h"
5db11c21
MM
14
15#include <linux/debugfs.h>
16#include <linux/slab.h>
17
18/* this file implements the /dev/pvfs2-req device node */
19
/*
 * Number of opens currently held on the request device.  It is modified
 * under devreq_mutex by the open and release handlers; open only
 * increments it when it is zero, so it never exceeds one.
 */
static int open_access_count;
21
/*
 * Log a multi-line explanation that the request device is already open,
 * including the current value of open_access_count.  Wrapped in
 * do { } while (0) so it can be used as a single statement.
 */
#define DUMP_DEVICE_ERROR()                                                   \
do {                                                                          \
	gossip_err("*****************************************************\n");\
	gossip_err("ORANGEFS Device Error: You cannot open the device file "); \
	gossip_err("\n/dev/%s more than once. Please make sure that\nthere " \
		   "are no ", ORANGEFS_REQDEVICE_NAME);                       \
	gossip_err("instances of a program using this device\ncurrently "     \
		   "running. (You must verify this!)\n");                     \
	gossip_err("For example, you can use the lsof program as follows:\n");\
	gossip_err("'lsof | grep %s' (run this as root)\n",                   \
		   ORANGEFS_REQDEVICE_NAME);                                  \
	gossip_err(" open_access_count = %d\n", open_access_count);           \
	gossip_err("*****************************************************\n");\
} while (0)
36
37static int hash_func(__u64 tag, int table_size)
38{
2c590d5f 39 return do_div(tag, (unsigned int)table_size);
5db11c21
MM
40}
41
8bb8aefd 42static void orangefs_devreq_add_op(struct orangefs_kernel_op_s *op)
5db11c21
MM
43{
44 int index = hash_func(op->tag, hash_table_size);
45
46 spin_lock(&htable_ops_in_progress_lock);
47 list_add_tail(&op->list, &htable_ops_in_progress[index]);
48 spin_unlock(&htable_ops_in_progress_lock);
49}
50
8bb8aefd 51static struct orangefs_kernel_op_s *orangefs_devreq_remove_op(__u64 tag)
5db11c21 52{
8bb8aefd 53 struct orangefs_kernel_op_s *op, *next;
5db11c21
MM
54 int index;
55
56 index = hash_func(tag, hash_table_size);
57
58 spin_lock(&htable_ops_in_progress_lock);
59 list_for_each_entry_safe(op,
60 next,
61 &htable_ops_in_progress[index],
62 list) {
63 if (op->tag == tag) {
64 list_del(&op->list);
65 spin_unlock(&htable_ops_in_progress_lock);
66 return op;
67 }
68 }
69
70 spin_unlock(&htable_ops_in_progress_lock);
71 return NULL;
72}
73
8bb8aefd 74static int orangefs_devreq_open(struct inode *inode, struct file *file)
5db11c21
MM
75{
76 int ret = -EINVAL;
77
78 if (!(file->f_flags & O_NONBLOCK)) {
97f10027
MM
79 gossip_err("%s: device cannot be opened in blocking mode\n",
80 __func__);
5db11c21
MM
81 goto out;
82 }
83 ret = -EACCES;
97f10027 84 gossip_debug(GOSSIP_DEV_DEBUG, "client-core: opening device\n");
5db11c21
MM
85 mutex_lock(&devreq_mutex);
86
87 if (open_access_count == 0) {
88 ret = generic_file_open(inode, file);
89 if (ret == 0)
90 open_access_count++;
91 } else {
92 DUMP_DEVICE_ERROR();
93 }
94 mutex_unlock(&devreq_mutex);
95
96out:
97
98 gossip_debug(GOSSIP_DEV_DEBUG,
99 "pvfs2-client-core: open device complete (ret = %d)\n",
100 ret);
101 return ret;
102}
103
97f10027 104/* Function for read() callers into the device */
8bb8aefd 105static ssize_t orangefs_devreq_read(struct file *file,
5db11c21
MM
106 char __user *buf,
107 size_t count, loff_t *offset)
108{
8bb8aefd
YL
109 struct orangefs_kernel_op_s *op, *temp;
110 __s32 proto_ver = ORANGEFS_KERNEL_PROTO_VERSION;
111 static __s32 magic = ORANGEFS_DEVREQ_MAGIC;
112 struct orangefs_kernel_op_s *cur_op = NULL;
24c8d080 113 unsigned long ret;
5db11c21 114
24c8d080 115 /* We do not support blocking IO. */
5db11c21 116 if (!(file->f_flags & O_NONBLOCK)) {
97f10027
MM
117 gossip_err("%s: blocking read from client-core.\n",
118 __func__);
5db11c21 119 return -EINVAL;
24c8d080
MB
120 }
121
122 /*
123 * The client will do an ioctl to find MAX_ALIGNED_DEV_REQ_UPSIZE, then
124 * always read with that size buffer.
125 */
126 if (count != MAX_ALIGNED_DEV_REQ_UPSIZE) {
127 gossip_err("orangefs: client-core tried to read wrong size\n");
128 return -EINVAL;
129 }
130
131 /* Get next op (if any) from top of list. */
8bb8aefd
YL
132 spin_lock(&orangefs_request_list_lock);
133 list_for_each_entry_safe(op, temp, &orangefs_request_list, list) {
24c8d080
MB
134 __s32 fsid;
135 /* This lock is held past the end of the loop when we break. */
136 spin_lock(&op->lock);
137
138 fsid = fsid_of_op(op);
8bb8aefd 139 if (fsid != ORANGEFS_FS_ID_NULL) {
24c8d080
MB
140 int ret;
141 /* Skip ops whose filesystem needs to be mounted. */
142 ret = fs_mount_pending(fsid);
143 if (ret == 1) {
5db11c21 144 gossip_debug(GOSSIP_DEV_DEBUG,
24c8d080
MB
145 "orangefs: skipping op tag %llu %s\n",
146 llu(op->tag), get_opname_string(op));
147 spin_unlock(&op->lock);
148 continue;
97f10027
MM
149 /*
150 * Skip ops whose filesystem we don't know about unless
151 * it is being mounted.
152 */
24c8d080
MB
153 /* XXX: is there a better way to detect this? */
154 } else if (ret == -1 &&
97f10027
MM
155 !(op->upcall.type ==
156 ORANGEFS_VFS_OP_FS_MOUNT ||
157 op->upcall.type ==
158 ORANGEFS_VFS_OP_GETATTR)) {
24c8d080
MB
159 gossip_debug(GOSSIP_DEV_DEBUG,
160 "orangefs: skipping op tag %llu %s\n",
161 llu(op->tag), get_opname_string(op));
162 gossip_err(
163 "orangefs: ERROR: fs_mount_pending %d\n",
164 fsid);
165 spin_unlock(&op->lock);
5db11c21 166 continue;
5db11c21
MM
167 }
168 }
24c8d080
MB
169 /*
170 * Either this op does not pertain to a filesystem, is mounting
171 * a filesystem, or pertains to a mounted filesystem. Let it
172 * through.
173 */
174 cur_op = op;
175 break;
176 }
177
178 /*
179 * At this point we either have a valid op and can continue or have not
180 * found an op and must ask the client to try again later.
181 */
182 if (!cur_op) {
8bb8aefd 183 spin_unlock(&orangefs_request_list_lock);
24c8d080 184 return -EAGAIN;
5db11c21
MM
185 }
186
24c8d080
MB
187 gossip_debug(GOSSIP_DEV_DEBUG, "orangefs: reading op tag %llu %s\n",
188 llu(cur_op->tag), get_opname_string(cur_op));
5db11c21 189
24c8d080
MB
190 /*
191 * Such an op should never be on the list in the first place. If so, we
192 * will abort.
193 */
194 if (op_state_in_progress(cur_op) || op_state_serviced(cur_op)) {
195 gossip_err("orangefs: ERROR: Current op already queued.\n");
196 list_del(&cur_op->list);
5db11c21 197 spin_unlock(&cur_op->lock);
8bb8aefd 198 spin_unlock(&orangefs_request_list_lock);
24c8d080 199 return -EAGAIN;
5db11c21 200 }
24c8d080
MB
201
202 /*
203 * Set the operation to be in progress and move it between lists since
204 * it has been sent to the client.
205 */
206 set_op_state_inprogress(cur_op);
207
208 list_del(&cur_op->list);
8bb8aefd
YL
209 spin_unlock(&orangefs_request_list_lock);
210 orangefs_devreq_add_op(cur_op);
24c8d080
MB
211 spin_unlock(&cur_op->lock);
212
213 /* Push the upcall out. */
214 ret = copy_to_user(buf, &proto_ver, sizeof(__s32));
215 if (ret != 0)
216 goto error;
217 ret = copy_to_user(buf+sizeof(__s32), &magic, sizeof(__s32));
218 if (ret != 0)
219 goto error;
220 ret = copy_to_user(buf+2 * sizeof(__s32), &cur_op->tag, sizeof(__u64));
221 if (ret != 0)
222 goto error;
223 ret = copy_to_user(buf+2*sizeof(__s32)+sizeof(__u64), &cur_op->upcall,
8bb8aefd 224 sizeof(struct orangefs_upcall_s));
24c8d080
MB
225 if (ret != 0)
226 goto error;
227
228 /* The client only asks to read one size buffer. */
229 return MAX_ALIGNED_DEV_REQ_UPSIZE;
230error:
231 /*
232 * We were unable to copy the op data to the client. Put the op back in
233 * list. If client has crashed, the op will be purged later when the
234 * device is released.
235 */
236 gossip_err("orangefs: Failed to copy data to user space\n");
8bb8aefd 237 spin_lock(&orangefs_request_list_lock);
24c8d080
MB
238 spin_lock(&cur_op->lock);
239 set_op_state_waiting(cur_op);
8bb8aefd
YL
240 orangefs_devreq_remove_op(cur_op->tag);
241 list_add(&cur_op->list, &orangefs_request_list);
24c8d080 242 spin_unlock(&cur_op->lock);
8bb8aefd 243 spin_unlock(&orangefs_request_list_lock);
24c8d080 244 return -EFAULT;
5db11c21
MM
245}
246
97f10027
MM
247/*
248 * Function for writev() callers into the device. Readdir related
249 * operations have an extra iovec containing info about objects
250 * contained in directories.
251 */
8bb8aefd 252static ssize_t orangefs_devreq_writev(struct file *file,
5db11c21
MM
253 const struct iovec *iov,
254 size_t count,
255 loff_t *offset)
256{
8bb8aefd 257 struct orangefs_kernel_op_s *op = NULL;
5db11c21
MM
258 void *buffer = NULL;
259 void *ptr = NULL;
260 unsigned long i = 0;
97f10027
MM
261 int num_remaining = MAX_ALIGNED_DEV_REQ_DOWNSIZE;
262 int ret = 0;
263 /* num elements in iovec without trailer */
264 int notrailer_count = 4;
265 /*
266 * If there's a trailer, its iov index will be equal to
267 * notrailer_count.
268 */
269 int trailer_index = notrailer_count;
5db11c21 270 int payload_size = 0;
97f10027 271 int returned_downcall_size = 0;
5db11c21
MM
272 __s32 magic = 0;
273 __s32 proto_ver = 0;
274 __u64 tag = 0;
275 ssize_t total_returned_size = 0;
276
97f10027
MM
277 /*
278 * There will always be at least notrailer_count iovecs, and
279 * when there's a trailer, one more than notrailer_count. Check
280 * count's sanity.
281 */
5db11c21 282 if (count != notrailer_count && count != (notrailer_count + 1)) {
97f10027
MM
283 gossip_err("%s: count:%zu: notrailer_count :%d:\n",
284 __func__,
5db11c21
MM
285 count,
286 notrailer_count);
287 return -EPROTO;
288 }
97f10027
MM
289
290
291 /* Copy the non-trailer iovec data into a device request buffer. */
5db11c21 292 buffer = dev_req_alloc();
97f10027
MM
293 if (!buffer) {
294 gossip_err("%s: dev_req_alloc failed.\n", __func__);
5db11c21 295 return -ENOMEM;
97f10027 296 }
5db11c21 297 ptr = buffer;
5db11c21
MM
298 for (i = 0; i < notrailer_count; i++) {
299 if (iov[i].iov_len > num_remaining) {
300 gossip_err
301 ("writev error: Freeing buffer and returning\n");
302 dev_req_release(buffer);
303 return -EMSGSIZE;
304 }
305 ret = copy_from_user(ptr, iov[i].iov_base, iov[i].iov_len);
306 if (ret) {
307 gossip_err("Failed to copy data from user space\n");
308 dev_req_release(buffer);
309 return -EIO;
310 }
311 num_remaining -= iov[i].iov_len;
312 ptr += iov[i].iov_len;
313 payload_size += iov[i].iov_len;
314 }
315 total_returned_size = payload_size;
316
317 /* these elements are currently 8 byte aligned (8 bytes for (version +
318 * magic) 8 bytes for tag). If you add another element, either
319 * make it 8 bytes big, or use get_unaligned when asigning.
320 */
321 ptr = buffer;
97f10027 322 proto_ver = *((__s32 *) ptr); /* unused */
5db11c21
MM
323 ptr += sizeof(__s32);
324
325 magic = *((__s32 *) ptr);
326 ptr += sizeof(__s32);
327
328 tag = *((__u64 *) ptr);
329 ptr += sizeof(__u64);
330
8bb8aefd 331 if (magic != ORANGEFS_DEVREQ_MAGIC) {
5db11c21
MM
332 gossip_err("Error: Device magic number does not match.\n");
333 dev_req_release(buffer);
334 return -EPROTO;
335 }
336
8bb8aefd 337 op = orangefs_devreq_remove_op(tag);
5db11c21
MM
338 if (op) {
339 /* Increase ref count! */
340 get_op(op);
97f10027
MM
341
342 /* calculate the size of the returned downcall. */
343 returned_downcall_size =
344 payload_size - (2 * sizeof(__s32) + sizeof(__u64));
345
346 /* copy the passed in downcall into the op */
347 if (returned_downcall_size ==
348 sizeof(struct orangefs_downcall_s)) {
5db11c21
MM
349 memcpy(&op->downcall,
350 ptr,
8bb8aefd 351 sizeof(struct orangefs_downcall_s));
97f10027
MM
352 } else {
353 gossip_err("%s: returned downcall size:%d: \n",
354 __func__,
355 returned_downcall_size);
356 dev_req_release(buffer);
357 put_op(op);
358 return -EMSGSIZE;
359 }
360
361 /* Don't tolerate an unexpected trailer iovec. */
362 if ((op->downcall.trailer_size == 0) &&
363 (count != notrailer_count)) {
364 gossip_err("%s: unexpected trailer iovec.\n",
365 __func__);
366 dev_req_release(buffer);
367 put_op(op);
368 return -EPROTO;
369 }
370
371 /* Don't consider the trailer if there's a bad status. */
372 if (op->downcall.status != 0)
373 goto no_trailer;
374
375 /* get the trailer if there is one. */
376 if (op->downcall.trailer_size == 0)
377 goto no_trailer;
378
379 gossip_debug(GOSSIP_DEV_DEBUG,
380 "%s: op->downcall.trailer_size %lld\n",
381 __func__,
382 op->downcall.trailer_size);
5db11c21 383
97f10027
MM
384 /*
385 * Bail if we think think there should be a trailer, but
386 * there's no iovec for it.
5db11c21 387 */
97f10027
MM
388 if (count != (notrailer_count + 1)) {
389 gossip_err("%s: trailer_size:%lld: count:%zu:\n",
390 __func__,
391 op->downcall.trailer_size,
392 count);
393 dev_req_release(buffer);
394 put_op(op);
395 return -EPROTO;
396 }
397
398 /* Verify that trailer_size is accurate. */
399 if (op->downcall.trailer_size != iov[trailer_index].iov_len) {
400 gossip_err("%s: trailer_size:%lld: != iov_len:%zd:\n",
401 __func__,
402 op->downcall.trailer_size,
403 iov[trailer_index].iov_len);
404 dev_req_release(buffer);
405 put_op(op);
406 return -EMSGSIZE;
407 }
408
409 total_returned_size += iov[trailer_index].iov_len;
410
411 /*
412 * Allocate a buffer, copy the trailer bytes into it and
413 * attach it to the downcall.
414 */
415 op->downcall.trailer_buf = vmalloc(iov[trailer_index].iov_len);
416 if (op->downcall.trailer_buf != NULL) {
417 gossip_debug(GOSSIP_DEV_DEBUG, "vmalloc: %p\n",
418 op->downcall.trailer_buf);
419 ret = copy_from_user(op->downcall.trailer_buf,
420 iov[trailer_index].iov_base,
421 iov[trailer_index].iov_len);
422 if (ret) {
423 gossip_err("%s: Failed to copy trailer.\n",
424 __func__);
5db11c21 425 dev_req_release(buffer);
97f10027
MM
426 gossip_debug(GOSSIP_DEV_DEBUG,
427 "vfree: %p\n",
5db11c21 428 op->downcall.trailer_buf);
97f10027
MM
429 vfree(op->downcall.trailer_buf);
430 op->downcall.trailer_buf = NULL;
431 put_op(op);
432 return -EIO;
5db11c21 433 }
97f10027 434 } else {
97f10027
MM
435 gossip_err("writev: could not vmalloc for trailer!\n");
436 dev_req_release(buffer);
437 put_op(op);
438 return -ENOMEM;
5db11c21
MM
439 }
440
97f10027
MM
441no_trailer:
442
443 /* if this operation is an I/O operation we need to wait
5db11c21
MM
444 * for all data to be copied before we can return to avoid
445 * buffer corruption and races that can pull the buffers
446 * out from under us.
447 *
448 * Essentially we're synchronizing with other parts of the
449 * vfs implicitly by not allowing the user space
450 * application reading/writing this device to return until
451 * the buffers are done being used.
452 */
97f10027 453 if (op->upcall.type == ORANGEFS_VFS_OP_FILE_IO) {
5db11c21 454 int timed_out = 0;
ce6c414e 455 DEFINE_WAIT(wait_entry);
5db11c21 456
97f10027
MM
457 /*
458 * tell the vfs op waiting on a waitqueue
5db11c21
MM
459 * that this op is done
460 */
461 spin_lock(&op->lock);
462 set_op_state_serviced(op);
463 spin_unlock(&op->lock);
464
5db11c21
MM
465 wake_up_interruptible(&op->waitq);
466
467 while (1) {
5db11c21 468 spin_lock(&op->lock);
ce6c414e
MM
469 prepare_to_wait_exclusive(
470 &op->io_completion_waitq,
471 &wait_entry,
472 TASK_INTERRUPTIBLE);
5db11c21
MM
473 if (op->io_completed) {
474 spin_unlock(&op->lock);
475 break;
476 }
477 spin_unlock(&op->lock);
478
479 if (!signal_pending(current)) {
480 int timeout =
481 MSECS_TO_JIFFIES(1000 *
482 op_timeout_secs);
483 if (!schedule_timeout(timeout)) {
97f10027
MM
484 gossip_debug(GOSSIP_DEV_DEBUG,
485 "%s: timed out.\n",
486 __func__);
5db11c21
MM
487 timed_out = 1;
488 break;
489 }
490 continue;
491 }
492
97f10027
MM
493 gossip_debug(GOSSIP_DEV_DEBUG,
494 "%s: signal on I/O wait, aborting\n",
495 __func__);
5db11c21
MM
496 break;
497 }
498
ce6c414e
MM
499 spin_lock(&op->lock);
500 finish_wait(&op->io_completion_waitq, &wait_entry);
501 spin_unlock(&op->lock);
5db11c21
MM
502
503 /* NOTE: for I/O operations we handle releasing the op
504 * object except in the case of timeout. the reason we
505 * can't free the op in timeout cases is that the op
506 * service logic in the vfs retries operations using
507 * the same op ptr, thus it can't be freed.
508 */
509 if (!timed_out)
510 op_release(op);
511 } else {
512
513 /*
514 * tell the vfs op waiting on a waitqueue that
515 * this op is done
516 */
517 spin_lock(&op->lock);
518 set_op_state_serviced(op);
519 spin_unlock(&op->lock);
520 /*
54804949
MM
521 * for every other operation (i.e. non-I/O), we need to
522 * wake up the callers for downcall completion
523 * notification
5db11c21
MM
524 */
525 wake_up_interruptible(&op->waitq);
526 }
527 } else {
528 /* ignore downcalls that we're not interested in */
529 gossip_debug(GOSSIP_DEV_DEBUG,
530 "WARNING: No one's waiting for tag %llu\n",
531 llu(tag));
532 }
97f10027 533 /* put_op? */
5db11c21
MM
534 dev_req_release(buffer);
535
536 return total_returned_size;
537}
538
8bb8aefd 539static ssize_t orangefs_devreq_write_iter(struct kiocb *iocb,
5db11c21
MM
540 struct iov_iter *iter)
541{
8bb8aefd 542 return orangefs_devreq_writev(iocb->ki_filp,
5db11c21
MM
543 iter->iov,
544 iter->nr_segs,
545 &iocb->ki_pos);
546}
547
548/* Returns whether any FS are still pending remounted */
549static int mark_all_pending_mounts(void)
550{
551 int unmounted = 1;
8bb8aefd 552 struct orangefs_sb_info_s *orangefs_sb = NULL;
5db11c21 553
8bb8aefd
YL
554 spin_lock(&orangefs_superblocks_lock);
555 list_for_each_entry(orangefs_sb, &orangefs_superblocks, list) {
5db11c21 556 /* All of these file system require a remount */
8bb8aefd 557 orangefs_sb->mount_pending = 1;
5db11c21
MM
558 unmounted = 0;
559 }
8bb8aefd 560 spin_unlock(&orangefs_superblocks_lock);
5db11c21
MM
561 return unmounted;
562}
563
564/*
565 * Determine if a given file system needs to be remounted or not
566 * Returns -1 on error
567 * 0 if already mounted
568 * 1 if needs remount
569 */
570int fs_mount_pending(__s32 fsid)
571{
572 int mount_pending = -1;
8bb8aefd 573 struct orangefs_sb_info_s *orangefs_sb = NULL;
5db11c21 574
8bb8aefd
YL
575 spin_lock(&orangefs_superblocks_lock);
576 list_for_each_entry(orangefs_sb, &orangefs_superblocks, list) {
577 if (orangefs_sb->fs_id == fsid) {
578 mount_pending = orangefs_sb->mount_pending;
5db11c21
MM
579 break;
580 }
581 }
8bb8aefd 582 spin_unlock(&orangefs_superblocks_lock);
5db11c21
MM
583 return mount_pending;
584}
585
586/*
587 * NOTE: gets called when the last reference to this device is dropped.
588 * Using the open_access_count variable, we enforce a reference count
589 * on this file so that it can be opened by only one process at a time.
590 * the devreq_mutex is used to make sure all i/o has completed
8bb8aefd 591 * before we call orangefs_bufmap_finalize, and similar such tricky
5db11c21
MM
592 * situations
593 */
8bb8aefd 594static int orangefs_devreq_release(struct inode *inode, struct file *file)
5db11c21
MM
595{
596 int unmounted = 0;
597
598 gossip_debug(GOSSIP_DEV_DEBUG,
599 "%s:pvfs2-client-core: exiting, closing device\n",
600 __func__);
601
602 mutex_lock(&devreq_mutex);
8bb8aefd 603 orangefs_bufmap_finalize();
5db11c21
MM
604
605 open_access_count--;
606
607 unmounted = mark_all_pending_mounts();
8bb8aefd 608 gossip_debug(GOSSIP_DEV_DEBUG, "ORANGEFS Device Close: Filesystem(s) %s\n",
5db11c21
MM
609 (unmounted ? "UNMOUNTED" : "MOUNTED"));
610 mutex_unlock(&devreq_mutex);
611
612 /*
613 * Walk through the list of ops in the request list, mark them
614 * as purged and wake them up.
615 */
616 purge_waiting_ops();
617 /*
618 * Walk through the hash table of in progress operations; mark
619 * them as purged and wake them up
620 */
621 purge_inprogress_ops();
622 gossip_debug(GOSSIP_DEV_DEBUG,
623 "pvfs2-client-core: device close complete\n");
624 return 0;
625}
626
627int is_daemon_in_service(void)
628{
629 int in_service;
630
631 /*
632 * What this function does is checks if client-core is alive
633 * based on the access count we maintain on the device.
634 */
635 mutex_lock(&devreq_mutex);
636 in_service = open_access_count == 1 ? 0 : -EIO;
637 mutex_unlock(&devreq_mutex);
638 return in_service;
639}
640
641static inline long check_ioctl_command(unsigned int command)
642{
643 /* Check for valid ioctl codes */
8bb8aefd 644 if (_IOC_TYPE(command) != ORANGEFS_DEV_MAGIC) {
5db11c21
MM
645 gossip_err("device ioctl magic numbers don't match! Did you rebuild pvfs2-client-core/libpvfs2? [cmd %x, magic %x != %x]\n",
646 command,
647 _IOC_TYPE(command),
8bb8aefd 648 ORANGEFS_DEV_MAGIC);
5db11c21
MM
649 return -EINVAL;
650 }
651 /* and valid ioctl commands */
8bb8aefd 652 if (_IOC_NR(command) >= ORANGEFS_DEV_MAXNR || _IOC_NR(command) <= 0) {
5db11c21 653 gossip_err("Invalid ioctl command number [%d >= %d]\n",
8bb8aefd 654 _IOC_NR(command), ORANGEFS_DEV_MAXNR);
5db11c21
MM
655 return -ENOIOCTLCMD;
656 }
657 return 0;
658}
659
660static long dispatch_ioctl_command(unsigned int command, unsigned long arg)
661{
8bb8aefd 662 static __s32 magic = ORANGEFS_DEVREQ_MAGIC;
5db11c21
MM
663 static __s32 max_up_size = MAX_ALIGNED_DEV_REQ_UPSIZE;
664 static __s32 max_down_size = MAX_ALIGNED_DEV_REQ_DOWNSIZE;
8bb8aefd 665 struct ORANGEFS_dev_map_desc user_desc;
5db11c21
MM
666 int ret = 0;
667 struct dev_mask_info_s mask_info = { 0 };
668 struct dev_mask2_info_s mask2_info = { 0, 0 };
669 int upstream_kmod = 1;
670 struct list_head *tmp = NULL;
8bb8aefd 671 struct orangefs_sb_info_s *orangefs_sb = NULL;
5db11c21
MM
672
673 /* mtmoore: add locking here */
674
675 switch (command) {
8bb8aefd 676 case ORANGEFS_DEV_GET_MAGIC:
5db11c21
MM
677 return ((put_user(magic, (__s32 __user *) arg) == -EFAULT) ?
678 -EIO :
679 0);
8bb8aefd 680 case ORANGEFS_DEV_GET_MAX_UPSIZE:
5db11c21
MM
681 return ((put_user(max_up_size,
682 (__s32 __user *) arg) == -EFAULT) ?
683 -EIO :
684 0);
8bb8aefd 685 case ORANGEFS_DEV_GET_MAX_DOWNSIZE:
5db11c21
MM
686 return ((put_user(max_down_size,
687 (__s32 __user *) arg) == -EFAULT) ?
688 -EIO :
689 0);
8bb8aefd 690 case ORANGEFS_DEV_MAP:
5db11c21 691 ret = copy_from_user(&user_desc,
8bb8aefd 692 (struct ORANGEFS_dev_map_desc __user *)
5db11c21 693 arg,
8bb8aefd
YL
694 sizeof(struct ORANGEFS_dev_map_desc));
695 return ret ? -EIO : orangefs_bufmap_initialize(&user_desc);
696 case ORANGEFS_DEV_REMOUNT_ALL:
5db11c21 697 gossip_debug(GOSSIP_DEV_DEBUG,
97f10027
MM
698 "%s: got ORANGEFS_DEV_REMOUNT_ALL\n",
699 __func__);
5db11c21
MM
700
701 /*
8bb8aefd 702 * remount all mounted orangefs volumes to regain the lost
5db11c21
MM
703 * dynamic mount tables (if any) -- NOTE: this is done
704 * without keeping the superblock list locked due to the
705 * upcall/downcall waiting. also, the request semaphore is
706 * used to ensure that no operations will be serviced until
707 * all of the remounts are serviced (to avoid ops between
708 * mounts to fail)
709 */
710 ret = mutex_lock_interruptible(&request_mutex);
711 if (ret < 0)
712 return ret;
713 gossip_debug(GOSSIP_DEV_DEBUG,
97f10027
MM
714 "%s: priority remount in progress\n",
715 __func__);
8bb8aefd
YL
716 list_for_each(tmp, &orangefs_superblocks) {
717 orangefs_sb =
97f10027
MM
718 list_entry(tmp,
719 struct orangefs_sb_info_s,
720 list);
8bb8aefd 721 if (orangefs_sb && (orangefs_sb->sb)) {
5db11c21 722 gossip_debug(GOSSIP_DEV_DEBUG,
97f10027
MM
723 "%s: Remounting SB %p\n",
724 __func__,
8bb8aefd 725 orangefs_sb);
5db11c21 726
8bb8aefd 727 ret = orangefs_remount(orangefs_sb->sb);
5db11c21
MM
728 if (ret) {
729 gossip_debug(GOSSIP_DEV_DEBUG,
730 "SB %p remount failed\n",
8bb8aefd 731 orangefs_sb);
97f10027 732 break;
5db11c21
MM
733 }
734 }
735 }
736 gossip_debug(GOSSIP_DEV_DEBUG,
97f10027
MM
737 "%s: priority remount complete\n",
738 __func__);
5db11c21
MM
739 mutex_unlock(&request_mutex);
740 return ret;
741
8bb8aefd 742 case ORANGEFS_DEV_UPSTREAM:
5db11c21
MM
743 ret = copy_to_user((void __user *)arg,
744 &upstream_kmod,
745 sizeof(upstream_kmod));
746
747 if (ret != 0)
748 return -EIO;
749 else
750 return ret;
751
8bb8aefd 752 case ORANGEFS_DEV_CLIENT_MASK:
5db11c21
MM
753 ret = copy_from_user(&mask2_info,
754 (void __user *)arg,
755 sizeof(struct dev_mask2_info_s));
756
757 if (ret != 0)
758 return -EIO;
759
760 client_debug_mask.mask1 = mask2_info.mask1_value;
761 client_debug_mask.mask2 = mask2_info.mask2_value;
762
763 pr_info("%s: client debug mask has been been received "
764 ":%llx: :%llx:\n",
765 __func__,
766 (unsigned long long)client_debug_mask.mask1,
767 (unsigned long long)client_debug_mask.mask2);
768
769 return ret;
770
8bb8aefd 771 case ORANGEFS_DEV_CLIENT_STRING:
5db11c21
MM
772 ret = copy_from_user(&client_debug_array_string,
773 (void __user *)arg,
8bb8aefd 774 ORANGEFS_MAX_DEBUG_STRING_LEN);
5db11c21 775 if (ret != 0) {
97f10027 776 pr_info("%s: CLIENT_STRING: copy_from_user failed\n",
5db11c21
MM
777 __func__);
778 return -EIO;
779 }
780
97f10027 781 pr_info("%s: client debug array string has been received.\n",
5db11c21
MM
782 __func__);
783
784 if (!help_string_initialized) {
785
786 /* Free the "we don't know yet" default string... */
787 kfree(debug_help_string);
788
789 /* build a proper debug help string */
790 if (orangefs_prepare_debugfs_help_string(0)) {
97f10027 791 gossip_err("%s: no debug help string \n",
5db11c21
MM
792 __func__);
793 return -EIO;
794 }
795
796 /* Replace the boilerplate boot-time debug-help file. */
797 debugfs_remove(help_file_dentry);
798
799 help_file_dentry =
800 debugfs_create_file(
801 ORANGEFS_KMOD_DEBUG_HELP_FILE,
802 0444,
803 debug_dir,
804 debug_help_string,
805 &debug_help_fops);
806
807 if (!help_file_dentry) {
808 gossip_err("%s: debugfs_create_file failed for"
809 " :%s:!\n",
810 __func__,
811 ORANGEFS_KMOD_DEBUG_HELP_FILE);
812 return -EIO;
813 }
814 }
815
816 debug_mask_to_string(&client_debug_mask, 1);
817
818 debugfs_remove(client_debug_dentry);
819
8bb8aefd 820 orangefs_client_debug_init();
5db11c21
MM
821
822 help_string_initialized++;
823
824 return ret;
825
8bb8aefd 826 case ORANGEFS_DEV_DEBUG:
5db11c21
MM
827 ret = copy_from_user(&mask_info,
828 (void __user *)arg,
829 sizeof(mask_info));
830
831 if (ret != 0)
832 return -EIO;
833
834 if (mask_info.mask_type == KERNEL_MASK) {
835 if ((mask_info.mask_value == 0)
836 && (kernel_mask_set_mod_init)) {
837 /*
838 * the kernel debug mask was set when the
839 * kernel module was loaded; don't override
840 * it if the client-core was started without
8bb8aefd 841 * a value for ORANGEFS_KMODMASK.
5db11c21
MM
842 */
843 return 0;
844 }
845 debug_mask_to_string(&mask_info.mask_value,
846 mask_info.mask_type);
847 gossip_debug_mask = mask_info.mask_value;
97f10027 848 pr_info("%s: kernel debug mask has been modified to "
5db11c21 849 ":%s: :%llx:\n",
97f10027 850 __func__,
5db11c21
MM
851 kernel_debug_string,
852 (unsigned long long)gossip_debug_mask);
853 } else if (mask_info.mask_type == CLIENT_MASK) {
854 debug_mask_to_string(&mask_info.mask_value,
855 mask_info.mask_type);
97f10027 856 pr_info("%s: client debug mask has been modified to"
5db11c21 857 ":%s: :%llx:\n",
97f10027 858 __func__,
5db11c21
MM
859 client_debug_string,
860 llu(mask_info.mask_value));
861 } else {
862 gossip_lerr("Invalid mask type....\n");
863 return -EINVAL;
864 }
865
866 return ret;
867
868 default:
869 return -ENOIOCTLCMD;
870 }
871 return -ENOIOCTLCMD;
872}
873
/*
 * unlocked_ioctl() handler: validate the command encoding, then
 * dispatch it.  Returns the validation error if there is one, otherwise
 * the dispatcher's result.
 */
static long orangefs_devreq_ioctl(struct file *file,
				  unsigned int command, unsigned long arg)
{
	long ret;

	/* Check for properly constructed commands */
	ret = check_ioctl_command(command);
	if (ret < 0)
		return (int)ret;

	return (int)dispatch_ioctl_command(command, arg);
}
886
887#ifdef CONFIG_COMPAT /* CONFIG_COMPAT is in .config */
888
8bb8aefd
YL
889/* Compat structure for the ORANGEFS_DEV_MAP ioctl */
890struct ORANGEFS_dev_map_desc32 {
5db11c21
MM
891 compat_uptr_t ptr;
892 __s32 total_size;
893 __s32 size;
894 __s32 count;
895};
896
897static unsigned long translate_dev_map26(unsigned long args, long *error)
898{
8bb8aefd 899 struct ORANGEFS_dev_map_desc32 __user *p32 = (void __user *)args;
5db11c21
MM
900 /*
901 * Depending on the architecture, allocate some space on the
902 * user-call-stack based on our expected layout.
903 */
8bb8aefd 904 struct ORANGEFS_dev_map_desc __user *p =
5db11c21 905 compat_alloc_user_space(sizeof(*p));
84d02150 906 compat_uptr_t addr;
5db11c21
MM
907
908 *error = 0;
909 /* get the ptr from the 32 bit user-space */
910 if (get_user(addr, &p32->ptr))
911 goto err;
912 /* try to put that into a 64-bit layout */
913 if (put_user(compat_ptr(addr), &p->ptr))
914 goto err;
915 /* copy the remaining fields */
916 if (copy_in_user(&p->total_size, &p32->total_size, sizeof(__s32)))
917 goto err;
918 if (copy_in_user(&p->size, &p32->size, sizeof(__s32)))
919 goto err;
920 if (copy_in_user(&p->count, &p32->count, sizeof(__s32)))
921 goto err;
922 return (unsigned long)p;
923err:
924 *error = -EFAULT;
925 return 0;
926}
927
928/*
929 * 32 bit user-space apps' ioctl handlers when kernel modules
930 * is compiled as a 64 bit one
931 */
8bb8aefd 932static long orangefs_devreq_compat_ioctl(struct file *filp, unsigned int cmd,
5db11c21
MM
933 unsigned long args)
934{
935 long ret;
936 unsigned long arg = args;
937
938 /* Check for properly constructed commands */
939 ret = check_ioctl_command(cmd);
940 if (ret < 0)
941 return ret;
8bb8aefd 942 if (cmd == ORANGEFS_DEV_MAP) {
5db11c21
MM
943 /*
944 * convert the arguments to what we expect internally
945 * in kernel space
946 */
947 arg = translate_dev_map26(args, &ret);
948 if (ret < 0) {
949 gossip_err("Could not translate dev map\n");
950 return ret;
951 }
952 }
953 /* no other ioctl requires translation */
954 return dispatch_ioctl_command(cmd, arg);
955}
956
2c590d5f
MM
957#endif /* CONFIG_COMPAT is in .config */
958
/*
 * The following two ioctl32 functions had been refactored into the above
 * CONFIG_COMPAT ifdef, but that was an over simplification that was
 * not noticed until we tried to compile on power pc...
 *
 * Both are no-op placeholders: init always reports success and cleanup
 * has nothing to undo.
 */
#if (defined(CONFIG_COMPAT) && !defined(HAVE_REGISTER_IOCTL32_CONVERSION)) || !defined(CONFIG_COMPAT)
static int orangefs_ioctl32_init(void)
{
	return 0;
}

static void orangefs_ioctl32_cleanup(void)
{
}
#endif
5db11c21
MM
975
/*
 * the assigned character device major number, as returned by
 * register_chrdev() in orangefs_dev_init()
 */
static int orangefs_dev_major;
5db11c21
MM
978
979/*
8bb8aefd 980 * Initialize orangefs device specific state:
5db11c21
MM
981 * Must be called at module load time only
982 */
8bb8aefd 983int orangefs_dev_init(void)
5db11c21
MM
984{
985 int ret;
986
987 /* register the ioctl32 sub-system */
8bb8aefd 988 ret = orangefs_ioctl32_init();
5db11c21
MM
989 if (ret < 0)
990 return ret;
991
8bb8aefd
YL
992 /* register orangefs-req device */
993 orangefs_dev_major = register_chrdev(0,
994 ORANGEFS_REQDEVICE_NAME,
995 &orangefs_devreq_file_operations);
996 if (orangefs_dev_major < 0) {
5db11c21
MM
997 gossip_debug(GOSSIP_DEV_DEBUG,
998 "Failed to register /dev/%s (error %d)\n",
8bb8aefd
YL
999 ORANGEFS_REQDEVICE_NAME, orangefs_dev_major);
1000 orangefs_ioctl32_cleanup();
1001 return orangefs_dev_major;
5db11c21
MM
1002 }
1003
1004 gossip_debug(GOSSIP_DEV_DEBUG,
1005 "*** /dev/%s character device registered ***\n",
8bb8aefd 1006 ORANGEFS_REQDEVICE_NAME);
5db11c21 1007 gossip_debug(GOSSIP_DEV_DEBUG, "'mknod /dev/%s c %d 0'.\n",
8bb8aefd 1008 ORANGEFS_REQDEVICE_NAME, orangefs_dev_major);
5db11c21
MM
1009 return 0;
1010}
1011
8bb8aefd 1012void orangefs_dev_cleanup(void)
5db11c21 1013{
8bb8aefd 1014 unregister_chrdev(orangefs_dev_major, ORANGEFS_REQDEVICE_NAME);
5db11c21
MM
1015 gossip_debug(GOSSIP_DEV_DEBUG,
1016 "*** /dev/%s character device unregistered ***\n",
8bb8aefd 1017 ORANGEFS_REQDEVICE_NAME);
5db11c21 1018 /* unregister the ioctl32 sub-system */
8bb8aefd 1019 orangefs_ioctl32_cleanup();
5db11c21
MM
1020}
1021
8bb8aefd 1022static unsigned int orangefs_devreq_poll(struct file *file,
5db11c21
MM
1023 struct poll_table_struct *poll_table)
1024{
1025 int poll_revent_mask = 0;
1026
1027 if (open_access_count == 1) {
8bb8aefd 1028 poll_wait(file, &orangefs_request_list_waitq, poll_table);
5db11c21 1029
8bb8aefd
YL
1030 spin_lock(&orangefs_request_list_lock);
1031 if (!list_empty(&orangefs_request_list))
5db11c21 1032 poll_revent_mask |= POLL_IN;
8bb8aefd 1033 spin_unlock(&orangefs_request_list_lock);
5db11c21
MM
1034 }
1035 return poll_revent_mask;
1036}
1037
8bb8aefd 1038const struct file_operations orangefs_devreq_file_operations = {
5db11c21 1039 .owner = THIS_MODULE,
8bb8aefd
YL
1040 .read = orangefs_devreq_read,
1041 .write_iter = orangefs_devreq_write_iter,
1042 .open = orangefs_devreq_open,
1043 .release = orangefs_devreq_release,
1044 .unlocked_ioctl = orangefs_devreq_ioctl,
5db11c21
MM
1045
1046#ifdef CONFIG_COMPAT /* CONFIG_COMPAT is in .config */
8bb8aefd 1047 .compat_ioctl = orangefs_devreq_compat_ioctl,
5db11c21 1048#endif
8bb8aefd 1049 .poll = orangefs_devreq_poll
5db11c21 1050};
This page took 0.121386 seconds and 5 git commands to generate.