Commit | Line | Data |
---|---|---|
1107ba88 AZ |
1 | /* |
2 | * Driver giving user-space access to the kernel's xenbus connection | |
3 | * to xenstore. | |
4 | * | |
5 | * Copyright (c) 2005, Christian Limpach | |
6 | * Copyright (c) 2005, Rusty Russell, IBM Corporation | |
7 | * | |
8 | * This program is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU General Public License version 2 | |
10 | * as published by the Free Software Foundation; or, when distributed | |
11 | * separately from the Linux kernel or incorporated into other | |
12 | * software packages, subject to the following license: | |
13 | * | |
14 | * Permission is hereby granted, free of charge, to any person obtaining a copy | |
15 | * of this source file (the "Software"), to deal in the Software without | |
16 | * restriction, including without limitation the rights to use, copy, modify, | |
17 | * merge, publish, distribute, sublicense, and/or sell copies of the Software, | |
18 | * and to permit persons to whom the Software is furnished to do so, subject to | |
19 | * the following conditions: | |
20 | * | |
21 | * The above copyright notice and this permission notice shall be included in | |
22 | * all copies or substantial portions of the Software. | |
23 | * | |
24 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
25 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
26 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
27 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
28 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
29 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |
30 | * IN THE SOFTWARE. | |
31 | * | |
32 | * Changes: | |
33 | * 2008-10-07 Alex Zeffertt Replaced /proc/xen/xenbus with xenfs filesystem | |
34 | * and /proc/xen compatibility mount point. | |
35 | * Turned xenfs into a loadable module. | |
36 | */ | |
37 | ||
38 | #include <linux/kernel.h> | |
39 | #include <linux/errno.h> | |
40 | #include <linux/uio.h> | |
41 | #include <linux/notifier.h> | |
42 | #include <linux/wait.h> | |
43 | #include <linux/fs.h> | |
44 | #include <linux/poll.h> | |
45 | #include <linux/mutex.h> | |
a99bbaf5 | 46 | #include <linux/sched.h> |
1107ba88 AZ |
47 | #include <linux/spinlock.h> |
48 | #include <linux/mount.h> | |
49 | #include <linux/pagemap.h> | |
50 | #include <linux/uaccess.h> | |
51 | #include <linux/init.h> | |
52 | #include <linux/namei.h> | |
53 | #include <linux/string.h> | |
5a0e3ad6 | 54 | #include <linux/slab.h> |
1107ba88 AZ |
55 | |
56 | #include "xenfs.h" | |
57 | #include "../xenbus/xenbus_comms.h" | |
58 | ||
59 | #include <xen/xenbus.h> | |
60 | #include <asm/xen/hypervisor.h> | |
61 | ||
62 | /* | |
63 | * An element of a list of outstanding transactions, for which we're | |
64 | * still waiting a reply. | |
65 | */ | |
66 | struct xenbus_transaction_holder { | |
67 | struct list_head list; | |
68 | struct xenbus_transaction handle; | |
69 | }; | |
70 | ||
71 | /* | |
72 | * A buffer of data on the queue. | |
73 | */ | |
74 | struct read_buffer { | |
75 | struct list_head list; | |
76 | unsigned int cons; | |
77 | unsigned int len; | |
78 | char msg[]; | |
79 | }; | |
80 | ||
81 | struct xenbus_file_priv { | |
82 | /* | |
83 | * msgbuffer_mutex is held while partial requests are built up | |
84 | * and complete requests are acted on. It therefore protects | |
85 | * the "transactions" and "watches" lists, and the partial | |
86 | * request length and buffer. | |
87 | * | |
88 | * reply_mutex protects the reply being built up to return to | |
89 | * usermode. It nests inside msgbuffer_mutex but may be held | |
90 | * alone during a watch callback. | |
91 | */ | |
92 | struct mutex msgbuffer_mutex; | |
93 | ||
94 | /* In-progress transactions */ | |
95 | struct list_head transactions; | |
96 | ||
97 | /* Active watches. */ | |
98 | struct list_head watches; | |
99 | ||
100 | /* Partial request. */ | |
101 | unsigned int len; | |
102 | union { | |
103 | struct xsd_sockmsg msg; | |
104 | char buffer[PAGE_SIZE]; | |
105 | } u; | |
106 | ||
107 | /* Response queue. */ | |
108 | struct mutex reply_mutex; | |
109 | struct list_head read_buffers; | |
110 | wait_queue_head_t read_waitq; | |
111 | ||
112 | }; | |
113 | ||
114 | /* Read out any raw xenbus messages queued up. */ | |
115 | static ssize_t xenbus_file_read(struct file *filp, | |
116 | char __user *ubuf, | |
117 | size_t len, loff_t *ppos) | |
118 | { | |
119 | struct xenbus_file_priv *u = filp->private_data; | |
120 | struct read_buffer *rb; | |
121 | unsigned i; | |
122 | int ret; | |
123 | ||
124 | mutex_lock(&u->reply_mutex); | |
7808121b | 125 | again: |
1107ba88 AZ |
126 | while (list_empty(&u->read_buffers)) { |
127 | mutex_unlock(&u->reply_mutex); | |
6280f190 PB |
128 | if (filp->f_flags & O_NONBLOCK) |
129 | return -EAGAIN; | |
130 | ||
1107ba88 AZ |
131 | ret = wait_event_interruptible(u->read_waitq, |
132 | !list_empty(&u->read_buffers)); | |
133 | if (ret) | |
134 | return ret; | |
135 | mutex_lock(&u->reply_mutex); | |
136 | } | |
137 | ||
138 | rb = list_entry(u->read_buffers.next, struct read_buffer, list); | |
139 | i = 0; | |
140 | while (i < len) { | |
141 | unsigned sz = min((unsigned)len - i, rb->len - rb->cons); | |
142 | ||
143 | ret = copy_to_user(ubuf + i, &rb->msg[rb->cons], sz); | |
144 | ||
145 | i += sz - ret; | |
146 | rb->cons += sz - ret; | |
147 | ||
fb27cfbc | 148 | if (ret != 0) { |
1107ba88 AZ |
149 | if (i == 0) |
150 | i = -EFAULT; | |
151 | goto out; | |
152 | } | |
153 | ||
154 | /* Clear out buffer if it has been consumed */ | |
155 | if (rb->cons == rb->len) { | |
156 | list_del(&rb->list); | |
157 | kfree(rb); | |
158 | if (list_empty(&u->read_buffers)) | |
159 | break; | |
160 | rb = list_entry(u->read_buffers.next, | |
161 | struct read_buffer, list); | |
162 | } | |
163 | } | |
7808121b DDG |
164 | if (i == 0) |
165 | goto again; | |
1107ba88 AZ |
166 | |
167 | out: | |
168 | mutex_unlock(&u->reply_mutex); | |
169 | return i; | |
170 | } | |
171 | ||
172 | /* | |
173 | * Add a buffer to the queue. Caller must hold the appropriate lock | |
174 | * if the queue is not local. (Commonly the caller will build up | |
175 | * multiple queued buffers on a temporary local list, and then add it | |
176 | * to the appropriate list under lock once all the buffers have een | |
177 | * successfully allocated.) | |
178 | */ | |
179 | static int queue_reply(struct list_head *queue, const void *data, size_t len) | |
180 | { | |
181 | struct read_buffer *rb; | |
182 | ||
183 | if (len == 0) | |
184 | return 0; | |
185 | ||
186 | rb = kmalloc(sizeof(*rb) + len, GFP_KERNEL); | |
187 | if (rb == NULL) | |
188 | return -ENOMEM; | |
189 | ||
190 | rb->cons = 0; | |
191 | rb->len = len; | |
192 | ||
193 | memcpy(rb->msg, data, len); | |
194 | ||
195 | list_add_tail(&rb->list, queue); | |
196 | return 0; | |
197 | } | |
198 | ||
199 | /* | |
200 | * Free all the read_buffer s on a list. | |
201 | * Caller must have sole reference to list. | |
202 | */ | |
203 | static void queue_cleanup(struct list_head *list) | |
204 | { | |
205 | struct read_buffer *rb; | |
206 | ||
207 | while (!list_empty(list)) { | |
208 | rb = list_entry(list->next, struct read_buffer, list); | |
209 | list_del(list->next); | |
210 | kfree(rb); | |
211 | } | |
212 | } | |
213 | ||
214 | struct watch_adapter { | |
215 | struct list_head list; | |
216 | struct xenbus_watch watch; | |
217 | struct xenbus_file_priv *dev_data; | |
218 | char *token; | |
219 | }; | |
220 | ||
221 | static void free_watch_adapter(struct watch_adapter *watch) | |
222 | { | |
223 | kfree(watch->watch.node); | |
224 | kfree(watch->token); | |
225 | kfree(watch); | |
226 | } | |
227 | ||
228 | static struct watch_adapter *alloc_watch_adapter(const char *path, | |
229 | const char *token) | |
230 | { | |
231 | struct watch_adapter *watch; | |
232 | ||
233 | watch = kzalloc(sizeof(*watch), GFP_KERNEL); | |
234 | if (watch == NULL) | |
235 | goto out_fail; | |
236 | ||
237 | watch->watch.node = kstrdup(path, GFP_KERNEL); | |
238 | if (watch->watch.node == NULL) | |
239 | goto out_free; | |
240 | ||
241 | watch->token = kstrdup(token, GFP_KERNEL); | |
242 | if (watch->token == NULL) | |
243 | goto out_free; | |
244 | ||
245 | return watch; | |
246 | ||
247 | out_free: | |
248 | free_watch_adapter(watch); | |
249 | ||
250 | out_fail: | |
251 | return NULL; | |
252 | } | |
253 | ||
254 | static void watch_fired(struct xenbus_watch *watch, | |
255 | const char **vec, | |
256 | unsigned int len) | |
257 | { | |
258 | struct watch_adapter *adap; | |
259 | struct xsd_sockmsg hdr; | |
260 | const char *path, *token; | |
261 | int path_len, tok_len, body_len, data_len = 0; | |
262 | int ret; | |
263 | LIST_HEAD(staging_q); | |
264 | ||
265 | adap = container_of(watch, struct watch_adapter, watch); | |
266 | ||
267 | path = vec[XS_WATCH_PATH]; | |
268 | token = adap->token; | |
269 | ||
270 | path_len = strlen(path) + 1; | |
271 | tok_len = strlen(token) + 1; | |
272 | if (len > 2) | |
273 | data_len = vec[len] - vec[2] + 1; | |
274 | body_len = path_len + tok_len + data_len; | |
275 | ||
276 | hdr.type = XS_WATCH_EVENT; | |
277 | hdr.len = body_len; | |
278 | ||
279 | mutex_lock(&adap->dev_data->reply_mutex); | |
280 | ||
281 | ret = queue_reply(&staging_q, &hdr, sizeof(hdr)); | |
282 | if (!ret) | |
283 | ret = queue_reply(&staging_q, path, path_len); | |
284 | if (!ret) | |
285 | ret = queue_reply(&staging_q, token, tok_len); | |
286 | if (!ret && len > 2) | |
287 | ret = queue_reply(&staging_q, vec[2], data_len); | |
288 | ||
289 | if (!ret) { | |
290 | /* success: pass reply list onto watcher */ | |
291 | list_splice_tail(&staging_q, &adap->dev_data->read_buffers); | |
292 | wake_up(&adap->dev_data->read_waitq); | |
293 | } else | |
294 | queue_cleanup(&staging_q); | |
295 | ||
296 | mutex_unlock(&adap->dev_data->reply_mutex); | |
297 | } | |
298 | ||
299 | static int xenbus_write_transaction(unsigned msg_type, | |
300 | struct xenbus_file_priv *u) | |
301 | { | |
e88a0faa | 302 | int rc; |
1107ba88 AZ |
303 | void *reply; |
304 | struct xenbus_transaction_holder *trans = NULL; | |
305 | LIST_HEAD(staging_q); | |
306 | ||
307 | if (msg_type == XS_TRANSACTION_START) { | |
308 | trans = kmalloc(sizeof(*trans), GFP_KERNEL); | |
309 | if (!trans) { | |
310 | rc = -ENOMEM; | |
311 | goto out; | |
312 | } | |
313 | } | |
314 | ||
315 | reply = xenbus_dev_request_and_reply(&u->u.msg); | |
316 | if (IS_ERR(reply)) { | |
317 | kfree(trans); | |
318 | rc = PTR_ERR(reply); | |
319 | goto out; | |
320 | } | |
321 | ||
322 | if (msg_type == XS_TRANSACTION_START) { | |
323 | trans->handle.id = simple_strtoul(reply, NULL, 0); | |
324 | ||
325 | list_add(&trans->list, &u->transactions); | |
326 | } else if (msg_type == XS_TRANSACTION_END) { | |
327 | list_for_each_entry(trans, &u->transactions, list) | |
328 | if (trans->handle.id == u->u.msg.tx_id) | |
329 | break; | |
330 | BUG_ON(&trans->list == &u->transactions); | |
331 | list_del(&trans->list); | |
332 | ||
333 | kfree(trans); | |
334 | } | |
335 | ||
336 | mutex_lock(&u->reply_mutex); | |
e88a0faa IC |
337 | rc = queue_reply(&staging_q, &u->u.msg, sizeof(u->u.msg)); |
338 | if (!rc) | |
339 | rc = queue_reply(&staging_q, reply, u->u.msg.len); | |
340 | if (!rc) { | |
1107ba88 AZ |
341 | list_splice_tail(&staging_q, &u->read_buffers); |
342 | wake_up(&u->read_waitq); | |
343 | } else { | |
344 | queue_cleanup(&staging_q); | |
1107ba88 AZ |
345 | } |
346 | mutex_unlock(&u->reply_mutex); | |
347 | ||
348 | kfree(reply); | |
349 | ||
350 | out: | |
351 | return rc; | |
352 | } | |
353 | ||
354 | static int xenbus_write_watch(unsigned msg_type, struct xenbus_file_priv *u) | |
355 | { | |
356 | struct watch_adapter *watch, *tmp_watch; | |
357 | char *path, *token; | |
358 | int err, rc; | |
359 | LIST_HEAD(staging_q); | |
360 | ||
361 | path = u->u.buffer + sizeof(u->u.msg); | |
362 | token = memchr(path, 0, u->u.msg.len); | |
363 | if (token == NULL) { | |
364 | rc = -EILSEQ; | |
365 | goto out; | |
366 | } | |
367 | token++; | |
368 | ||
369 | if (msg_type == XS_WATCH) { | |
370 | watch = alloc_watch_adapter(path, token); | |
371 | if (watch == NULL) { | |
372 | rc = -ENOMEM; | |
373 | goto out; | |
374 | } | |
375 | ||
376 | watch->watch.callback = watch_fired; | |
377 | watch->dev_data = u; | |
378 | ||
379 | err = register_xenbus_watch(&watch->watch); | |
380 | if (err) { | |
381 | free_watch_adapter(watch); | |
382 | rc = err; | |
383 | goto out; | |
384 | } | |
385 | list_add(&watch->list, &u->watches); | |
386 | } else { | |
387 | list_for_each_entry_safe(watch, tmp_watch, &u->watches, list) { | |
388 | if (!strcmp(watch->token, token) && | |
389 | !strcmp(watch->watch.node, path)) { | |
390 | unregister_xenbus_watch(&watch->watch); | |
391 | list_del(&watch->list); | |
392 | free_watch_adapter(watch); | |
393 | break; | |
394 | } | |
395 | } | |
396 | } | |
397 | ||
398 | /* Success. Synthesize a reply to say all is OK. */ | |
399 | { | |
400 | struct { | |
401 | struct xsd_sockmsg hdr; | |
402 | char body[3]; | |
403 | } __packed reply = { | |
404 | { | |
405 | .type = msg_type, | |
406 | .len = sizeof(reply.body) | |
407 | }, | |
408 | "OK" | |
409 | }; | |
410 | ||
411 | mutex_lock(&u->reply_mutex); | |
412 | rc = queue_reply(&u->read_buffers, &reply, sizeof(reply)); | |
76ce7618 | 413 | wake_up(&u->read_waitq); |
1107ba88 AZ |
414 | mutex_unlock(&u->reply_mutex); |
415 | } | |
416 | ||
417 | out: | |
418 | return rc; | |
419 | } | |
420 | ||
421 | static ssize_t xenbus_file_write(struct file *filp, | |
422 | const char __user *ubuf, | |
423 | size_t len, loff_t *ppos) | |
424 | { | |
425 | struct xenbus_file_priv *u = filp->private_data; | |
426 | uint32_t msg_type; | |
427 | int rc = len; | |
428 | int ret; | |
429 | LIST_HEAD(staging_q); | |
430 | ||
431 | /* | |
432 | * We're expecting usermode to be writing properly formed | |
433 | * xenbus messages. If they write an incomplete message we | |
434 | * buffer it up. Once it is complete, we act on it. | |
435 | */ | |
436 | ||
437 | /* | |
438 | * Make sure concurrent writers can't stomp all over each | |
439 | * other's messages and make a mess of our partial message | |
440 | * buffer. We don't make any attemppt to stop multiple | |
441 | * writers from making a mess of each other's incomplete | |
442 | * messages; we're just trying to guarantee our own internal | |
443 | * consistency and make sure that single writes are handled | |
444 | * atomically. | |
445 | */ | |
446 | mutex_lock(&u->msgbuffer_mutex); | |
447 | ||
448 | /* Get this out of the way early to avoid confusion */ | |
449 | if (len == 0) | |
450 | goto out; | |
451 | ||
452 | /* Can't write a xenbus message larger we can buffer */ | |
453 | if ((len + u->len) > sizeof(u->u.buffer)) { | |
454 | /* On error, dump existing buffer */ | |
455 | u->len = 0; | |
456 | rc = -EINVAL; | |
457 | goto out; | |
458 | } | |
459 | ||
460 | ret = copy_from_user(u->u.buffer + u->len, ubuf, len); | |
461 | ||
fb27cfbc | 462 | if (ret != 0) { |
1107ba88 AZ |
463 | rc = -EFAULT; |
464 | goto out; | |
465 | } | |
466 | ||
467 | /* Deal with a partial copy. */ | |
468 | len -= ret; | |
469 | rc = len; | |
470 | ||
471 | u->len += len; | |
472 | ||
473 | /* Return if we haven't got a full message yet */ | |
474 | if (u->len < sizeof(u->u.msg)) | |
475 | goto out; /* not even the header yet */ | |
476 | ||
477 | /* If we're expecting a message that's larger than we can | |
478 | possibly send, dump what we have and return an error. */ | |
479 | if ((sizeof(u->u.msg) + u->u.msg.len) > sizeof(u->u.buffer)) { | |
480 | rc = -E2BIG; | |
481 | u->len = 0; | |
482 | goto out; | |
483 | } | |
484 | ||
485 | if (u->len < (sizeof(u->u.msg) + u->u.msg.len)) | |
486 | goto out; /* incomplete data portion */ | |
487 | ||
488 | /* | |
489 | * OK, now we have a complete message. Do something with it. | |
490 | */ | |
491 | ||
492 | msg_type = u->u.msg.type; | |
493 | ||
494 | switch (msg_type) { | |
1107ba88 AZ |
495 | case XS_WATCH: |
496 | case XS_UNWATCH: | |
497 | /* (Un)Ask for some path to be watched for changes */ | |
498 | ret = xenbus_write_watch(msg_type, u); | |
499 | break; | |
500 | ||
501 | default: | |
6d6df2e4 DO |
502 | /* Send out a transaction */ |
503 | ret = xenbus_write_transaction(msg_type, u); | |
1107ba88 AZ |
504 | break; |
505 | } | |
506 | if (ret != 0) | |
507 | rc = ret; | |
508 | ||
509 | /* Buffered message consumed */ | |
510 | u->len = 0; | |
511 | ||
512 | out: | |
513 | mutex_unlock(&u->msgbuffer_mutex); | |
514 | return rc; | |
515 | } | |
516 | ||
517 | static int xenbus_file_open(struct inode *inode, struct file *filp) | |
518 | { | |
519 | struct xenbus_file_priv *u; | |
520 | ||
521 | if (xen_store_evtchn == 0) | |
522 | return -ENOENT; | |
523 | ||
524 | nonseekable_open(inode, filp); | |
525 | ||
526 | u = kzalloc(sizeof(*u), GFP_KERNEL); | |
527 | if (u == NULL) | |
528 | return -ENOMEM; | |
529 | ||
530 | INIT_LIST_HEAD(&u->transactions); | |
531 | INIT_LIST_HEAD(&u->watches); | |
532 | INIT_LIST_HEAD(&u->read_buffers); | |
533 | init_waitqueue_head(&u->read_waitq); | |
534 | ||
535 | mutex_init(&u->reply_mutex); | |
536 | mutex_init(&u->msgbuffer_mutex); | |
537 | ||
538 | filp->private_data = u; | |
539 | ||
540 | return 0; | |
541 | } | |
542 | ||
543 | static int xenbus_file_release(struct inode *inode, struct file *filp) | |
544 | { | |
545 | struct xenbus_file_priv *u = filp->private_data; | |
546 | struct xenbus_transaction_holder *trans, *tmp; | |
547 | struct watch_adapter *watch, *tmp_watch; | |
6a5b3bef | 548 | struct read_buffer *rb, *tmp_rb; |
1107ba88 AZ |
549 | |
550 | /* | |
551 | * No need for locking here because there are no other users, | |
552 | * by definition. | |
553 | */ | |
554 | ||
555 | list_for_each_entry_safe(trans, tmp, &u->transactions, list) { | |
556 | xenbus_transaction_end(trans->handle, 1); | |
557 | list_del(&trans->list); | |
558 | kfree(trans); | |
559 | } | |
560 | ||
561 | list_for_each_entry_safe(watch, tmp_watch, &u->watches, list) { | |
562 | unregister_xenbus_watch(&watch->watch); | |
563 | list_del(&watch->list); | |
564 | free_watch_adapter(watch); | |
565 | } | |
566 | ||
6a5b3bef DDG |
567 | list_for_each_entry_safe(rb, tmp_rb, &u->read_buffers, list) { |
568 | list_del(&rb->list); | |
569 | kfree(rb); | |
570 | } | |
1107ba88 AZ |
571 | kfree(u); |
572 | ||
573 | return 0; | |
574 | } | |
575 | ||
576 | static unsigned int xenbus_file_poll(struct file *file, poll_table *wait) | |
577 | { | |
578 | struct xenbus_file_priv *u = file->private_data; | |
579 | ||
580 | poll_wait(file, &u->read_waitq, wait); | |
581 | if (!list_empty(&u->read_buffers)) | |
582 | return POLLIN | POLLRDNORM; | |
583 | return 0; | |
584 | } | |
585 | ||
586 | const struct file_operations xenbus_file_ops = { | |
587 | .read = xenbus_file_read, | |
588 | .write = xenbus_file_write, | |
589 | .open = xenbus_file_open, | |
590 | .release = xenbus_file_release, | |
591 | .poll = xenbus_file_poll, | |
6038f373 | 592 | .llseek = no_llseek, |
1107ba88 | 593 | }; |