Commit | Line | Data |
---|---|---|
19f1537b RR |
1 | /*P:050 Lguest guests use a very simple method to describe devices. It's a |
2 | * series of device descriptors contained just above the top of normal | |
3 | * memory. | |
4 | * | |
5 | * We use the standard "virtio" device infrastructure, which provides us with a | |
6 | * console, a network and a block driver. Each one expects some configuration | |
7 | * information and a "virtqueue" mechanism to send and receive data. :*/ | |
8 | #include <linux/init.h> | |
9 | #include <linux/bootmem.h> | |
10 | #include <linux/lguest_launcher.h> | |
11 | #include <linux/virtio.h> | |
12 | #include <linux/virtio_config.h> | |
13 | #include <linux/interrupt.h> | |
14 | #include <linux/virtio_ring.h> | |
15 | #include <linux/err.h> | |
16 | #include <asm/io.h> | |
17 | #include <asm/paravirt.h> | |
18 | #include <asm/lguest_hcall.h> | |
19 | ||
/* Mapping of the single page of device descriptors; filled in by
 * lguest_devices_init() and walked by scan_devices(). */
static void *lguest_devices;

/* Next index to hand out, so every lguest device gets a unique number. */
static unsigned int dev_index;
25 | ||
26 | /* For Guests, device memory can be used as normal memory, so we cast away the | |
27 | * __iomem to quieten sparse. */ | |
28 | static inline void *lguest_map(unsigned long phys_addr, unsigned long pages) | |
29 | { | |
30 | return (__force void *)ioremap(phys_addr, PAGE_SIZE*pages); | |
31 | } | |
32 | ||
33 | static inline void lguest_unmap(void *addr) | |
34 | { | |
35 | iounmap((__force void __iomem *)addr); | |
36 | } | |
37 | ||
38 | /*D:100 Each lguest device is just a virtio device plus a pointer to its entry | |
39 | * in the lguest_devices page. */ | |
40 | struct lguest_device { | |
41 | struct virtio_device vdev; | |
42 | ||
43 | /* The entry in the lguest_devices page for this device. */ | |
44 | struct lguest_device_desc *desc; | |
45 | }; | |
46 | ||
/* Since the virtio infrastructure hands us a pointer to the virtio_device all
 * the time, it helps to have a curt macro to get a pointer to the struct
 * lguest_device it's enclosed in.
 *
 * The macro parameter must NOT be spelled "vdev": that is also the name of the
 * member passed to container_of(), and the preprocessor would substitute the
 * argument into the member name as well, so the macro would only compile when
 * the caller's expression happened to be the bare identifier "vdev". */
#define to_lgdev(vd) container_of(vd, struct lguest_device, vdev)
51 | ||
52 | /*D:130 | |
53 | * Device configurations | |
54 | * | |
55 | * The configuration information for a device consists of a series of fields. | |
e1e72965 RR |
56 | * We don't really care what they are: the Launcher set them up, and the driver |
57 | * will look at them during setup. | |
19f1537b RR |
58 | * |
59 | * For us these fields come immediately after that device's descriptor in the | |
60 | * lguest_devices page. | |
61 | * | |
62 | * Each field starts with a "type" byte, a "length" byte, then that number of | |
63 | * bytes of configuration information. The device descriptor tells us the | |
64 | * total configuration length so we know when we've reached the last field. */ | |
65 | ||
/* Size of a config field header: one "type" byte plus one "length" byte. */
#define FHDR_LEN 2
68 | ||
69 | /* This finds the first field of a given type for a device's configuration. */ | |
70 | static void *lg_find(struct virtio_device *vdev, u8 type, unsigned int *len) | |
71 | { | |
72 | struct lguest_device_desc *desc = to_lgdev(vdev)->desc; | |
73 | int i; | |
74 | ||
75 | for (i = 0; i < desc->config_len; i += FHDR_LEN + desc->config[i+1]) { | |
76 | if (desc->config[i] == type) { | |
77 | /* Mark it used, so Host can know we looked at it, and | |
78 | * also so we won't find the same one twice. */ | |
79 | desc->config[i] |= 0x80; | |
80 | /* Remember, the second byte is the length. */ | |
81 | *len = desc->config[i+1]; | |
82 | /* We return a pointer to the field header. */ | |
83 | return desc->config + i; | |
84 | } | |
85 | } | |
86 | ||
87 | /* Not found: return NULL for failure. */ | |
88 | return NULL; | |
89 | } | |
90 | ||
91 | /* Once they've found a field, getting a copy of it is easy. */ | |
92 | static void lg_get(struct virtio_device *vdev, void *token, | |
93 | void *buf, unsigned len) | |
94 | { | |
95 | /* Check they didn't ask for more than the length of the field! */ | |
96 | BUG_ON(len > ((u8 *)token)[1]); | |
97 | memcpy(buf, token + FHDR_LEN, len); | |
98 | } | |
99 | ||
100 | /* Setting the contents is also trivial. */ | |
101 | static void lg_set(struct virtio_device *vdev, void *token, | |
102 | const void *buf, unsigned len) | |
103 | { | |
104 | BUG_ON(len > ((u8 *)token)[1]); | |
105 | memcpy(token + FHDR_LEN, buf, len); | |
106 | } | |
107 | ||
108 | /* The operations to get and set the status word just access the status field | |
109 | * of the device descriptor. */ | |
110 | static u8 lg_get_status(struct virtio_device *vdev) | |
111 | { | |
112 | return to_lgdev(vdev)->desc->status; | |
113 | } | |
114 | ||
115 | static void lg_set_status(struct virtio_device *vdev, u8 status) | |
116 | { | |
117 | to_lgdev(vdev)->desc->status = status; | |
118 | } | |
119 | ||
120 | /* | |
121 | * Virtqueues | |
122 | * | |
123 | * The other piece of infrastructure virtio needs is a "virtqueue": a way of | |
124 | * the Guest device registering buffers for the other side to read from or | |
125 | * write into (ie. send and receive buffers). Each device can have multiple | |
e1e72965 RR |
126 | * virtqueues: for example the console driver uses one queue for sending and |
127 | * another for receiving. | |
19f1537b RR |
128 | * |
129 | * Fortunately for us, a very fast shared-memory-plus-descriptors virtqueue | |
130 | * already exists in virtio_ring.c. We just need to connect it up. | |
131 | * | |
132 | * We start with the information we need to keep about each virtqueue. | |
133 | */ | |
134 | ||
135 | /*D:140 This is the information we remember about each virtqueue. */ | |
136 | struct lguest_vq_info | |
137 | { | |
138 | /* A copy of the information contained in the device config. */ | |
139 | struct lguest_vqconfig config; | |
140 | ||
141 | /* The address where we mapped the virtio ring, so we can unmap it. */ | |
142 | void *pages; | |
143 | }; | |
144 | ||
145 | /* When the virtio_ring code wants to prod the Host, it calls us here and we | |
146 | * make a hypercall. We hand the page number of the virtqueue so the Host | |
147 | * knows which virtqueue we're talking about. */ | |
148 | static void lg_notify(struct virtqueue *vq) | |
149 | { | |
150 | /* We store our virtqueue information in the "priv" pointer of the | |
151 | * virtqueue structure. */ | |
152 | struct lguest_vq_info *lvq = vq->priv; | |
153 | ||
154 | hcall(LHCALL_NOTIFY, lvq->config.pfn << PAGE_SHIFT, 0, 0); | |
155 | } | |
156 | ||
157 | /* This routine finds the first virtqueue described in the configuration of | |
158 | * this device and sets it up. | |
159 | * | |
160 | * This is kind of an ugly duckling. It'd be nicer to have a standard | |
161 | * representation of a virtqueue in the configuration space, but it seems that | |
e1e72965 | 162 | * everyone wants to do it differently. The KVM coders want the Guest to |
19f1537b RR |
163 | * allocate its own pages and tell the Host where they are, but for lguest it's |
164 | * simpler for the Host to simply tell us where the pages are. | |
165 | * | |
166 | * So we provide devices with a "find virtqueue and set it up" function. */ | |
167 | static struct virtqueue *lg_find_vq(struct virtio_device *vdev, | |
168 | bool (*callback)(struct virtqueue *vq)) | |
169 | { | |
170 | struct lguest_vq_info *lvq; | |
171 | struct virtqueue *vq; | |
172 | unsigned int len; | |
173 | void *token; | |
174 | int err; | |
175 | ||
176 | /* Look for a field of the correct type to mark a virtqueue. Note that | |
177 | * if this succeeds, then the type will be changed so it won't be found | |
178 | * again, and future lg_find_vq() calls will find the next | |
179 | * virtqueue (if any). */ | |
180 | token = vdev->config->find(vdev, VIRTIO_CONFIG_F_VIRTQUEUE, &len); | |
181 | if (!token) | |
182 | return ERR_PTR(-ENOENT); | |
183 | ||
184 | lvq = kmalloc(sizeof(*lvq), GFP_KERNEL); | |
185 | if (!lvq) | |
186 | return ERR_PTR(-ENOMEM); | |
187 | ||
188 | /* Note: we could use a configuration space inside here, just like we | |
189 | * do for the device. This would allow expansion in future, because | |
190 | * our configuration system is designed to be expansible. But this is | |
191 | * way easier. */ | |
192 | if (len != sizeof(lvq->config)) { | |
193 | dev_err(&vdev->dev, "Unexpected virtio config len %u\n", len); | |
194 | err = -EIO; | |
195 | goto free_lvq; | |
196 | } | |
197 | /* Make a copy of the "struct lguest_vqconfig" field. We need a copy | |
198 | * because the config space might not be aligned correctly. */ | |
199 | vdev->config->get(vdev, token, &lvq->config, sizeof(lvq->config)); | |
200 | ||
201 | /* Figure out how many pages the ring will take, and map that memory */ | |
202 | lvq->pages = lguest_map((unsigned long)lvq->config.pfn << PAGE_SHIFT, | |
42b36cc0 RR |
203 | DIV_ROUND_UP(vring_size(lvq->config.num, |
204 | PAGE_SIZE), | |
19f1537b RR |
205 | PAGE_SIZE)); |
206 | if (!lvq->pages) { | |
207 | err = -ENOMEM; | |
208 | goto free_lvq; | |
209 | } | |
210 | ||
211 | /* OK, tell virtio_ring.c to set up a virtqueue now we know its size | |
212 | * and we've got a pointer to its pages. */ | |
213 | vq = vring_new_virtqueue(lvq->config.num, vdev, lvq->pages, | |
214 | lg_notify, callback); | |
215 | if (!vq) { | |
216 | err = -ENOMEM; | |
217 | goto unmap; | |
218 | } | |
219 | ||
220 | /* Tell the interrupt for this virtqueue to go to the virtio_ring | |
221 | * interrupt handler. */ | |
222 | /* FIXME: We used to have a flag for the Host to tell us we could use | |
223 | * the interrupt as a source of randomness: it'd be nice to have that | |
224 | * back.. */ | |
225 | err = request_irq(lvq->config.irq, vring_interrupt, IRQF_SHARED, | |
226 | vdev->dev.bus_id, vq); | |
227 | if (err) | |
228 | goto destroy_vring; | |
229 | ||
230 | /* Last of all we hook up our 'struct lguest_vq_info" to the | |
231 | * virtqueue's priv pointer. */ | |
232 | vq->priv = lvq; | |
233 | return vq; | |
234 | ||
235 | destroy_vring: | |
236 | vring_del_virtqueue(vq); | |
237 | unmap: | |
238 | lguest_unmap(lvq->pages); | |
239 | free_lvq: | |
240 | kfree(lvq); | |
241 | return ERR_PTR(err); | |
242 | } | |
243 | /*:*/ | |
244 | ||
245 | /* Cleaning up a virtqueue is easy */ | |
246 | static void lg_del_vq(struct virtqueue *vq) | |
247 | { | |
248 | struct lguest_vq_info *lvq = vq->priv; | |
249 | ||
74b2553f RR |
250 | /* Release the interrupt */ |
251 | free_irq(lvq->config.irq, vq); | |
19f1537b RR |
252 | /* Tell virtio_ring.c to free the virtqueue. */ |
253 | vring_del_virtqueue(vq); | |
254 | /* Unmap the pages containing the ring. */ | |
255 | lguest_unmap(lvq->pages); | |
256 | /* Free our own queue information. */ | |
257 | kfree(lvq); | |
258 | } | |
259 | ||
260 | /* The ops structure which hooks everything together. */ | |
261 | static struct virtio_config_ops lguest_config_ops = { | |
262 | .find = lg_find, | |
263 | .get = lg_get, | |
264 | .set = lg_set, | |
265 | .get_status = lg_get_status, | |
266 | .set_status = lg_set_status, | |
267 | .find_vq = lg_find_vq, | |
268 | .del_vq = lg_del_vq, | |
269 | }; | |
270 | ||
271 | /* The root device for the lguest virtio devices. This makes them appear as | |
272 | * /sys/devices/lguest/0,1,2 not /sys/devices/0,1,2. */ | |
273 | static struct device lguest_root = { | |
274 | .parent = NULL, | |
275 | .bus_id = "lguest", | |
276 | }; | |
277 | ||
278 | /*D:120 This is the core of the lguest bus: actually adding a new device. | |
279 | * It's a separate function because it's neater that way, and because an | |
280 | * earlier version of the code supported hotplug and unplug. They were removed | |
281 | * early on because they were never used. | |
282 | * | |
283 | * As Andrew Tridgell says, "Untested code is buggy code". | |
284 | * | |
285 | * It's worth reading this carefully: we start with a pointer to the new device | |
286 | * descriptor in the "lguest_devices" page. */ | |
287 | static void add_lguest_device(struct lguest_device_desc *d) | |
288 | { | |
289 | struct lguest_device *ldev; | |
290 | ||
e1e72965 RR |
291 | /* Start with zeroed memory; Linux's device layer seems to count on |
292 | * it. */ | |
19f1537b RR |
293 | ldev = kzalloc(sizeof(*ldev), GFP_KERNEL); |
294 | if (!ldev) { | |
295 | printk(KERN_EMERG "Cannot allocate lguest dev %u\n", | |
296 | dev_index++); | |
297 | return; | |
298 | } | |
299 | ||
300 | /* This devices' parent is the lguest/ dir. */ | |
301 | ldev->vdev.dev.parent = &lguest_root; | |
302 | /* We have a unique device index thanks to the dev_index counter. */ | |
303 | ldev->vdev.index = dev_index++; | |
304 | /* The device type comes straight from the descriptor. There's also a | |
305 | * device vendor field in the virtio_device struct, which we leave as | |
306 | * 0. */ | |
307 | ldev->vdev.id.device = d->type; | |
308 | /* We have a simple set of routines for querying the device's | |
309 | * configuration information and setting its status. */ | |
310 | ldev->vdev.config = &lguest_config_ops; | |
311 | /* And we remember the device's descriptor for lguest_config_ops. */ | |
312 | ldev->desc = d; | |
313 | ||
314 | /* register_virtio_device() sets up the generic fields for the struct | |
315 | * virtio_device and calls device_register(). This makes the bus | |
316 | * infrastructure look for a matching driver. */ | |
317 | if (register_virtio_device(&ldev->vdev) != 0) { | |
318 | printk(KERN_ERR "Failed to register lguest device %u\n", | |
319 | ldev->vdev.index); | |
320 | kfree(ldev); | |
321 | } | |
322 | } | |
323 | ||
324 | /*D:110 scan_devices() simply iterates through the device page. The type 0 is | |
325 | * reserved to mean "end of devices". */ | |
326 | static void scan_devices(void) | |
327 | { | |
328 | unsigned int i; | |
329 | struct lguest_device_desc *d; | |
330 | ||
331 | /* We start at the page beginning, and skip over each entry. */ | |
332 | for (i = 0; i < PAGE_SIZE; i += sizeof(*d) + d->config_len) { | |
333 | d = lguest_devices + i; | |
334 | ||
335 | /* Once we hit a zero, stop. */ | |
336 | if (d->type == 0) | |
337 | break; | |
338 | ||
339 | add_lguest_device(d); | |
340 | } | |
341 | } | |
342 | ||
343 | /*D:105 Fairly early in boot, lguest_devices_init() is called to set up the | |
344 | * lguest device infrastructure. We check that we are a Guest by checking | |
345 | * pv_info.name: there are other ways of checking, but this seems most | |
346 | * obvious to me. | |
347 | * | |
348 | * So we can access the "struct lguest_device_desc"s easily, we map that memory | |
349 | * and store the pointer in the global "lguest_devices". Then we register a | |
350 | * root device from which all our devices will hang (this seems to be the | |
351 | * correct sysfs incantation). | |
352 | * | |
353 | * Finally we call scan_devices() which adds all the devices found in the | |
354 | * lguest_devices page. */ | |
355 | static int __init lguest_devices_init(void) | |
356 | { | |
357 | if (strcmp(pv_info.name, "lguest") != 0) | |
358 | return 0; | |
359 | ||
360 | if (device_register(&lguest_root) != 0) | |
361 | panic("Could not register lguest root"); | |
362 | ||
363 | /* Devices are in a single page above top of "normal" mem */ | |
364 | lguest_devices = lguest_map(max_pfn<<PAGE_SHIFT, 1); | |
365 | ||
366 | scan_devices(); | |
367 | return 0; | |
368 | } | |
369 | /* We do this after core stuff, but before the drivers. */ | |
370 | postcore_initcall(lguest_devices_init); | |
371 | ||
372 | /*D:150 At this point in the journey we used to wade through the lguest | |
373 | * devices themselves: net, block and console. Since they're all now virtio | |
374 | * devices rather than lguest-specific, I've decided to ignore them. Mostly, | |
375 | * they're kind of boring. But this does mean you'll never experience the | |
376 | * thrill of reading the forbidden love scene buried deep in the block driver. | |
377 | * | |
378 | * "make Launcher" beckons, where we answer questions like "Where do Guests | |
379 | * come from?", and "What do you do when someone asks for optimization?". */ |