Commit | Line | Data |
---|---|---|
30edc14b KRW |
1 | /* |
2 | * PCI Backend Operations - respond to PCI requests from Frontend | |
3 | * | |
4 | * Author: Ryan Wilson <hap9@epoch.ncsc.mil> | |
5 | */ | |
283c0972 JP |
6 | |
7 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | |
8 | ||
30edc14b KRW |
9 | #include <linux/module.h> |
10 | #include <linux/wait.h> | |
11 | #include <linux/bitops.h> | |
12 | #include <xen/events.h> | |
13 | #include <linux/sched.h> | |
14 | #include "pciback.h" | |
15 | ||
16 | int verbose_request; | |
17 | module_param(verbose_request, int, 0644); | |
18 | ||
a92336a1 KRW |
19 | static irqreturn_t xen_pcibk_guest_interrupt(int irq, void *dev_id); |
20 | ||
/* Ensure a device has the fake IRQ handler "turned on/off" and is
 * ready to be exported. This MUST be run after xen_pcibk_reset_device
 * which does the actual PCI device enable/disable.
 */
static void xen_pcibk_control_isr(struct pci_dev *dev, int reset)
{
	struct xen_pcibk_dev_data *dev_data;
	int rc;
	int enable = 0;

	dev_data = pci_get_drvdata(dev);
	if (!dev_data)
		return;

	/* We don't deal with bridges */
	if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL)
		return;

	/* A reset forgets any guest-requested INTx state. */
	if (reset) {
		dev_data->enable_intx = 0;
		dev_data->ack_intr = 0;
	}
	enable = dev_data->enable_intx;

	/* Asked to disable, but ISR isn't running - nothing to do. */
	if (!enable && !dev_data->isr_on)
		return;

	/* Squirrel away the IRQs in the dev_data. We need this
	 * b/c when device transitions to MSI, the dev->irq is
	 * overwritten with the MSI vector.
	 */
	if (enable)
		dev_data->irq = dev->irq;

	/*
	 * SR-IOV devices in all cases use MSI-X and have no legacy
	 * interrupts, so inhibit creating a fake IRQ handler for them.
	 */
	if (dev_data->irq == 0)
		goto out;

	dev_dbg(&dev->dev, "%s: #%d %s %s%s %s-> %s\n",
		dev_data->irq_name,
		dev_data->irq,
		pci_is_enabled(dev) ? "on" : "off",
		dev->msi_enabled ? "MSI" : "",
		dev->msix_enabled ? "MSI/X" : "",
		dev_data->isr_on ? "enable" : "disable",
		enable ? "enable" : "disable");

	if (enable) {
		/* IRQF_SHARED: the real driver lives in the guest, so the
		 * host-side handler only acks on the guest's behalf. */
		rc = request_irq(dev_data->irq,
				xen_pcibk_guest_interrupt, IRQF_SHARED,
				dev_data->irq_name, dev);
		if (rc) {
			dev_err(&dev->dev, "%s: failed to install fake IRQ " \
				"handler for IRQ %d! (rc:%d)\n",
				dev_data->irq_name, dev_data->irq, rc);
			goto out;
		}
	} else {
		free_irq(dev_data->irq, dev);
		dev_data->irq = 0;
	}
	/* isr_on/ack_intr only track the new state when the request above
	 * succeeded; on failure they keep the previous state (see dbg below). */
	dev_data->isr_on = enable;
	dev_data->ack_intr = enable;
out:
	dev_dbg(&dev->dev, "%s: #%d %s %s%s %s\n",
		dev_data->irq_name,
		dev_data->irq,
		pci_is_enabled(dev) ? "on" : "off",
		dev->msi_enabled ? "MSI" : "",
		dev->msix_enabled ? "MSI/X" : "",
		enable ? (dev_data->isr_on ? "enabled" : "failed to enable") :
			(dev_data->isr_on ? "failed to disable" : "disabled"));
}
98 | ||
30edc14b | 99 | /* Ensure a device is "turned off" and ready to be exported. |
a92336a1 | 100 | * (Also see xen_pcibk_config_reset to ensure virtual configuration space is |
30edc14b KRW |
101 | * ready to be re-exported) |
102 | */ | |
a92336a1 | 103 | void xen_pcibk_reset_device(struct pci_dev *dev) |
30edc14b KRW |
104 | { |
105 | u16 cmd; | |
106 | ||
a92336a1 | 107 | xen_pcibk_control_isr(dev, 1 /* reset device */); |
0513fe9e | 108 | |
30edc14b KRW |
109 | /* Disable devices (but not bridges) */ |
110 | if (dev->hdr_type == PCI_HEADER_TYPE_NORMAL) { | |
a2be65fd KRW |
111 | #ifdef CONFIG_PCI_MSI |
112 | /* The guest could have been abruptly killed without | |
113 | * disabling MSI/MSI-X interrupts.*/ | |
114 | if (dev->msix_enabled) | |
115 | pci_disable_msix(dev); | |
116 | if (dev->msi_enabled) | |
117 | pci_disable_msi(dev); | |
118 | #endif | |
bdc5c181 KRW |
119 | if (pci_is_enabled(dev)) |
120 | pci_disable_device(dev); | |
30edc14b KRW |
121 | |
122 | pci_write_config_word(dev, PCI_COMMAND, 0); | |
123 | ||
124 | dev->is_busmaster = 0; | |
125 | } else { | |
126 | pci_read_config_word(dev, PCI_COMMAND, &cmd); | |
127 | if (cmd & (PCI_COMMAND_INVALIDATE)) { | |
128 | cmd &= ~(PCI_COMMAND_INVALIDATE); | |
129 | pci_write_config_word(dev, PCI_COMMAND, cmd); | |
130 | ||
131 | dev->is_busmaster = 0; | |
132 | } | |
133 | } | |
134 | } | |
a92336a1 KRW |
135 | |
136 | #ifdef CONFIG_PCI_MSI | |
static
int xen_pcibk_enable_msi(struct xen_pcibk_device *pdev,
			 struct pci_dev *dev, struct xen_pci_op *op)
{
	struct xen_pcibk_dev_data *dev_data;
	int status;

	if (unlikely(verbose_request))
		printk(KERN_DEBUG DRV_NAME ": %s: enable MSI\n", pci_name(dev));

	/* Refuse a second enable, and refuse MSI while MSI-X is active
	 * (the two modes are mutually exclusive). */
	if (dev->msi_enabled)
		status = -EALREADY;
	else if (dev->msix_enabled)
		status = -ENXIO;
	else
		status = pci_enable_msi(dev);

	if (status) {
		pr_warn_ratelimited("%s: error enabling MSI for guest %u: err %d\n",
				    pci_name(dev), pdev->xdev->otherend_id,
				    status);
		op->value = 0;
		return XEN_PCI_ERR_op_failed;
	}

	/* The value the guest needs is actually the IDT vector, not
	 * the local domain's IRQ number. */

	op->value = dev->irq ? xen_pirq_from_irq(dev->irq) : 0;
	if (unlikely(verbose_request))
		printk(KERN_DEBUG DRV_NAME ": %s: MSI: %d\n", pci_name(dev),
			op->value);

	/* The fake INTx handler must stop acking now that MSI is active. */
	dev_data = pci_get_drvdata(dev);
	if (dev_data)
		dev_data->ack_intr = 0;

	return 0;
}
176 | ||
177 | static | |
178 | int xen_pcibk_disable_msi(struct xen_pcibk_device *pdev, | |
179 | struct pci_dev *dev, struct xen_pci_op *op) | |
180 | { | |
181 | struct xen_pcibk_dev_data *dev_data; | |
182 | ||
183 | if (unlikely(verbose_request)) | |
184 | printk(KERN_DEBUG DRV_NAME ": %s: disable MSI\n", | |
185 | pci_name(dev)); | |
186 | pci_disable_msi(dev); | |
187 | ||
188 | op->value = dev->irq ? xen_pirq_from_irq(dev->irq) : 0; | |
189 | if (unlikely(verbose_request)) | |
190 | printk(KERN_DEBUG DRV_NAME ": %s: MSI: %d\n", pci_name(dev), | |
191 | op->value); | |
192 | dev_data = pci_get_drvdata(dev); | |
193 | if (dev_data) | |
194 | dev_data->ack_intr = 1; | |
195 | return 0; | |
196 | } | |
197 | ||
198 | static | |
199 | int xen_pcibk_enable_msix(struct xen_pcibk_device *pdev, | |
200 | struct pci_dev *dev, struct xen_pci_op *op) | |
201 | { | |
202 | struct xen_pcibk_dev_data *dev_data; | |
203 | int i, result; | |
204 | struct msix_entry *entries; | |
205 | ||
206 | if (unlikely(verbose_request)) | |
207 | printk(KERN_DEBUG DRV_NAME ": %s: enable MSI-X\n", | |
208 | pci_name(dev)); | |
5e0ce145 | 209 | |
a92336a1 KRW |
210 | if (op->value > SH_INFO_MAX_VEC) |
211 | return -EINVAL; | |
212 | ||
5e0ce145 KRW |
213 | if (dev->msix_enabled) |
214 | return -EALREADY; | |
215 | ||
216 | if (dev->msi_enabled) | |
217 | return -ENXIO; | |
218 | ||
a92336a1 KRW |
219 | entries = kmalloc(op->value * sizeof(*entries), GFP_KERNEL); |
220 | if (entries == NULL) | |
221 | return -ENOMEM; | |
222 | ||
223 | for (i = 0; i < op->value; i++) { | |
224 | entries[i].entry = op->msix_entries[i].entry; | |
225 | entries[i].vector = op->msix_entries[i].vector; | |
226 | } | |
227 | ||
efdfa3ed | 228 | result = pci_enable_msix_exact(dev, entries, op->value); |
a92336a1 KRW |
229 | if (result == 0) { |
230 | for (i = 0; i < op->value; i++) { | |
231 | op->msix_entries[i].entry = entries[i].entry; | |
c0914e61 | 232 | if (entries[i].vector) { |
a92336a1 KRW |
233 | op->msix_entries[i].vector = |
234 | xen_pirq_from_irq(entries[i].vector); | |
235 | if (unlikely(verbose_request)) | |
236 | printk(KERN_DEBUG DRV_NAME ": %s: " \ | |
237 | "MSI-X[%d]: %d\n", | |
238 | pci_name(dev), i, | |
239 | op->msix_entries[i].vector); | |
c0914e61 | 240 | } |
a92336a1 | 241 | } |
51ac8893 | 242 | } else |
283c0972 | 243 | pr_warn_ratelimited("%s: error enabling MSI-X for guest %u: err %d!\n", |
51ac8893 JB |
244 | pci_name(dev), pdev->xdev->otherend_id, |
245 | result); | |
a92336a1 KRW |
246 | kfree(entries); |
247 | ||
248 | op->value = result; | |
249 | dev_data = pci_get_drvdata(dev); | |
250 | if (dev_data) | |
251 | dev_data->ack_intr = 0; | |
252 | ||
0ee46eca | 253 | return result > 0 ? 0 : result; |
a92336a1 KRW |
254 | } |
255 | ||
256 | static | |
257 | int xen_pcibk_disable_msix(struct xen_pcibk_device *pdev, | |
258 | struct pci_dev *dev, struct xen_pci_op *op) | |
259 | { | |
260 | struct xen_pcibk_dev_data *dev_data; | |
261 | if (unlikely(verbose_request)) | |
262 | printk(KERN_DEBUG DRV_NAME ": %s: disable MSI-X\n", | |
263 | pci_name(dev)); | |
264 | pci_disable_msix(dev); | |
265 | ||
266 | /* | |
267 | * SR-IOV devices (which don't have any legacy IRQ) have | |
268 | * an undefined IRQ value of zero. | |
269 | */ | |
270 | op->value = dev->irq ? xen_pirq_from_irq(dev->irq) : 0; | |
271 | if (unlikely(verbose_request)) | |
272 | printk(KERN_DEBUG DRV_NAME ": %s: MSI-X: %d\n", pci_name(dev), | |
273 | op->value); | |
274 | dev_data = pci_get_drvdata(dev); | |
275 | if (dev_data) | |
276 | dev_data->ack_intr = 1; | |
277 | return 0; | |
278 | } | |
279 | #endif | |
30edc14b KRW |
280 | /* |
281 | * Now the same evtchn is used for both pcifront conf_read_write request | |
282 | * as well as pcie aer front end ack. We use a new work_queue to schedule | |
a92336a1 | 283 | * xen_pcibk conf_read_write service for avoiding confict with aer_core |
30edc14b KRW |
284 | * do_recovery job which also use the system default work_queue |
285 | */ | |
a92336a1 | 286 | void xen_pcibk_test_and_schedule_op(struct xen_pcibk_device *pdev) |
30edc14b KRW |
287 | { |
288 | /* Check that frontend is requesting an operation and that we are not | |
289 | * already processing a request */ | |
290 | if (test_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags) | |
291 | && !test_and_set_bit(_PDEVF_op_active, &pdev->flags)) { | |
a92336a1 | 292 | queue_work(xen_pcibk_wq, &pdev->op_work); |
30edc14b KRW |
293 | } |
294 | /*_XEN_PCIB_active should have been cleared by pcifront. And also make | |
a92336a1 | 295 | sure xen_pcibk is waiting for ack by checking _PCIB_op_pending*/ |
30edc14b KRW |
296 | if (!test_bit(_XEN_PCIB_active, (unsigned long *)&pdev->sh_info->flags) |
297 | && test_bit(_PCIB_op_pending, &pdev->flags)) { | |
a92336a1 | 298 | wake_up(&xen_pcibk_aer_wait_queue); |
30edc14b KRW |
299 | } |
300 | } | |
301 | ||
/* Performing the configuration space reads/writes must not be done in atomic
 * context because some of the pci_* functions can sleep (mostly due to ACPI
 * use of semaphores). This function is intended to be called from a work
 * queue in process context taking a struct xen_pcibk_device as a parameter */

void xen_pcibk_do_op(struct work_struct *data)
{
	struct xen_pcibk_device *pdev =
		container_of(data, struct xen_pcibk_device, op_work);
	struct pci_dev *dev;
	struct xen_pcibk_dev_data *dev_data = NULL;
	struct xen_pci_op *op = &pdev->op;
	int test_intx = 0;

	/* Snapshot the request out of the shared page so the frontend cannot
	 * mutate it while we process it (time-of-check/time-of-use). */
	*op = pdev->sh_info->op;
	barrier();
	dev = xen_pcibk_get_pci_dev(pdev, op->domain, op->bus, op->devfn);

	if (dev == NULL)
		op->err = XEN_PCI_ERR_dev_not_found;
	else {
		/* Remember the INTx state so we can detect a transition
		 * caused by the config write below. */
		dev_data = pci_get_drvdata(dev);
		if (dev_data)
			test_intx = dev_data->enable_intx;
		switch (op->cmd) {
		case XEN_PCI_OP_conf_read:
			op->err = xen_pcibk_config_read(dev,
				  op->offset, op->size, &op->value);
			break;
		case XEN_PCI_OP_conf_write:
			op->err = xen_pcibk_config_write(dev,
				  op->offset, op->size, op->value);
			break;
#ifdef CONFIG_PCI_MSI
		case XEN_PCI_OP_enable_msi:
			op->err = xen_pcibk_enable_msi(pdev, dev, op);
			break;
		case XEN_PCI_OP_disable_msi:
			op->err = xen_pcibk_disable_msi(pdev, dev, op);
			break;
		case XEN_PCI_OP_enable_msix:
			op->err = xen_pcibk_enable_msix(pdev, dev, op);
			break;
		case XEN_PCI_OP_disable_msix:
			op->err = xen_pcibk_disable_msix(pdev, dev, op);
			break;
#endif
		default:
			op->err = XEN_PCI_ERR_not_implemented;
			break;
		}
	}
	if (!op->err && dev && dev_data) {
		/* Transition detected */
		if ((dev_data->enable_intx != test_intx))
			xen_pcibk_control_isr(dev, 0 /* no reset */);
	}
	/* Copy back only the fields the frontend is meant to see, rather
	 * than the whole local op. */
	pdev->sh_info->op.err = op->err;
	pdev->sh_info->op.value = op->value;
#ifdef CONFIG_PCI_MSI
	if (op->cmd == XEN_PCI_OP_enable_msix && op->err == 0) {
		unsigned int i;

		for (i = 0; i < op->value; i++)
			pdev->sh_info->op.msix_entries[i].vector =
				op->msix_entries[i].vector;
	}
#endif
	/* Tell the driver domain that we're done. */
	wmb();
	clear_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags);
	notify_remote_via_irq(pdev->evtchn_irq);

	/* Mark that we're done. */
	smp_mb__before_atomic(); /* /after/ clearing PCIF_active */
	clear_bit(_PDEVF_op_active, &pdev->flags);
	smp_mb__after_atomic(); /* /before/ final check for work */

	/* Check to see if the driver domain tried to start another request in
	 * between clearing _XEN_PCIF_active and clearing _PDEVF_op_active.
	 */
	xen_pcibk_test_and_schedule_op(pdev);
}
385 | ||
a92336a1 | 386 | irqreturn_t xen_pcibk_handle_event(int irq, void *dev_id) |
30edc14b | 387 | { |
a92336a1 | 388 | struct xen_pcibk_device *pdev = dev_id; |
30edc14b | 389 | |
a92336a1 | 390 | xen_pcibk_test_and_schedule_op(pdev); |
30edc14b KRW |
391 | |
392 | return IRQ_HANDLED; | |
393 | } | |
/*
 * Fake IRQ handler installed on behalf of the frontend (the real device
 * driver runs in the guest).  It acks a shared legacy interrupt line so
 * the line is not left screaming while the guest handles it.
 */
static irqreturn_t xen_pcibk_guest_interrupt(int irq, void *dev_id)
{
	struct pci_dev *dev = (struct pci_dev *)dev_id;
	/* NOTE(review): dev_data assumed non-NULL — this handler is only
	 * installed by xen_pcibk_control_isr after drvdata is set; confirm. */
	struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev);

	if (dev_data->isr_on && dev_data->ack_intr) {
		dev_data->handled++;
		/* Every 1000 handled interrupts, re-check whether the line
		 * is still shared.  xen_test_irq_shared() returning true
		 * here corresponds to the "not shared" case per the printed
		 * message, so the fake ack is no longer needed. */
		if ((dev_data->handled % 1000) == 0) {
			if (xen_test_irq_shared(irq)) {
				pr_info("%s IRQ line is not shared "
					"with other domains. Turning ISR off\n",
					dev_data->irq_name);
				dev_data->ack_intr = 0;
			}
		}
		return IRQ_HANDLED;
	}
	return IRQ_NONE;
}