Commit | Line | Data |
---|---|---|
30edc14b KRW |
1 | /* |
2 | * PCI Backend Operations - respond to PCI requests from Frontend | |
3 | * | |
4 | * Author: Ryan Wilson <hap9@epoch.ncsc.mil> | |
5 | */ | |
6 | #include <linux/module.h> | |
7 | #include <linux/wait.h> | |
8 | #include <linux/bitops.h> | |
9 | #include <xen/events.h> | |
10 | #include <linux/sched.h> | |
11 | #include "pciback.h" | |
12 | ||
13 | int verbose_request; /* when non-zero, the MSI/MSI-X request handlers in this file log each request with printk(KERN_DEBUG) */ | |
14 | module_param(verbose_request, int, 0644); /* exposed as an int module parameter with mode 0644 */ | |
15 | ||
a92336a1 KRW |
16 | static irqreturn_t xen_pcibk_guest_interrupt(int irq, void *dev_id); /* forward declaration: passed to request_irq() in xen_pcibk_control_isr, defined at the end of this file */ |
17 | ||
0513fe9e | 18 | /* Ensure a device is has the fake IRQ handler "turned on/off" and is |
a92336a1 | 19 | * ready to be exported. This MUST be run after xen_pcibk_reset_device |
0513fe9e KRW |
20 | * which does the actual PCI device enable/disable. |
21 | */ | |
a92336a1 | 22 | static void xen_pcibk_control_isr(struct pci_dev *dev, int reset) |
0513fe9e | 23 | { |
a92336a1 | 24 | struct xen_pcibk_dev_data *dev_data; |
0513fe9e KRW |
25 | int rc; |
26 | int enable = 0; | |
27 | ||
28 | dev_data = pci_get_drvdata(dev); | |
29 | if (!dev_data) | |
30 | return; | |
31 | ||
32 | /* We don't deal with bridges */ | |
33 | if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL) | |
34 | return; | |
35 | ||
36 | if (reset) { | |
37 | dev_data->enable_intx = 0; | |
38 | dev_data->ack_intr = 0; | |
39 | } | |
40 | enable = dev_data->enable_intx; | |
41 | ||
42 | /* Asked to disable, but ISR isn't runnig */ | |
43 | if (!enable && !dev_data->isr_on) | |
44 | return; | |
45 | ||
46 | /* Squirrel away the IRQs in the dev_data. We need this | |
47 | * b/c when device transitions to MSI, the dev->irq is | |
48 | * overwritten with the MSI vector. | |
49 | */ | |
50 | if (enable) | |
51 | dev_data->irq = dev->irq; | |
52 | ||
e17ab35f KRW |
53 | /* |
54 | * SR-IOV devices in all use MSI-X and have no legacy | |
55 | * interrupts, so inhibit creating a fake IRQ handler for them. | |
56 | */ | |
57 | if (dev_data->irq == 0) | |
58 | goto out; | |
59 | ||
0513fe9e KRW |
60 | dev_dbg(&dev->dev, "%s: #%d %s %s%s %s-> %s\n", |
61 | dev_data->irq_name, | |
62 | dev_data->irq, | |
63 | pci_is_enabled(dev) ? "on" : "off", | |
64 | dev->msi_enabled ? "MSI" : "", | |
65 | dev->msix_enabled ? "MSI/X" : "", | |
66 | dev_data->isr_on ? "enable" : "disable", | |
67 | enable ? "enable" : "disable"); | |
68 | ||
69 | if (enable) { | |
70 | rc = request_irq(dev_data->irq, | |
a92336a1 | 71 | xen_pcibk_guest_interrupt, IRQF_SHARED, |
0513fe9e KRW |
72 | dev_data->irq_name, dev); |
73 | if (rc) { | |
74 | dev_err(&dev->dev, "%s: failed to install fake IRQ " \ | |
75 | "handler for IRQ %d! (rc:%d)\n", | |
76 | dev_data->irq_name, dev_data->irq, rc); | |
77 | goto out; | |
78 | } | |
79 | } else { | |
80 | free_irq(dev_data->irq, dev); | |
81 | dev_data->irq = 0; | |
82 | } | |
83 | dev_data->isr_on = enable; | |
84 | dev_data->ack_intr = enable; | |
85 | out: | |
86 | dev_dbg(&dev->dev, "%s: #%d %s %s%s %s\n", | |
87 | dev_data->irq_name, | |
88 | dev_data->irq, | |
89 | pci_is_enabled(dev) ? "on" : "off", | |
90 | dev->msi_enabled ? "MSI" : "", | |
91 | dev->msix_enabled ? "MSI/X" : "", | |
92 | enable ? (dev_data->isr_on ? "enabled" : "failed to enable") : | |
93 | (dev_data->isr_on ? "failed to disable" : "disabled")); | |
94 | } | |
95 | ||
30edc14b | 96 | /* Ensure a device is "turned off" and ready to be exported. |
a92336a1 | 97 | * (Also see xen_pcibk_config_reset to ensure virtual configuration space is |
30edc14b KRW |
98 | * ready to be re-exported) |
99 | */ | |
a92336a1 | 100 | void xen_pcibk_reset_device(struct pci_dev *dev) |
30edc14b KRW |
101 | { |
102 | u16 cmd; | |
103 | ||
a92336a1 | 104 | xen_pcibk_control_isr(dev, 1 /* reset device */); |
0513fe9e | 105 | |
30edc14b KRW |
106 | /* Disable devices (but not bridges) */ |
107 | if (dev->hdr_type == PCI_HEADER_TYPE_NORMAL) { | |
a2be65fd KRW |
108 | #ifdef CONFIG_PCI_MSI |
109 | /* The guest could have been abruptly killed without | |
110 | * disabling MSI/MSI-X interrupts.*/ | |
111 | if (dev->msix_enabled) | |
112 | pci_disable_msix(dev); | |
113 | if (dev->msi_enabled) | |
114 | pci_disable_msi(dev); | |
115 | #endif | |
30edc14b KRW |
116 | pci_disable_device(dev); |
117 | ||
118 | pci_write_config_word(dev, PCI_COMMAND, 0); | |
119 | ||
120 | dev->is_busmaster = 0; | |
121 | } else { | |
122 | pci_read_config_word(dev, PCI_COMMAND, &cmd); | |
123 | if (cmd & (PCI_COMMAND_INVALIDATE)) { | |
124 | cmd &= ~(PCI_COMMAND_INVALIDATE); | |
125 | pci_write_config_word(dev, PCI_COMMAND, cmd); | |
126 | ||
127 | dev->is_busmaster = 0; | |
128 | } | |
129 | } | |
130 | } | |
a92336a1 KRW |
131 | |
132 | #ifdef CONFIG_PCI_MSI | |
133 | static | |
134 | int xen_pcibk_enable_msi(struct xen_pcibk_device *pdev, | |
135 | struct pci_dev *dev, struct xen_pci_op *op) | |
136 | { | |
137 | struct xen_pcibk_dev_data *dev_data; | |
a92336a1 KRW |
138 | int status; |
139 | ||
140 | if (unlikely(verbose_request)) | |
141 | printk(KERN_DEBUG DRV_NAME ": %s: enable MSI\n", pci_name(dev)); | |
142 | ||
143 | status = pci_enable_msi(dev); | |
144 | ||
145 | if (status) { | |
51ac8893 JB |
146 | pr_warn_ratelimited(DRV_NAME ": %s: error enabling MSI for guest %u: err %d\n", |
147 | pci_name(dev), pdev->xdev->otherend_id, | |
148 | status); | |
a92336a1 KRW |
149 | op->value = 0; |
150 | return XEN_PCI_ERR_op_failed; | |
151 | } | |
152 | ||
153 | /* The value the guest needs is actually the IDT vector, not the | |
154 | * the local domain's IRQ number. */ | |
155 | ||
156 | op->value = dev->irq ? xen_pirq_from_irq(dev->irq) : 0; | |
157 | if (unlikely(verbose_request)) | |
158 | printk(KERN_DEBUG DRV_NAME ": %s: MSI: %d\n", pci_name(dev), | |
159 | op->value); | |
160 | ||
161 | dev_data = pci_get_drvdata(dev); | |
162 | if (dev_data) | |
163 | dev_data->ack_intr = 0; | |
164 | ||
165 | return 0; | |
166 | } | |
167 | ||
168 | static | |
169 | int xen_pcibk_disable_msi(struct xen_pcibk_device *pdev, | |
170 | struct pci_dev *dev, struct xen_pci_op *op) | |
171 | { | |
172 | struct xen_pcibk_dev_data *dev_data; | |
173 | ||
174 | if (unlikely(verbose_request)) | |
175 | printk(KERN_DEBUG DRV_NAME ": %s: disable MSI\n", | |
176 | pci_name(dev)); | |
177 | pci_disable_msi(dev); | |
178 | ||
179 | op->value = dev->irq ? xen_pirq_from_irq(dev->irq) : 0; | |
180 | if (unlikely(verbose_request)) | |
181 | printk(KERN_DEBUG DRV_NAME ": %s: MSI: %d\n", pci_name(dev), | |
182 | op->value); | |
183 | dev_data = pci_get_drvdata(dev); | |
184 | if (dev_data) | |
185 | dev_data->ack_intr = 1; | |
186 | return 0; | |
187 | } | |
188 | ||
189 | static | |
190 | int xen_pcibk_enable_msix(struct xen_pcibk_device *pdev, | |
191 | struct pci_dev *dev, struct xen_pci_op *op) | |
192 | { | |
193 | struct xen_pcibk_dev_data *dev_data; | |
194 | int i, result; | |
195 | struct msix_entry *entries; | |
196 | ||
197 | if (unlikely(verbose_request)) | |
198 | printk(KERN_DEBUG DRV_NAME ": %s: enable MSI-X\n", | |
199 | pci_name(dev)); | |
200 | if (op->value > SH_INFO_MAX_VEC) | |
201 | return -EINVAL; | |
202 | ||
203 | entries = kmalloc(op->value * sizeof(*entries), GFP_KERNEL); | |
204 | if (entries == NULL) | |
205 | return -ENOMEM; | |
206 | ||
207 | for (i = 0; i < op->value; i++) { | |
208 | entries[i].entry = op->msix_entries[i].entry; | |
209 | entries[i].vector = op->msix_entries[i].vector; | |
210 | } | |
211 | ||
212 | result = pci_enable_msix(dev, entries, op->value); | |
213 | ||
214 | if (result == 0) { | |
215 | for (i = 0; i < op->value; i++) { | |
216 | op->msix_entries[i].entry = entries[i].entry; | |
217 | if (entries[i].vector) | |
218 | op->msix_entries[i].vector = | |
219 | xen_pirq_from_irq(entries[i].vector); | |
220 | if (unlikely(verbose_request)) | |
221 | printk(KERN_DEBUG DRV_NAME ": %s: " \ | |
222 | "MSI-X[%d]: %d\n", | |
223 | pci_name(dev), i, | |
224 | op->msix_entries[i].vector); | |
225 | } | |
51ac8893 JB |
226 | } else |
227 | pr_warn_ratelimited(DRV_NAME ": %s: error enabling MSI-X for guest %u: err %d!\n", | |
228 | pci_name(dev), pdev->xdev->otherend_id, | |
229 | result); | |
a92336a1 KRW |
230 | kfree(entries); |
231 | ||
232 | op->value = result; | |
233 | dev_data = pci_get_drvdata(dev); | |
234 | if (dev_data) | |
235 | dev_data->ack_intr = 0; | |
236 | ||
0ee46eca | 237 | return result > 0 ? 0 : result; |
a92336a1 KRW |
238 | } |
239 | ||
240 | static | |
241 | int xen_pcibk_disable_msix(struct xen_pcibk_device *pdev, | |
242 | struct pci_dev *dev, struct xen_pci_op *op) | |
243 | { | |
244 | struct xen_pcibk_dev_data *dev_data; | |
245 | if (unlikely(verbose_request)) | |
246 | printk(KERN_DEBUG DRV_NAME ": %s: disable MSI-X\n", | |
247 | pci_name(dev)); | |
248 | pci_disable_msix(dev); | |
249 | ||
250 | /* | |
251 | * SR-IOV devices (which don't have any legacy IRQ) have | |
252 | * an undefined IRQ value of zero. | |
253 | */ | |
254 | op->value = dev->irq ? xen_pirq_from_irq(dev->irq) : 0; | |
255 | if (unlikely(verbose_request)) | |
256 | printk(KERN_DEBUG DRV_NAME ": %s: MSI-X: %d\n", pci_name(dev), | |
257 | op->value); | |
258 | dev_data = pci_get_drvdata(dev); | |
259 | if (dev_data) | |
260 | dev_data->ack_intr = 1; | |
261 | return 0; | |
262 | } | |
263 | #endif | |
30edc14b KRW |
264 | /* |
265 | * Now the same evtchn is used for both pcifront conf_read_write request | |
266 | * as well as pcie aer front end ack. We use a new work_queue to schedule | |
a92336a1 | 267 | * xen_pcibk conf_read_write service for avoiding confict with aer_core |
30edc14b KRW |
268 | * do_recovery job which also use the system default work_queue |
269 | */ | |
a92336a1 | 270 | void xen_pcibk_test_and_schedule_op(struct xen_pcibk_device *pdev) |
30edc14b KRW |
271 | { |
272 | /* Check that frontend is requesting an operation and that we are not | |
273 | * already processing a request */ | |
274 | if (test_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags) | |
275 | && !test_and_set_bit(_PDEVF_op_active, &pdev->flags)) { | |
a92336a1 | 276 | queue_work(xen_pcibk_wq, &pdev->op_work); |
30edc14b KRW |
277 | } |
278 | /*_XEN_PCIB_active should have been cleared by pcifront. And also make | |
a92336a1 | 279 | sure xen_pcibk is waiting for ack by checking _PCIB_op_pending*/ |
30edc14b KRW |
280 | if (!test_bit(_XEN_PCIB_active, (unsigned long *)&pdev->sh_info->flags) |
281 | && test_bit(_PCIB_op_pending, &pdev->flags)) { | |
a92336a1 | 282 | wake_up(&xen_pcibk_aer_wait_queue); |
30edc14b KRW |
283 | } |
284 | } | |
285 | ||
286 | /* Performing the configuration space reads/writes must not be done in atomic | |
287 | * context because some of the pci_* functions can sleep (mostly due to ACPI | |
288 | * use of semaphores). This function is intended to be called from a work | |
a92336a1 | 289 | * queue in process context taking a struct xen_pcibk_device as a parameter */ |
30edc14b | 290 | |
a92336a1 | 291 | void xen_pcibk_do_op(struct work_struct *data) |
30edc14b | 292 | { |
a92336a1 KRW |
293 | struct xen_pcibk_device *pdev = |
294 | container_of(data, struct xen_pcibk_device, op_work); | |
30edc14b | 295 | struct pci_dev *dev; |
a92336a1 | 296 | struct xen_pcibk_dev_data *dev_data = NULL; |
30edc14b | 297 | struct xen_pci_op *op = &pdev->sh_info->op; |
0513fe9e | 298 | int test_intx = 0; |
30edc14b | 299 | |
a92336a1 | 300 | dev = xen_pcibk_get_pci_dev(pdev, op->domain, op->bus, op->devfn); |
30edc14b KRW |
301 | |
302 | if (dev == NULL) | |
303 | op->err = XEN_PCI_ERR_dev_not_found; | |
304 | else { | |
0513fe9e KRW |
305 | dev_data = pci_get_drvdata(dev); |
306 | if (dev_data) | |
307 | test_intx = dev_data->enable_intx; | |
30edc14b KRW |
308 | switch (op->cmd) { |
309 | case XEN_PCI_OP_conf_read: | |
a92336a1 | 310 | op->err = xen_pcibk_config_read(dev, |
30edc14b KRW |
311 | op->offset, op->size, &op->value); |
312 | break; | |
313 | case XEN_PCI_OP_conf_write: | |
a92336a1 | 314 | op->err = xen_pcibk_config_write(dev, |
30edc14b KRW |
315 | op->offset, op->size, op->value); |
316 | break; | |
317 | #ifdef CONFIG_PCI_MSI | |
318 | case XEN_PCI_OP_enable_msi: | |
a92336a1 | 319 | op->err = xen_pcibk_enable_msi(pdev, dev, op); |
30edc14b KRW |
320 | break; |
321 | case XEN_PCI_OP_disable_msi: | |
a92336a1 | 322 | op->err = xen_pcibk_disable_msi(pdev, dev, op); |
30edc14b KRW |
323 | break; |
324 | case XEN_PCI_OP_enable_msix: | |
a92336a1 | 325 | op->err = xen_pcibk_enable_msix(pdev, dev, op); |
30edc14b KRW |
326 | break; |
327 | case XEN_PCI_OP_disable_msix: | |
a92336a1 | 328 | op->err = xen_pcibk_disable_msix(pdev, dev, op); |
30edc14b KRW |
329 | break; |
330 | #endif | |
331 | default: | |
332 | op->err = XEN_PCI_ERR_not_implemented; | |
333 | break; | |
334 | } | |
335 | } | |
0513fe9e KRW |
336 | if (!op->err && dev && dev_data) { |
337 | /* Transition detected */ | |
338 | if ((dev_data->enable_intx != test_intx)) | |
a92336a1 | 339 | xen_pcibk_control_isr(dev, 0 /* no reset */); |
0513fe9e | 340 | } |
30edc14b KRW |
341 | /* Tell the driver domain that we're done. */ |
342 | wmb(); | |
343 | clear_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags); | |
344 | notify_remote_via_irq(pdev->evtchn_irq); | |
345 | ||
346 | /* Mark that we're done. */ | |
347 | smp_mb__before_clear_bit(); /* /after/ clearing PCIF_active */ | |
348 | clear_bit(_PDEVF_op_active, &pdev->flags); | |
349 | smp_mb__after_clear_bit(); /* /before/ final check for work */ | |
350 | ||
351 | /* Check to see if the driver domain tried to start another request in | |
352 | * between clearing _XEN_PCIF_active and clearing _PDEVF_op_active. | |
353 | */ | |
a92336a1 | 354 | xen_pcibk_test_and_schedule_op(pdev); |
30edc14b KRW |
355 | } |
356 | ||
a92336a1 | 357 | irqreturn_t xen_pcibk_handle_event(int irq, void *dev_id) |
30edc14b | 358 | { |
a92336a1 | 359 | struct xen_pcibk_device *pdev = dev_id; |
30edc14b | 360 | |
a92336a1 | 361 | xen_pcibk_test_and_schedule_op(pdev); |
30edc14b KRW |
362 | |
363 | return IRQ_HANDLED; | |
364 | } | |
a92336a1 | 365 | static irqreturn_t xen_pcibk_guest_interrupt(int irq, void *dev_id) |
0513fe9e KRW |
366 | { |
367 | struct pci_dev *dev = (struct pci_dev *)dev_id; | |
a92336a1 | 368 | struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev); |
0513fe9e KRW |
369 | |
370 | if (dev_data->isr_on && dev_data->ack_intr) { | |
371 | dev_data->handled++; | |
372 | if ((dev_data->handled % 1000) == 0) { | |
373 | if (xen_test_irq_shared(irq)) { | |
374 | printk(KERN_INFO "%s IRQ line is not shared " | |
375 | "with other domains. Turning ISR off\n", | |
376 | dev_data->irq_name); | |
377 | dev_data->ack_intr = 0; | |
378 | } | |
379 | } | |
380 | return IRQ_HANDLED; | |
381 | } | |
382 | return IRQ_NONE; | |
383 | } |