iommu/amd: Convert IOMMUv2 state_table into state_list
drivers/iommu/amd_iommu_v2.c
/*
 * Copyright (C) 2010-2012 Advanced Micro Devices, Inc.
 * Author: Joerg Roedel <joerg.roedel@amd.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published
 * by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

#include <linux/mmu_notifier.h>
#include <linux/amd-iommu.h>
#include <linux/mm_types.h>
#include <linux/profile.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/iommu.h>
#include <linux/wait.h>
#include <linux/pci.h>
#include <linux/gfp.h>

#include "amd_iommu_types.h"
#include "amd_iommu_proto.h"

MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Joerg Roedel <joerg.roedel@amd.com>");

#define MAX_DEVICES		0x10000
#define PRI_QUEUE_SIZE		512

struct pri_queue {
	atomic_t inflight;
	bool finish;
	int status;
};

struct pasid_state {
	struct list_head list;			/* For global state-list */
	atomic_t count;				/* Reference count */
	struct task_struct *task;		/* Task bound to this PASID */
	struct mm_struct *mm;			/* mm_struct for the faults */
	struct mmu_notifier mn;			/* mmu_notifier handle */
	struct pri_queue pri[PRI_QUEUE_SIZE];	/* PRI tag states */
	struct device_state *device_state;	/* Link to our device_state */
	int pasid;				/* PASID index */
	spinlock_t lock;			/* Protect pri_queues */
	wait_queue_head_t wq;			/* To wait for count == 0 */
};

struct device_state {
	struct list_head list;
	u16 devid;
	atomic_t count;
	struct pci_dev *pdev;
	struct pasid_state **states;
	struct iommu_domain *domain;
	int pasid_levels;
	int max_pasids;
	amd_iommu_invalid_ppr_cb inv_ppr_cb;
	amd_iommu_invalidate_ctx inv_ctx_cb;
	spinlock_t lock;
	wait_queue_head_t wq;
};

struct fault {
	struct work_struct work;
	struct device_state *dev_state;
	struct pasid_state *state;
	struct mm_struct *mm;
	u64 address;
	u16 devid;
	u16 pasid;
	u16 tag;
	u16 finish;
	u16 flags;
};

static LIST_HEAD(state_list);
static spinlock_t state_lock;

/* List and lock for all pasid_states */
static LIST_HEAD(pasid_state_list);
static DEFINE_SPINLOCK(ps_lock);

static struct workqueue_struct *iommu_wq;

/*
 * Empty page table - Used between
 * mmu_notifier_invalidate_range_start and
 * mmu_notifier_invalidate_range_end
 */
static u64 *empty_page_table;

static void free_pasid_states(struct device_state *dev_state);
static void unbind_pasid(struct device_state *dev_state, int pasid);
static int task_exit(struct notifier_block *nb, unsigned long e, void *data);

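/* Build the PCI requester ID: bus number in the upper byte, devfn in the lower */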
static u16 device_id(struct pci_dev *pdev)
{
	u16 devid;

	devid = pdev->bus->number;
	devid = (devid << 8) | pdev->devfn;

	return devid;
}

static struct device_state *__get_device_state(u16 devid)
{
	struct device_state *dev_state;

	list_for_each_entry(dev_state, &state_list, list) {
		if (dev_state->devid == devid)
			return dev_state;
	}

	return NULL;
}

static struct device_state *get_device_state(u16 devid)
{
	struct device_state *dev_state;
	unsigned long flags;

	spin_lock_irqsave(&state_lock, flags);
	dev_state = __get_device_state(devid);
	if (dev_state != NULL)
		atomic_inc(&dev_state->count);
	spin_unlock_irqrestore(&state_lock, flags);

	return dev_state;
}

static void free_device_state(struct device_state *dev_state)
{
	/*
	 * First detach device from domain - No more PRI requests will arrive
	 * from that device after it is unbound from the IOMMUv2 domain.
	 */
	iommu_detach_device(dev_state->domain, &dev_state->pdev->dev);

	/* Everything is down now, free the IOMMUv2 domain */
	iommu_domain_free(dev_state->domain);

	/* Finally get rid of the device-state */
	kfree(dev_state);
}

static void put_device_state(struct device_state *dev_state)
{
	if (atomic_dec_and_test(&dev_state->count))
		wake_up(&dev_state->wq);
}

static void put_device_state_wait(struct device_state *dev_state)
{
	DEFINE_WAIT(wait);

	prepare_to_wait(&dev_state->wq, &wait, TASK_UNINTERRUPTIBLE);
	if (!atomic_dec_and_test(&dev_state->count))
		schedule();
	finish_wait(&dev_state->wq, &wait);

	free_device_state(dev_state);
}

static struct notifier_block profile_nb = {
	.notifier_call = task_exit,
};

static void link_pasid_state(struct pasid_state *pasid_state)
{
	spin_lock(&ps_lock);
	list_add_tail(&pasid_state->list, &pasid_state_list);
	spin_unlock(&ps_lock);
}

static void __unlink_pasid_state(struct pasid_state *pasid_state)
{
	list_del(&pasid_state->list);
}

static void unlink_pasid_state(struct pasid_state *pasid_state)
{
	spin_lock(&ps_lock);
	__unlink_pasid_state(pasid_state);
	spin_unlock(&ps_lock);
}

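/*
 * The per-device pasid_state table is a small radix tree: each level
 * resolves 9 bits of the PASID (512 pointers per page) and the leaf
 * level holds the pasid_state pointers themselves.
 */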
/* Must be called under dev_state->lock */
static struct pasid_state **__get_pasid_state_ptr(struct device_state *dev_state,
						  int pasid, bool alloc)
{
	struct pasid_state **root, **ptr;
	int level, index;

	level = dev_state->pasid_levels;
	root  = dev_state->states;

	while (true) {

		index = (pasid >> (9 * level)) & 0x1ff;
		ptr   = &root[index];

		if (level == 0)
			break;

		if (*ptr == NULL) {
			if (!alloc)
				return NULL;

			*ptr = (void *)get_zeroed_page(GFP_ATOMIC);
			if (*ptr == NULL)
				return NULL;
		}

		root   = (struct pasid_state **)*ptr;
		level -= 1;
	}

	return ptr;
}

static int set_pasid_state(struct device_state *dev_state,
			   struct pasid_state *pasid_state,
			   int pasid)
{
	struct pasid_state **ptr;
	unsigned long flags;
	int ret;

	spin_lock_irqsave(&dev_state->lock, flags);
	ptr = __get_pasid_state_ptr(dev_state, pasid, true);

	ret = -ENOMEM;
	if (ptr == NULL)
		goto out_unlock;

	ret = -ENOMEM;
	if (*ptr != NULL)
		goto out_unlock;

	*ptr = pasid_state;

	ret = 0;

out_unlock:
	spin_unlock_irqrestore(&dev_state->lock, flags);

	return ret;
}

static void clear_pasid_state(struct device_state *dev_state, int pasid)
{
	struct pasid_state **ptr;
	unsigned long flags;

	spin_lock_irqsave(&dev_state->lock, flags);
	ptr = __get_pasid_state_ptr(dev_state, pasid, true);

	if (ptr == NULL)
		goto out_unlock;

	*ptr = NULL;

out_unlock:
	spin_unlock_irqrestore(&dev_state->lock, flags);
}

static struct pasid_state *get_pasid_state(struct device_state *dev_state,
					   int pasid)
{
	struct pasid_state **ptr, *ret = NULL;
	unsigned long flags;

	spin_lock_irqsave(&dev_state->lock, flags);
	ptr = __get_pasid_state_ptr(dev_state, pasid, false);

	if (ptr == NULL)
		goto out_unlock;

	ret = *ptr;
	if (ret)
		atomic_inc(&ret->count);

out_unlock:
	spin_unlock_irqrestore(&dev_state->lock, flags);

	return ret;
}

static void free_pasid_state(struct pasid_state *pasid_state)
{
	kfree(pasid_state);
}

static void put_pasid_state(struct pasid_state *pasid_state)
{
	if (atomic_dec_and_test(&pasid_state->count)) {
		put_device_state(pasid_state->device_state);
		wake_up(&pasid_state->wq);
	}
}

static void put_pasid_state_wait(struct pasid_state *pasid_state)
{
	DEFINE_WAIT(wait);

	prepare_to_wait(&pasid_state->wq, &wait, TASK_UNINTERRUPTIBLE);

	if (atomic_dec_and_test(&pasid_state->count))
		put_device_state(pasid_state->device_state);
	else
		schedule();

	finish_wait(&pasid_state->wq, &wait);
	mmput(pasid_state->mm);
	free_pasid_state(pasid_state);
}

static void __unbind_pasid(struct pasid_state *pasid_state)
{
	struct iommu_domain *domain;

	domain = pasid_state->device_state->domain;

	amd_iommu_domain_clear_gcr3(domain, pasid_state->pasid);
	clear_pasid_state(pasid_state->device_state, pasid_state->pasid);

	/* Make sure no more pending faults are in the queue */
	flush_workqueue(iommu_wq);

	mmu_notifier_unregister(&pasid_state->mn, pasid_state->mm);

	put_pasid_state(pasid_state); /* Reference taken in bind() function */
}

static void unbind_pasid(struct device_state *dev_state, int pasid)
{
	struct pasid_state *pasid_state;

	pasid_state = get_pasid_state(dev_state, pasid);
	if (pasid_state == NULL)
		return;

	unlink_pasid_state(pasid_state);
	__unbind_pasid(pasid_state);
	put_pasid_state_wait(pasid_state); /* Reference taken in this function */
}

static void free_pasid_states_level1(struct pasid_state **tbl)
{
	int i;

	for (i = 0; i < 512; ++i) {
		if (tbl[i] == NULL)
			continue;

		free_page((unsigned long)tbl[i]);
	}
}

static void free_pasid_states_level2(struct pasid_state **tbl)
{
	struct pasid_state **ptr;
	int i;

	for (i = 0; i < 512; ++i) {
		if (tbl[i] == NULL)
			continue;

		ptr = (struct pasid_state **)tbl[i];
		free_pasid_states_level1(ptr);
	}
}

static void free_pasid_states(struct device_state *dev_state)
{
	struct pasid_state *pasid_state;
	int i;

	for (i = 0; i < dev_state->max_pasids; ++i) {
		pasid_state = get_pasid_state(dev_state, i);
		if (pasid_state == NULL)
			continue;

		put_pasid_state(pasid_state);
		unbind_pasid(dev_state, i);
	}

	if (dev_state->pasid_levels == 2)
		free_pasid_states_level2(dev_state->states);
	else if (dev_state->pasid_levels == 1)
		free_pasid_states_level1(dev_state->states);
	else if (dev_state->pasid_levels != 0)
		BUG();

	free_page((unsigned long)dev_state->states);
}

static struct pasid_state *mn_to_state(struct mmu_notifier *mn)
{
	return container_of(mn, struct pasid_state, mn);
}

static void __mn_flush_page(struct mmu_notifier *mn,
			    unsigned long address)
{
	struct pasid_state *pasid_state;
	struct device_state *dev_state;

	pasid_state = mn_to_state(mn);
	dev_state   = pasid_state->device_state;

	amd_iommu_flush_page(dev_state->domain, pasid_state->pasid, address);
}

static int mn_clear_flush_young(struct mmu_notifier *mn,
				struct mm_struct *mm,
				unsigned long address)
{
	__mn_flush_page(mn, address);

	return 0;
}

static void mn_change_pte(struct mmu_notifier *mn,
			  struct mm_struct *mm,
			  unsigned long address,
			  pte_t pte)
{
	__mn_flush_page(mn, address);
}

static void mn_invalidate_page(struct mmu_notifier *mn,
			       struct mm_struct *mm,
			       unsigned long address)
{
	__mn_flush_page(mn, address);
}

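/*
 * While a range is being invalidated, point the device's GCR3 at an
 * empty page table so in-flight accesses fault instead of using stale
 * translations; mn_invalidate_range_end() restores the real page table.
 */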
static void mn_invalidate_range_start(struct mmu_notifier *mn,
				      struct mm_struct *mm,
				      unsigned long start, unsigned long end)
{
	struct pasid_state *pasid_state;
	struct device_state *dev_state;

	pasid_state = mn_to_state(mn);
	dev_state   = pasid_state->device_state;

	amd_iommu_domain_set_gcr3(dev_state->domain, pasid_state->pasid,
				  __pa(empty_page_table));
}

static void mn_invalidate_range_end(struct mmu_notifier *mn,
				    struct mm_struct *mm,
				    unsigned long start, unsigned long end)
{
	struct pasid_state *pasid_state;
	struct device_state *dev_state;

	pasid_state = mn_to_state(mn);
	dev_state   = pasid_state->device_state;

	amd_iommu_domain_set_gcr3(dev_state->domain, pasid_state->pasid,
				  __pa(pasid_state->mm->pgd));
}

static struct mmu_notifier_ops iommu_mn = {
	.clear_flush_young      = mn_clear_flush_young,
	.change_pte             = mn_change_pte,
	.invalidate_page        = mn_invalidate_page,
	.invalidate_range_start = mn_invalidate_range_start,
	.invalidate_range_end   = mn_invalidate_range_end,
};

static void set_pri_tag_status(struct pasid_state *pasid_state,
			       u16 tag, int status)
{
	unsigned long flags;

	spin_lock_irqsave(&pasid_state->lock, flags);
	pasid_state->pri[tag].status = status;
	spin_unlock_irqrestore(&pasid_state->lock, flags);
}

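/*
 * Drop one in-flight reference for this PRI tag; when the last request
 * for the tag has been handled and the device asked for a completion,
 * send the PPR response with the recorded status and reset the tag state.
 */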
static void finish_pri_tag(struct device_state *dev_state,
			   struct pasid_state *pasid_state,
			   u16 tag)
{
	unsigned long flags;

	spin_lock_irqsave(&pasid_state->lock, flags);
	if (atomic_dec_and_test(&pasid_state->pri[tag].inflight) &&
	    pasid_state->pri[tag].finish) {
		amd_iommu_complete_ppr(dev_state->pdev, pasid_state->pasid,
				       pasid_state->pri[tag].status, tag);
		pasid_state->pri[tag].finish = false;
		pasid_state->pri[tag].status = PPR_SUCCESS;
	}
	spin_unlock_irqrestore(&pasid_state->lock, flags);
}

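/*
 * Worker for one queued page request: resolve the faulting address with
 * get_user_pages() in the bound mm, fall back to the driver's invalid-PPR
 * callback if that fails, then report the result for the PRI tag.
 */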
static void do_fault(struct work_struct *work)
{
	struct fault *fault = container_of(work, struct fault, work);
	int npages, write;
	struct page *page;

	write = !!(fault->flags & PPR_FAULT_WRITE);

	down_read(&fault->state->mm->mmap_sem);
	npages = get_user_pages(fault->state->task, fault->state->mm,
				fault->address, 1, write, 0, &page, NULL);
	up_read(&fault->state->mm->mmap_sem);

	if (npages == 1) {
		put_page(page);
	} else if (fault->dev_state->inv_ppr_cb) {
		int status;

		status = fault->dev_state->inv_ppr_cb(fault->dev_state->pdev,
						      fault->pasid,
						      fault->address,
						      fault->flags);
		switch (status) {
		case AMD_IOMMU_INV_PRI_RSP_SUCCESS:
			set_pri_tag_status(fault->state, fault->tag, PPR_SUCCESS);
			break;
		case AMD_IOMMU_INV_PRI_RSP_INVALID:
			set_pri_tag_status(fault->state, fault->tag, PPR_INVALID);
			break;
		case AMD_IOMMU_INV_PRI_RSP_FAIL:
			set_pri_tag_status(fault->state, fault->tag, PPR_FAILURE);
			break;
		default:
			BUG();
		}
	} else {
		set_pri_tag_status(fault->state, fault->tag, PPR_INVALID);
	}

	finish_pri_tag(fault->dev_state, fault->state, fault->tag);

	put_pasid_state(fault->state);

	kfree(fault);
}

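/*
 * Called for each peripheral page request the IOMMU reports (atomic
 * context): account the request against its PRI tag and hand the real
 * work off to the workqueue, where do_fault() may sleep.
 */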
static int ppr_notifier(struct notifier_block *nb, unsigned long e, void *data)
{
	struct amd_iommu_fault *iommu_fault;
	struct pasid_state *pasid_state;
	struct device_state *dev_state;
	unsigned long flags;
	struct fault *fault;
	bool finish;
	u16 tag;
	int ret;

	iommu_fault = data;
	tag         = iommu_fault->tag & 0x1ff;
	finish      = (iommu_fault->tag >> 9) & 1;

	ret = NOTIFY_DONE;
	dev_state = get_device_state(iommu_fault->device_id);
	if (dev_state == NULL)
		goto out;

	pasid_state = get_pasid_state(dev_state, iommu_fault->pasid);
	if (pasid_state == NULL) {
		/* We know the device but not the PASID -> send INVALID */
		amd_iommu_complete_ppr(dev_state->pdev, iommu_fault->pasid,
				       PPR_INVALID, tag);
		goto out_drop_state;
	}

	spin_lock_irqsave(&pasid_state->lock, flags);
	atomic_inc(&pasid_state->pri[tag].inflight);
	if (finish)
		pasid_state->pri[tag].finish = true;
	spin_unlock_irqrestore(&pasid_state->lock, flags);

	fault = kzalloc(sizeof(*fault), GFP_ATOMIC);
	if (fault == NULL) {
		/* We are OOM - send success and let the device re-fault */
		finish_pri_tag(dev_state, pasid_state, tag);
		goto out_drop_state;
	}

	fault->dev_state = dev_state;
	fault->address   = iommu_fault->address;
	fault->state     = pasid_state;
	fault->tag       = tag;
	fault->finish    = finish;
	fault->flags     = iommu_fault->flags;
	INIT_WORK(&fault->work, do_fault);

	queue_work(iommu_wq, &fault->work);

	ret = NOTIFY_OK;

out_drop_state:
	put_device_state(dev_state);

out:
	return ret;
}

static struct notifier_block ppr_nb = {
	.notifier_call = ppr_notifier,
};

static int task_exit(struct notifier_block *nb, unsigned long e, void *data)
{
	struct pasid_state *pasid_state;
	struct task_struct *task;

	task = data;

	/*
	 * Using this notifier is a hack - but there is no other choice
	 * at the moment. What I really want is a sleeping notifier that
	 * is called when an MM goes down. But such a notifier doesn't
	 * exist yet. The notifier needs to sleep because it has to make
	 * sure that the device does not use the PASID and the address
	 * space anymore before it is destroyed. This includes waiting
	 * for pending PRI requests to pass the workqueue. The
	 * MMU-Notifiers would be a good fit, but they use RCU and so
	 * they are not allowed to sleep. Let's see how we can solve this
	 * in a more intelligent way in the future.
	 */
again:
	spin_lock(&ps_lock);
	list_for_each_entry(pasid_state, &pasid_state_list, list) {
		struct device_state *dev_state;
		int pasid;

		if (pasid_state->task != task)
			continue;

		/* Drop Lock and unbind */
		spin_unlock(&ps_lock);

		dev_state = pasid_state->device_state;
		pasid     = pasid_state->pasid;

		if (pasid_state->device_state->inv_ctx_cb)
			dev_state->inv_ctx_cb(dev_state->pdev, pasid);

		unbind_pasid(dev_state, pasid);

		/* Task may be in the list multiple times */
		goto again;
	}
	spin_unlock(&ps_lock);

	return NOTIFY_OK;
}

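/*
 * Bind a task's address space to a PASID on the device: take a reference
 * on the task's mm, register an MMU notifier for it and program the mm's
 * page-table root into the IOMMU's GCR3 table for this PASID.
 */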
int amd_iommu_bind_pasid(struct pci_dev *pdev, int pasid,
			 struct task_struct *task)
{
	struct pasid_state *pasid_state;
	struct device_state *dev_state;
	u16 devid;
	int ret;

	might_sleep();

	if (!amd_iommu_v2_supported())
		return -ENODEV;

	devid     = device_id(pdev);
	dev_state = get_device_state(devid);

	if (dev_state == NULL)
		return -EINVAL;

	ret = -EINVAL;
	if (pasid < 0 || pasid >= dev_state->max_pasids)
		goto out;

	ret = -ENOMEM;
	pasid_state = kzalloc(sizeof(*pasid_state), GFP_KERNEL);
	if (pasid_state == NULL)
		goto out;

	atomic_set(&pasid_state->count, 1);
	init_waitqueue_head(&pasid_state->wq);
	spin_lock_init(&pasid_state->lock);

	pasid_state->task         = task;
	pasid_state->mm           = get_task_mm(task);
	pasid_state->device_state = dev_state;
	pasid_state->pasid        = pasid;
	pasid_state->mn.ops       = &iommu_mn;

	if (pasid_state->mm == NULL)
		goto out_free;

	mmu_notifier_register(&pasid_state->mn, pasid_state->mm);

	ret = set_pasid_state(dev_state, pasid_state, pasid);
	if (ret)
		goto out_unregister;

	ret = amd_iommu_domain_set_gcr3(dev_state->domain, pasid,
					__pa(pasid_state->mm->pgd));
	if (ret)
		goto out_clear_state;

	link_pasid_state(pasid_state);

	return 0;

out_clear_state:
	clear_pasid_state(dev_state, pasid);

out_unregister:
	mmu_notifier_unregister(&pasid_state->mn, pasid_state->mm);

out_free:
	free_pasid_state(pasid_state);

out:
	put_device_state(dev_state);

	return ret;
}
EXPORT_SYMBOL(amd_iommu_bind_pasid);

void amd_iommu_unbind_pasid(struct pci_dev *pdev, int pasid)
{
	struct device_state *dev_state;
	u16 devid;

	might_sleep();

	if (!amd_iommu_v2_supported())
		return;

	devid = device_id(pdev);
	dev_state = get_device_state(devid);
	if (dev_state == NULL)
		return;

	if (pasid < 0 || pasid >= dev_state->max_pasids)
		goto out;

	unbind_pasid(dev_state, pasid);

out:
	put_device_state(dev_state);
}
EXPORT_SYMBOL(amd_iommu_unbind_pasid);

int amd_iommu_init_device(struct pci_dev *pdev, int pasids)
{
	struct device_state *dev_state;
	unsigned long flags;
	int ret, tmp;
	u16 devid;

	might_sleep();

	if (!amd_iommu_v2_supported())
		return -ENODEV;

	if (pasids <= 0 || pasids > (PASID_MASK + 1))
		return -EINVAL;

	devid = device_id(pdev);

	dev_state = kzalloc(sizeof(*dev_state), GFP_KERNEL);
	if (dev_state == NULL)
		return -ENOMEM;

	spin_lock_init(&dev_state->lock);
	init_waitqueue_head(&dev_state->wq);
	dev_state->pdev  = pdev;
	dev_state->devid = devid;

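	/*
	 * Work out how many 512-entry table levels are needed to cover
	 * 'pasids' entries: each level of the pasid_state table resolves
	 * 9 bits of the PASID.
	 */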
	tmp = pasids;
	for (dev_state->pasid_levels = 0; (tmp - 1) & ~0x1ff; tmp >>= 9)
		dev_state->pasid_levels += 1;

	atomic_set(&dev_state->count, 1);
	dev_state->max_pasids = pasids;

	ret = -ENOMEM;
	dev_state->states = (void *)get_zeroed_page(GFP_KERNEL);
	if (dev_state->states == NULL)
		goto out_free_dev_state;

	dev_state->domain = iommu_domain_alloc(&pci_bus_type);
	if (dev_state->domain == NULL)
		goto out_free_states;

	amd_iommu_domain_direct_map(dev_state->domain);

	ret = amd_iommu_domain_enable_v2(dev_state->domain, pasids);
	if (ret)
		goto out_free_domain;

	ret = iommu_attach_device(dev_state->domain, &pdev->dev);
	if (ret != 0)
		goto out_free_domain;

	spin_lock_irqsave(&state_lock, flags);

	if (__get_device_state(devid) != NULL) {
		spin_unlock_irqrestore(&state_lock, flags);
		ret = -EBUSY;
		goto out_free_domain;
	}

	list_add_tail(&dev_state->list, &state_list);

	spin_unlock_irqrestore(&state_lock, flags);

	return 0;

out_free_domain:
	iommu_domain_free(dev_state->domain);

out_free_states:
	free_page((unsigned long)dev_state->states);

out_free_dev_state:
	kfree(dev_state);

	return ret;
}
EXPORT_SYMBOL(amd_iommu_init_device);

void amd_iommu_free_device(struct pci_dev *pdev)
{
	struct device_state *dev_state;
	unsigned long flags;
	u16 devid;

	if (!amd_iommu_v2_supported())
		return;

	devid = device_id(pdev);

	spin_lock_irqsave(&state_lock, flags);

	dev_state = __get_device_state(devid);
	if (dev_state == NULL) {
		spin_unlock_irqrestore(&state_lock, flags);
		return;
	}

	list_del(&dev_state->list);

	spin_unlock_irqrestore(&state_lock, flags);

	/* Get rid of any remaining pasid states */
	free_pasid_states(dev_state);

	put_device_state_wait(dev_state);
}
EXPORT_SYMBOL(amd_iommu_free_device);

int amd_iommu_set_invalid_ppr_cb(struct pci_dev *pdev,
				 amd_iommu_invalid_ppr_cb cb)
{
	struct device_state *dev_state;
	unsigned long flags;
	u16 devid;
	int ret;

	if (!amd_iommu_v2_supported())
		return -ENODEV;

	devid = device_id(pdev);

	spin_lock_irqsave(&state_lock, flags);

	ret = -EINVAL;
	dev_state = __get_device_state(devid);
	if (dev_state == NULL)
		goto out_unlock;

	dev_state->inv_ppr_cb = cb;

	ret = 0;

out_unlock:
	spin_unlock_irqrestore(&state_lock, flags);

	return ret;
}
EXPORT_SYMBOL(amd_iommu_set_invalid_ppr_cb);

int amd_iommu_set_invalidate_ctx_cb(struct pci_dev *pdev,
				    amd_iommu_invalidate_ctx cb)
{
	struct device_state *dev_state;
	unsigned long flags;
	u16 devid;
	int ret;

	if (!amd_iommu_v2_supported())
		return -ENODEV;

	devid = device_id(pdev);

	spin_lock_irqsave(&state_lock, flags);

	ret = -EINVAL;
	dev_state = __get_device_state(devid);
	if (dev_state == NULL)
		goto out_unlock;

	dev_state->inv_ctx_cb = cb;

	ret = 0;

out_unlock:
	spin_unlock_irqrestore(&state_lock, flags);

	return ret;
}
EXPORT_SYMBOL(amd_iommu_set_invalidate_ctx_cb);

static int __init amd_iommu_v2_init(void)
{
	int ret;

	pr_info("AMD IOMMUv2 driver by Joerg Roedel <joerg.roedel@amd.com>\n");

	if (!amd_iommu_v2_supported()) {
		pr_info("AMD IOMMUv2 functionality not available on this system\n");
		/*
		 * Load anyway to provide the symbols to other modules
		 * which may use AMD IOMMUv2 optionally.
		 */
		return 0;
	}

	spin_lock_init(&state_lock);

	ret = -ENOMEM;
	iommu_wq = create_workqueue("amd_iommu_v2");
	if (iommu_wq == NULL)
		goto out;

	ret = -ENOMEM;
	empty_page_table = (u64 *)get_zeroed_page(GFP_KERNEL);
	if (empty_page_table == NULL)
		goto out_destroy_wq;

	amd_iommu_register_ppr_notifier(&ppr_nb);
	profile_event_register(PROFILE_TASK_EXIT, &profile_nb);

	return 0;

out_destroy_wq:
	destroy_workqueue(iommu_wq);

out:
	return ret;
}

static void __exit amd_iommu_v2_exit(void)
{
	struct device_state *dev_state;
	int i;

	if (!amd_iommu_v2_supported())
		return;

	profile_event_unregister(PROFILE_TASK_EXIT, &profile_nb);
	amd_iommu_unregister_ppr_notifier(&ppr_nb);

	flush_workqueue(iommu_wq);

	/*
	 * The loop below might call flush_workqueue(), so call
	 * destroy_workqueue() after it
	 */
	for (i = 0; i < MAX_DEVICES; ++i) {
		dev_state = get_device_state(i);

		if (dev_state == NULL)
			continue;

		WARN_ON_ONCE(1);

		put_device_state(dev_state);
		amd_iommu_free_device(dev_state->pdev);
	}

	destroy_workqueue(iommu_wq);

	free_page((unsigned long)empty_page_table);
}

module_init(amd_iommu_v2_init);
module_exit(amd_iommu_v2_exit);