/* pci_sun4v.c: SUN4V specific PCI controller support.
 *
 * Copyright (C) 2006, 2007 David S. Miller (davem@davemloft.net)
 */

#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/pci.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/percpu.h>
#include <linux/irq.h>
#include <linux/msi.h>

#include <asm/iommu.h>
#include <asm/irq.h>
#include <asm/upa.h>
#include <asm/pstate.h>
#include <asm/oplib.h>
#include <asm/hypervisor.h>
#include <asm/prom.h>

#include "pci_impl.h"
#include "iommu_common.h"

#include "pci_sun4v.h"

#define PGLIST_NENTS	(PAGE_SIZE / sizeof(u64))
struct pci_iommu_batch {
	struct pci_dev	*pdev;		/* Device mapping is for.	*/
	unsigned long	prot;		/* IOMMU page protections	*/
	unsigned long	entry;		/* Index into IOTSB.		*/
	u64		*pglist;	/* List of physical pages	*/
	unsigned long	npages;		/* Number of pages in list.	*/
};

static DEFINE_PER_CPU(struct pci_iommu_batch, pci_iommu_batch);
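/* The batching helpers below buffer page mappings in the per-cpu pglist
 * and then establish them with a single pci_sun4v_iommu_map() hypervisor
 * call per batch.  The usage pattern (a sketch; the callers later in
 * this file are the reference) is:
 *
 *	local_irq_save(flags);
 *	pci_iommu_batch_start(pdev, prot, entry);
 *	for each page:
 *		pci_iommu_batch_add(phys_page);
 *	pci_iommu_batch_end();
 *	local_irq_restore(flags);
 *
 * Interrupts stay disabled throughout because the batch state lives in
 * per-cpu data.
 */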
/* Interrupts must be disabled.  */
static inline void pci_iommu_batch_start(struct pci_dev *pdev, unsigned long prot, unsigned long entry)
{
	struct pci_iommu_batch *p = &__get_cpu_var(pci_iommu_batch);

	p->pdev		= pdev;
	p->prot		= prot;
	p->entry	= entry;
	p->npages	= 0;
}
/* Interrupts must be disabled.  */
static long pci_iommu_batch_flush(struct pci_iommu_batch *p)
{
	struct pci_pbm_info *pbm = p->pdev->dev.archdata.host_controller;
	unsigned long devhandle = pbm->devhandle;
	unsigned long prot = p->prot;
	unsigned long entry = p->entry;
	u64 *pglist = p->pglist;
	unsigned long npages = p->npages;

	while (npages != 0) {
		long num;

		num = pci_sun4v_iommu_map(devhandle, HV_PCI_TSBID(0, entry),
					  npages, prot, __pa(pglist));
		if (unlikely(num < 0)) {
			if (printk_ratelimit())
				printk("pci_iommu_batch_flush: IOMMU map of "
				       "[%08lx:%08lx:%lx:%lx:%lx] failed with "
				       "status %ld\n",
				       devhandle, HV_PCI_TSBID(0, entry),
				       npages, prot, __pa(pglist), num);
			return -1;
		}

		entry += num;
		npages -= num;
		pglist += num;
	}

	p->entry = entry;
	p->npages = 0;

	return 0;
}
/* Interrupts must be disabled.  */
static inline long pci_iommu_batch_add(u64 phys_page)
{
	struct pci_iommu_batch *p = &__get_cpu_var(pci_iommu_batch);

	BUG_ON(p->npages >= PGLIST_NENTS);

	p->pglist[p->npages++] = phys_page;
	if (p->npages == PGLIST_NENTS)
		return pci_iommu_batch_flush(p);

	return 0;
}
/* Interrupts must be disabled.  */
static inline long pci_iommu_batch_end(void)
{
	struct pci_iommu_batch *p = &__get_cpu_var(pci_iommu_batch);

	BUG_ON(p->npages >= PGLIST_NENTS);

	return pci_iommu_batch_flush(p);
}
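/* A simple bitmap allocator for IOTSB entries: find a run of @npages
 * clear bits, set them, and return the index of the first one.  A
 * second pass wraps the search back to the start of the map before
 * giving up.
 */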
static long pci_arena_alloc(struct pci_iommu_arena *arena, unsigned long npages)
{
	unsigned long n, i, start, end, limit;
	int pass;

	limit = arena->limit;
	start = arena->hint;
	pass = 0;

again:
	n = find_next_zero_bit(arena->map, limit, start);
	end = n + npages;
	if (unlikely(end >= limit)) {
		if (likely(pass < 1)) {
			limit = start;
			start = 0;
			pass++;
			goto again;
		} else {
			/* Scanned the whole thing, give up. */
			return -1;
		}
	}

	for (i = n; i < end; i++) {
		if (test_bit(i, arena->map)) {
			start = i + 1;
			goto again;
		}
	}

	for (i = n; i < end; i++)
		__set_bit(i, arena->map);

	arena->hint = end;

	return n;
}
static void pci_arena_free(struct pci_iommu_arena *arena, unsigned long base, unsigned long npages)
{
	unsigned long i;

	for (i = base; i < (base + npages); i++)
		__clear_bit(i, arena->map);
}
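/* Consistent (coherent) DMA allocation: grab zeroed pages, carve out a
 * run of IOTSB entries under iommu->lock, then program the translations
 * with the batching helpers above.
 */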
static void *pci_4v_alloc_consistent(struct pci_dev *pdev, size_t size, dma_addr_t *dma_addrp, gfp_t gfp)
{
	struct pci_iommu *iommu;
	unsigned long flags, order, first_page, npages, n;
	void *ret;
	long entry;

	size = IO_PAGE_ALIGN(size);
	order = get_order(size);
	if (unlikely(order >= MAX_ORDER))
		return NULL;

	npages = size >> IO_PAGE_SHIFT;

	first_page = __get_free_pages(gfp, order);
	if (unlikely(first_page == 0UL))
		return NULL;

	memset((char *)first_page, 0, PAGE_SIZE << order);

	iommu = pdev->dev.archdata.iommu;

	spin_lock_irqsave(&iommu->lock, flags);
	entry = pci_arena_alloc(&iommu->arena, npages);
	spin_unlock_irqrestore(&iommu->lock, flags);

	if (unlikely(entry < 0L))
		goto arena_alloc_fail;

	*dma_addrp = (iommu->page_table_map_base +
		      (entry << IO_PAGE_SHIFT));
	ret = (void *) first_page;
	first_page = __pa(first_page);

	local_irq_save(flags);

	pci_iommu_batch_start(pdev,
			      (HV_PCI_MAP_ATTR_READ |
			       HV_PCI_MAP_ATTR_WRITE),
			      entry);

	for (n = 0; n < npages; n++) {
		long err = pci_iommu_batch_add(first_page + (n * PAGE_SIZE));
		if (unlikely(err < 0L))
			goto iommu_map_fail;
	}

	if (unlikely(pci_iommu_batch_end() < 0L))
		goto iommu_map_fail;

	local_irq_restore(flags);

	return ret;

iommu_map_fail:
	/* Interrupts are disabled.  */
	spin_lock(&iommu->lock);
	pci_arena_free(&iommu->arena, entry, npages);
	spin_unlock_irqrestore(&iommu->lock, flags);

arena_alloc_fail:
	free_pages(first_page, order);
	return NULL;
}
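/* Note the demap loop below: the hypervisor may tear down fewer entries
 * than requested per call, so we keep calling until every page of the
 * region is unmapped.
 */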
static void pci_4v_free_consistent(struct pci_dev *pdev, size_t size, void *cpu, dma_addr_t dvma)
{
	struct pci_pbm_info *pbm;
	struct pci_iommu *iommu;
	unsigned long flags, order, npages, entry;
	u32 devhandle;

	npages = IO_PAGE_ALIGN(size) >> IO_PAGE_SHIFT;
	iommu = pdev->dev.archdata.iommu;
	pbm = pdev->dev.archdata.host_controller;
	devhandle = pbm->devhandle;
	entry = ((dvma - iommu->page_table_map_base) >> IO_PAGE_SHIFT);

	spin_lock_irqsave(&iommu->lock, flags);

	pci_arena_free(&iommu->arena, entry, npages);

	do {
		unsigned long num;

		num = pci_sun4v_iommu_demap(devhandle, HV_PCI_TSBID(0, entry),
					    npages);
		entry += num;
		npages -= num;
	} while (npages != 0);

	spin_unlock_irqrestore(&iommu->lock, flags);

	order = get_order(size);
	if (order < 10)
		free_pages((unsigned long)cpu, order);
}
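/* Streaming mapping of a single buffer.  Write permission is granted
 * only when the DMA direction says the device may write to memory.
 */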
static dma_addr_t pci_4v_map_single(struct pci_dev *pdev, void *ptr, size_t sz, int direction)
{
	struct pci_iommu *iommu;
	unsigned long flags, npages, oaddr;
	unsigned long i, base_paddr;
	u32 bus_addr, ret;
	unsigned long prot;
	long entry;

	iommu = pdev->dev.archdata.iommu;

	if (unlikely(direction == PCI_DMA_NONE))
		goto bad;

	oaddr = (unsigned long)ptr;
	npages = IO_PAGE_ALIGN(oaddr + sz) - (oaddr & IO_PAGE_MASK);
	npages >>= IO_PAGE_SHIFT;

	spin_lock_irqsave(&iommu->lock, flags);
	entry = pci_arena_alloc(&iommu->arena, npages);
	spin_unlock_irqrestore(&iommu->lock, flags);

	if (unlikely(entry < 0L))
		goto bad;

	bus_addr = (iommu->page_table_map_base +
		    (entry << IO_PAGE_SHIFT));
	ret = bus_addr | (oaddr & ~IO_PAGE_MASK);
	base_paddr = __pa(oaddr & IO_PAGE_MASK);
	prot = HV_PCI_MAP_ATTR_READ;
	if (direction != PCI_DMA_TODEVICE)
		prot |= HV_PCI_MAP_ATTR_WRITE;

	local_irq_save(flags);

	pci_iommu_batch_start(pdev, prot, entry);

	for (i = 0; i < npages; i++, base_paddr += IO_PAGE_SIZE) {
		long err = pci_iommu_batch_add(base_paddr);
		if (unlikely(err < 0L))
			goto iommu_map_fail;
	}
	if (unlikely(pci_iommu_batch_end() < 0L))
		goto iommu_map_fail;

	local_irq_restore(flags);

	return ret;

bad:
	if (printk_ratelimit())
		WARN_ON(1);
	return PCI_DMA_ERROR_CODE;

iommu_map_fail:
	/* Interrupts are disabled.  */
	spin_lock(&iommu->lock);
	pci_arena_free(&iommu->arena, entry, npages);
	spin_unlock_irqrestore(&iommu->lock, flags);

	return PCI_DMA_ERROR_CODE;
}
static void pci_4v_unmap_single(struct pci_dev *pdev, dma_addr_t bus_addr, size_t sz, int direction)
{
	struct pci_pbm_info *pbm;
	struct pci_iommu *iommu;
	unsigned long flags, npages;
	long entry;
	u32 devhandle;

	if (unlikely(direction == PCI_DMA_NONE)) {
		if (printk_ratelimit())
			WARN_ON(1);
		return;
	}

	iommu = pdev->dev.archdata.iommu;
	pbm = pdev->dev.archdata.host_controller;
	devhandle = pbm->devhandle;

	npages = IO_PAGE_ALIGN(bus_addr + sz) - (bus_addr & IO_PAGE_MASK);
	npages >>= IO_PAGE_SHIFT;
	bus_addr &= IO_PAGE_MASK;

	spin_lock_irqsave(&iommu->lock, flags);

	entry = (bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT;
	pci_arena_free(&iommu->arena, entry, npages);

	do {
		unsigned long num;

		num = pci_sun4v_iommu_demap(devhandle, HV_PCI_TSBID(0, entry),
					    npages);
		entry += num;
		npages -= num;
	} while (npages != 0);

	spin_unlock_irqrestore(&iommu->lock, flags);
}
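/* fill_sg() walks the scatterlist and emits one IOTSB entry per IO
 * page, folding together the physically adjacent elements that
 * prepare_sg() has already coalesced into single DMA segments.
 */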
#define SG_ENT_PHYS_ADDRESS(SG)	\
	(__pa(page_address((SG)->page)) + (SG)->offset)

static inline long fill_sg(long entry, struct pci_dev *pdev,
			   struct scatterlist *sg,
			   int nused, int nelems, unsigned long prot)
{
	struct scatterlist *dma_sg = sg;
	struct scatterlist *sg_end = sg + nelems;
	unsigned long flags;
	int i;

	local_irq_save(flags);

	pci_iommu_batch_start(pdev, prot, entry);

	for (i = 0; i < nused; i++) {
		unsigned long pteval = ~0UL;
		u32 dma_npages;

		dma_npages = ((dma_sg->dma_address & (IO_PAGE_SIZE - 1UL)) +
			      dma_sg->dma_length +
			      ((IO_PAGE_SIZE - 1UL))) >> IO_PAGE_SHIFT;
		do {
			unsigned long offset;
			signed int len;

			/* If we are here, we know we have at least one
			 * more page to map.  So walk forward until we
			 * hit a page crossing, and begin creating new
			 * mappings from that spot.
			 */
			for (;;) {
				unsigned long tmp;

				tmp = SG_ENT_PHYS_ADDRESS(sg);
				len = sg->length;
				if (((tmp ^ pteval) >> IO_PAGE_SHIFT) != 0UL) {
					pteval = tmp & IO_PAGE_MASK;
					offset = tmp & (IO_PAGE_SIZE - 1UL);
					break;
				}
				if (((tmp ^ (tmp + len - 1UL)) >> IO_PAGE_SHIFT) != 0UL) {
					pteval = (tmp + IO_PAGE_SIZE) & IO_PAGE_MASK;
					offset = 0UL;
					len -= (IO_PAGE_SIZE - (tmp & (IO_PAGE_SIZE - 1UL)));
					break;
				}
				sg++;
			}

			pteval = (pteval & IOPTE_PAGE);
			while (len > 0) {
				long err;

				err = pci_iommu_batch_add(pteval);
				if (unlikely(err < 0L))
					goto iommu_map_failed;

				pteval += IO_PAGE_SIZE;
				len -= (IO_PAGE_SIZE - offset);
				offset = 0;
				dma_npages--;
			}

			pteval = (pteval & IOPTE_PAGE) + len;
			sg++;

			/* Skip over any tail mappings we've fully mapped,
			 * adjusting pteval along the way.  Stop when we
			 * detect a page crossing event.
			 */
			while (sg < sg_end &&
			       (pteval << (64 - IO_PAGE_SHIFT)) != 0UL &&
			       (pteval == SG_ENT_PHYS_ADDRESS(sg)) &&
			       ((pteval ^
				 (SG_ENT_PHYS_ADDRESS(sg) + sg->length - 1UL)) >> IO_PAGE_SHIFT) == 0UL) {
				pteval += sg->length;
				sg++;
			}
			if ((pteval << (64 - IO_PAGE_SHIFT)) == 0UL)
				pteval = ~0UL;
		} while (dma_npages != 0);
		dma_sg++;
	}

	if (unlikely(pci_iommu_batch_end() < 0L))
		goto iommu_map_failed;

	local_irq_restore(flags);
	return 0;

iommu_map_failed:
	local_irq_restore(flags);
	return -1;
}
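/* Map a scatterlist in four steps: prepare/coalesce the list, allocate
 * a run of IOTSB entries, rebase the segment addresses to bus
 * addresses, then create the page mappings with fill_sg().  Single
 * entry lists short-circuit through pci_4v_map_single().
 */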
static int pci_4v_map_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, int direction)
{
	struct pci_iommu *iommu;
	unsigned long flags, npages, prot;
	u32 dma_base;
	struct scatterlist *sgtmp;
	long entry, err;
	int used;

	/* Fast path single entry scatterlists. */
	if (nelems == 1) {
		sglist->dma_address =
			pci_4v_map_single(pdev,
					  (page_address(sglist->page) + sglist->offset),
					  sglist->length, direction);
		if (unlikely(sglist->dma_address == PCI_DMA_ERROR_CODE))
			return 0;
		sglist->dma_length = sglist->length;
		return 1;
	}

	iommu = pdev->dev.archdata.iommu;

	if (unlikely(direction == PCI_DMA_NONE))
		goto bad;

	/* Step 1: Prepare scatter list. */
	npages = prepare_sg(sglist, nelems);

	/* Step 2: Allocate a cluster and context, if necessary. */
	spin_lock_irqsave(&iommu->lock, flags);
	entry = pci_arena_alloc(&iommu->arena, npages);
	spin_unlock_irqrestore(&iommu->lock, flags);

	if (unlikely(entry < 0L))
		goto bad;

	dma_base = iommu->page_table_map_base +
		(entry << IO_PAGE_SHIFT);

	/* Step 3: Normalize DMA addresses. */
	used = nelems;

	sgtmp = sglist;
	while (used && sgtmp->dma_length) {
		sgtmp->dma_address += dma_base;
		sgtmp++;
		used--;
	}
	used = nelems - used;

	/* Step 4: Create the mappings. */
	prot = HV_PCI_MAP_ATTR_READ;
	if (direction != PCI_DMA_TODEVICE)
		prot |= HV_PCI_MAP_ATTR_WRITE;

	err = fill_sg(entry, pdev, sglist, used, nelems, prot);
	if (unlikely(err < 0L))
		goto iommu_map_failed;

	return used;

bad:
	if (printk_ratelimit())
		WARN_ON(1);
	return 0;

iommu_map_failed:
	spin_lock_irqsave(&iommu->lock, flags);
	pci_arena_free(&iommu->arena, entry, npages);
	spin_unlock_irqrestore(&iommu->lock, flags);

	return 0;
}
static void pci_4v_unmap_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, int direction)
{
	struct pci_pbm_info *pbm;
	struct pci_iommu *iommu;
	unsigned long flags, i, npages;
	long entry;
	u32 devhandle, bus_addr;

	if (unlikely(direction == PCI_DMA_NONE)) {
		if (printk_ratelimit())
			WARN_ON(1);
	}

	iommu = pdev->dev.archdata.iommu;
	pbm = pdev->dev.archdata.host_controller;
	devhandle = pbm->devhandle;

	bus_addr = sglist->dma_address & IO_PAGE_MASK;

	for (i = 1; i < nelems; i++)
		if (sglist[i].dma_length == 0)
			break;
	i--;
	npages = (IO_PAGE_ALIGN(sglist[i].dma_address + sglist[i].dma_length) -
		  bus_addr) >> IO_PAGE_SHIFT;

	entry = ((bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT);

	spin_lock_irqsave(&iommu->lock, flags);

	pci_arena_free(&iommu->arena, entry, npages);

	do {
		unsigned long num;

		num = pci_sun4v_iommu_demap(devhandle, HV_PCI_TSBID(0, entry),
					    npages);
		entry += num;
		npages -= num;
	} while (npages != 0);

	spin_unlock_irqrestore(&iommu->lock, flags);
}
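/* DMA on sun4v is presumably kept coherent by the platform, so the
 * sync operations below have nothing to do.
 */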
static void pci_4v_dma_sync_single_for_cpu(struct pci_dev *pdev, dma_addr_t bus_addr, size_t sz, int direction)
{
	/* Nothing to do... */
}

static void pci_4v_dma_sync_sg_for_cpu(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, int direction)
{
	/* Nothing to do... */
}
struct pci_iommu_ops pci_sun4v_iommu_ops = {
	.alloc_consistent		= pci_4v_alloc_consistent,
	.free_consistent		= pci_4v_free_consistent,
	.map_single			= pci_4v_map_single,
	.unmap_single			= pci_4v_unmap_single,
	.map_sg				= pci_4v_map_sg,
	.unmap_sg			= pci_4v_unmap_sg,
	.dma_sync_single_for_cpu	= pci_4v_dma_sync_single_for_cpu,
	.dma_sync_sg_for_cpu		= pci_4v_dma_sync_sg_for_cpu,
};
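/* The generic sparc64 PCI layer dispatches the pci_map_*()/pci_unmap_*()
 * DMA API through this ops vector, making the pci_4v_*() routines above
 * the DMA entry points for all sun4v PCI controllers.
 */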
static inline int pci_sun4v_out_of_range(struct pci_pbm_info *pbm, unsigned int bus, unsigned int device, unsigned int func)
{
	if (bus < pbm->pci_first_busno ||
	    bus > pbm->pci_last_busno)
		return 1;
	return 0;
}
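/* Config space accesses are proxied through hypervisor calls
 * (pci_sun4v_config_get/put).  Reads of out-of-range devices return
 * all ones, and writes to them are silently dropped.
 */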
static int pci_sun4v_read_pci_cfg(struct pci_bus *bus_dev, unsigned int devfn,
				  int where, int size, u32 *value)
{
	struct pci_pbm_info *pbm = bus_dev->sysdata;
	u32 devhandle = pbm->devhandle;
	unsigned int bus = bus_dev->number;
	unsigned int device = PCI_SLOT(devfn);
	unsigned int func = PCI_FUNC(devfn);
	unsigned long ret;

	if (pci_sun4v_out_of_range(pbm, bus, device, func)) {
		ret = ~0UL;
	} else {
		ret = pci_sun4v_config_get(devhandle,
				HV_PCI_DEVICE_BUILD(bus, device, func),
				where, size);
#if 0
		printk("rcfg: [%x:%x:%x:%d]=[%lx]\n",
		       devhandle, HV_PCI_DEVICE_BUILD(bus, device, func),
		       where, size, ret);
#endif
	}
	switch (size) {
	case 1:
		*value = ret & 0xff;
		break;
	case 2:
		*value = ret & 0xffff;
		break;
	case 4:
		*value = ret & 0xffffffff;
		break;
	};

	return PCIBIOS_SUCCESSFUL;
}
static int pci_sun4v_write_pci_cfg(struct pci_bus *bus_dev, unsigned int devfn,
				   int where, int size, u32 value)
{
	struct pci_pbm_info *pbm = bus_dev->sysdata;
	u32 devhandle = pbm->devhandle;
	unsigned int bus = bus_dev->number;
	unsigned int device = PCI_SLOT(devfn);
	unsigned int func = PCI_FUNC(devfn);
	unsigned long ret;

	if (pci_sun4v_out_of_range(pbm, bus, device, func)) {
		/* Do nothing. */
	} else {
		ret = pci_sun4v_config_put(devhandle,
				HV_PCI_DEVICE_BUILD(bus, device, func),
				where, size, value);
#if 0
		printk("wcfg: [%x:%x:%x:%d] v[%x] == [%lx]\n",
		       devhandle, HV_PCI_DEVICE_BUILD(bus, device, func),
		       where, size, value, ret);
#endif
	}
	return PCIBIOS_SUCCESSFUL;
}
static struct pci_ops pci_sun4v_ops = {
	.read =		pci_sun4v_read_pci_cfg,
	.write =	pci_sun4v_write_pci_cfg,
};
static void pbm_scan_bus(struct pci_controller_info *p,
			 struct pci_pbm_info *pbm)
{
	pbm->pci_bus = pci_scan_one_pbm(pbm);
}

static void pci_sun4v_scan_bus(struct pci_controller_info *p)
{
	struct property *prop;
	struct device_node *dp;

	if ((dp = p->pbm_A.prom_node) != NULL) {
		prop = of_find_property(dp, "66mhz-capable", NULL);
		p->pbm_A.is_66mhz_capable = (prop != NULL);

		pbm_scan_bus(p, &p->pbm_A);
	}
	if ((dp = p->pbm_B.prom_node) != NULL) {
		prop = of_find_property(dp, "66mhz-capable", NULL);
		p->pbm_B.is_66mhz_capable = (prop != NULL);

		pbm_scan_bus(p, &p->pbm_B);
	}

	/* XXX register error interrupt handlers XXX */
}
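/* Rewrite a device BAR (or the ROM base register) after resource
 * assignment, translating the kernel's resource view back into the
 * device's address space.
 */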
static void pci_sun4v_base_address_update(struct pci_dev *pdev, int resource)
{
	struct pci_pbm_info *pbm = pdev->dev.archdata.host_controller;
	struct resource *res, *root;
	u32 reg;
	int where, size, is_64bit;

	res = &pdev->resource[resource];
	if (resource < 6) {
		where = PCI_BASE_ADDRESS_0 + (resource * 4);
	} else if (resource == PCI_ROM_RESOURCE) {
		where = pdev->rom_base_reg;
	} else {
		/* Somebody might have asked allocation of a non-standard resource */
		return;
	}

	/* XXX 64-bit MEM handling is not %100 correct... XXX */
	is_64bit = 0;
	if (res->flags & IORESOURCE_IO)
		root = &pbm->io_space;
	else {
		root = &pbm->mem_space;
		if ((res->flags & PCI_BASE_ADDRESS_MEM_TYPE_MASK)
		    == PCI_BASE_ADDRESS_MEM_TYPE_64)
			is_64bit = 1;
	}

	size = res->end - res->start;
	pci_read_config_dword(pdev, where, &reg);
	reg = ((reg & size) |
	       (((u32)(res->start - root->start)) & ~size));
	if (resource == PCI_ROM_RESOURCE) {
		reg |= PCI_ROM_ADDRESS_ENABLE;
		res->flags |= IORESOURCE_ROM_ENABLE;
	}
	pci_write_config_dword(pdev, where, reg);

	/* This knows that the upper 32-bits of the address
	 * must be zero.  Our PCI common layer enforces this.
	 */
	if (is_64bit)
		pci_write_config_dword(pdev, where + 4, 0);
}
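/* Scan the IOTSB for translations left behind by OBP (for example, the
 * firmware console) and reserve them in the arena so they are never
 * handed out; stale mappings whose target pages the kernel now owns
 * are demapped instead.
 */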
static unsigned long probe_existing_entries(struct pci_pbm_info *pbm,
					    struct pci_iommu *iommu)
{
	struct pci_iommu_arena *arena = &iommu->arena;
	unsigned long i, cnt = 0;
	u32 devhandle;

	devhandle = pbm->devhandle;
	for (i = 0; i < arena->limit; i++) {
		unsigned long ret, io_attrs, ra;

		ret = pci_sun4v_iommu_getmap(devhandle,
					     HV_PCI_TSBID(0, i),
					     &io_attrs, &ra);
		if (ret == HV_EOK) {
			if (page_in_phys_avail(ra)) {
				pci_sun4v_iommu_demap(devhandle,
						      HV_PCI_TSBID(0, i), 1);
			} else {
				cnt++;
				__set_bit(i, arena->map);
			}
		}
	}

	return cnt;
}
static void pci_sun4v_iommu_init(struct pci_pbm_info *pbm)
{
	struct pci_iommu *iommu = pbm->iommu;
	struct property *prop;
	unsigned long num_tsb_entries, sz;
	u32 vdma[2], dma_mask, dma_offset;
	int tsbsize;

	prop = of_find_property(pbm->prom_node, "virtual-dma", NULL);
	if (prop) {
		u32 *val = prop->value;

		vdma[0] = val[0];
		vdma[1] = val[1];
	} else {
		/* No property, use default values. */
		vdma[0] = 0x80000000;
		vdma[1] = 0x80000000;
	}

	dma_mask = vdma[0];
	switch (vdma[1]) {
		case 0x20000000:
			dma_mask |= 0x1fffffff;
			tsbsize = 64;
			break;

		case 0x40000000:
			dma_mask |= 0x3fffffff;
			tsbsize = 128;
			break;

		case 0x80000000:
			dma_mask |= 0x7fffffff;
			tsbsize = 256;
			break;

		default:
			prom_printf("PCI-SUN4V: strange virtual-dma size.\n");
			prom_halt();
	};

	tsbsize *= (8 * 1024);

	num_tsb_entries = tsbsize / sizeof(iopte_t);

	dma_offset = vdma[0];

	/* Setup initial software IOMMU state. */
	spin_lock_init(&iommu->lock);
	iommu->ctx_lowest_free = 1;
	iommu->page_table_map_base = dma_offset;
	iommu->dma_addr_mask = dma_mask;

	/* Allocate and initialize the free area map.  */
	sz = num_tsb_entries / 8;
	sz = (sz + 7UL) & ~7UL;
	iommu->arena.map = kzalloc(sz, GFP_KERNEL);
	if (!iommu->arena.map) {
		prom_printf("PCI_IOMMU: Error, kmalloc(arena.map) failed.\n");
		prom_halt();
	}
	iommu->arena.limit = num_tsb_entries;

	sz = probe_existing_entries(pbm, iommu);
	if (sz)
		printk("%s: Imported %lu TSB entries from OBP\n",
		       pbm->name, sz);
}
static void pci_sun4v_get_bus_range(struct pci_pbm_info *pbm)
{
	struct property *prop;
	unsigned int *busrange;

	prop = of_find_property(pbm->prom_node, "bus-range", NULL);

	busrange = prop->value;

	pbm->pci_first_busno = busrange[0];
	pbm->pci_last_busno = busrange[1];
}
#ifdef CONFIG_PCI_MSI
struct pci_sun4v_msiq_entry {
	u64		version_type;
#define MSIQ_VERSION_MASK		0xffffffff00000000UL
#define MSIQ_VERSION_SHIFT		32
#define MSIQ_TYPE_MASK			0x00000000000000ffUL
#define MSIQ_TYPE_SHIFT			0
#define MSIQ_TYPE_NONE			0x00
#define MSIQ_TYPE_MSG			0x01
#define MSIQ_TYPE_MSI32			0x02
#define MSIQ_TYPE_MSI64			0x03
#define MSIQ_TYPE_INTX			0x08
#define MSIQ_TYPE_NONE2			0xff

	u64		intx_sysino;
	u64		reserved1;
	u64		stick;
	u64		req_id;  /* bus/device/func */
#define MSIQ_REQID_BUS_MASK		0xff00UL
#define MSIQ_REQID_BUS_SHIFT		8
#define MSIQ_REQID_DEVICE_MASK		0x00f8UL
#define MSIQ_REQID_DEVICE_SHIFT		3
#define MSIQ_REQID_FUNC_MASK		0x0007UL
#define MSIQ_REQID_FUNC_SHIFT		0

	u64		msi_address;

	/* The format of this value is message type dependent.
	 * For MSI bits 15:0 are the data from the MSI packet.
	 * For MSI-X bits 31:0 are the data from the MSI packet.
	 * For MSG, the message code and message routing code where:
	 *	bits 39:32 is the bus/device/fn of the msg target-id
	 *	bits 18:16 is the message routing code
	 *	bits 7:0 is the message code
	 * For INTx the low order 2-bits are:
	 *	00 - INTA
	 *	01 - INTB
	 *	10 - INTC
	 *	11 - INTD
	 */
	u64		msi_data;

	u64		reserved2;
};
/* For now this just runs as a pre-handler for the real interrupt handler.
 * So we just walk through the queue and ACK all the entries, update the
 * head pointer, and return.
 *
 * In the longer term it would be nice to do something more integrated
 * wherein we can pass in some of this MSI info to the drivers.  This
 * would be most useful for PCIe fabric error messages, although we could
 * invoke those directly from the loop here in order to pass the info around.
 */
static void pci_sun4v_msi_prehandler(unsigned int ino, void *data1, void *data2)
{
	struct pci_pbm_info *pbm = data1;
	struct pci_sun4v_msiq_entry *base, *ep;
	unsigned long msiqid, orig_head, head, type, err;

	msiqid = (unsigned long) data2;

	head = 0xdeadbeef;
	err = pci_sun4v_msiq_gethead(pbm->devhandle, msiqid, &head);
	if (unlikely(err))
		goto hv_error_get;

	if (unlikely(head >= (pbm->msiq_ent_count * sizeof(struct pci_sun4v_msiq_entry))))
		goto bad_offset;

	head /= sizeof(struct pci_sun4v_msiq_entry);
	orig_head = head;
	base = (pbm->msi_queues + ((msiqid - pbm->msiq_first) *
				   (pbm->msiq_ent_count *
				    sizeof(struct pci_sun4v_msiq_entry))));
	ep = &base[head];
	while ((ep->version_type & MSIQ_TYPE_MASK) != 0) {
		type = (ep->version_type & MSIQ_TYPE_MASK) >> MSIQ_TYPE_SHIFT;
		if (unlikely(type != MSIQ_TYPE_MSI32 &&
			     type != MSIQ_TYPE_MSI64))
			goto bad_type;

		pci_sun4v_msi_setstate(pbm->devhandle,
				       ep->msi_data /* msi_num */,
				       HV_MSISTATE_IDLE);

		/* Clear the entry.  */
		ep->version_type &= ~MSIQ_TYPE_MASK;

		/* Go to next entry in ring.  */
		head++;
		if (head >= pbm->msiq_ent_count)
			head = 0;
		ep = &base[head];
	}

	if (likely(head != orig_head)) {
		/* ACK entries by updating head pointer.  */
		head *= sizeof(struct pci_sun4v_msiq_entry);
		err = pci_sun4v_msiq_sethead(pbm->devhandle, msiqid, head);
		if (unlikely(err))
			goto hv_error_set;
	}
	return;

hv_error_set:
	printk(KERN_EMERG "MSI: Hypervisor set head gives error %lu\n", err);
	goto hv_error_cont;

hv_error_get:
	printk(KERN_EMERG "MSI: Hypervisor get head gives error %lu\n", err);

hv_error_cont:
	printk(KERN_EMERG "MSI: devhandle[%x] msiqid[%lx] head[%lu]\n",
	       pbm->devhandle, msiqid, head);
	return;

bad_offset:
	printk(KERN_EMERG "MSI: Hypervisor gives bad offset %lx max(%lx)\n",
	       head, pbm->msiq_ent_count * sizeof(struct pci_sun4v_msiq_entry));
	return;

bad_type:
	printk(KERN_EMERG "MSI: Entry has bad type %lx\n", type);
	return;
}
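/* MSI number allocation is tracked with a simple bitmap, sized up to a
 * whole number of unsigned longs.
 */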
static int msi_bitmap_alloc(struct pci_pbm_info *pbm)
{
	unsigned long size, bits_per_ulong;

	bits_per_ulong = sizeof(unsigned long) * 8;
	size = (pbm->msi_num + (bits_per_ulong - 1)) & ~(bits_per_ulong - 1);
	size /= 8;
	BUG_ON(size % sizeof(unsigned long));

	pbm->msi_bitmap = kzalloc(size, GFP_KERNEL);
	if (!pbm->msi_bitmap)
		return -ENOMEM;

	return 0;
}

static void msi_bitmap_free(struct pci_pbm_info *pbm)
{
	kfree(pbm->msi_bitmap);
	pbm->msi_bitmap = NULL;
}
static int msi_queue_alloc(struct pci_pbm_info *pbm)
{
	unsigned long q_size, alloc_size, pages, order;
	int i;

	q_size = pbm->msiq_ent_count * sizeof(struct pci_sun4v_msiq_entry);
	alloc_size = (pbm->msiq_num * q_size);
	order = get_order(alloc_size);
	pages = __get_free_pages(GFP_KERNEL | __GFP_COMP, order);
	if (pages == 0UL) {
		printk(KERN_ERR "MSI: Cannot allocate MSI queues (o=%lu).\n",
		       order);
		return -ENOMEM;
	}
	memset((char *)pages, 0, PAGE_SIZE << order);
	pbm->msi_queues = (void *) pages;

	for (i = 0; i < pbm->msiq_num; i++) {
		unsigned long err, base = __pa(pages + (i * q_size));
		unsigned long ret1, ret2;

		err = pci_sun4v_msiq_conf(pbm->devhandle,
					  pbm->msiq_first + i,
					  base, pbm->msiq_ent_count);
		if (err) {
			printk(KERN_ERR "MSI: msiq register fails (err=%lu)\n",
			       err);
			goto h_error;
		}

		err = pci_sun4v_msiq_info(pbm->devhandle,
					  pbm->msiq_first + i,
					  &ret1, &ret2);
		if (err) {
			printk(KERN_ERR "MSI: Cannot read msiq (err=%lu)\n",
			       err);
			goto h_error;
		}
		if (ret1 != base || ret2 != pbm->msiq_ent_count) {
			printk(KERN_ERR "MSI: Bogus qconf "
			       "expected[%lx:%x] got[%lx:%lx]\n",
			       base, pbm->msiq_ent_count,
			       ret1, ret2);
			goto h_error;
		}
	}

	return 0;

h_error:
	free_pages(pages, order);
	return -EINVAL;
}
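/* Pull the MSI and MSI-EQ geometry from OBP properties.  If any
 * property is missing or malformed we fall back to "no MSI support"
 * rather than guessing.
 */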
static void pci_sun4v_msi_init(struct pci_pbm_info *pbm)
{
	const u32 *val;
	int len;

	val = of_get_property(pbm->prom_node, "#msi-eqs", &len);
	if (!val || len != 4)
		goto no_msi;
	pbm->msiq_num = *val;
	if (pbm->msiq_num) {
		const struct msiq_prop {
			u32 first_msiq;
			u32 num_msiq;
			u32 first_devino;
		} *mqp;
		const struct msi_range_prop {
			u32 first_msi;
			u32 num_msi;
		} *mrng;
		const struct addr_range_prop {
			u32 msi32_high;
			u32 msi32_low;
			u32 msi32_len;
			u32 msi64_high;
			u32 msi64_low;
			u32 msi64_len;
		} *arng;

		val = of_get_property(pbm->prom_node, "msi-eq-size", &len);
		if (!val || len != 4)
			goto no_msi;

		pbm->msiq_ent_count = *val;

		mqp = of_get_property(pbm->prom_node,
				      "msi-eq-to-devino", &len);
		if (!mqp || len != sizeof(struct msiq_prop))
			goto no_msi;

		pbm->msiq_first = mqp->first_msiq;
		pbm->msiq_first_devino = mqp->first_devino;

		val = of_get_property(pbm->prom_node, "#msi", &len);
		if (!val || len != 4)
			goto no_msi;
		pbm->msi_num = *val;

		mrng = of_get_property(pbm->prom_node, "msi-ranges", &len);
		if (!mrng || len != sizeof(struct msi_range_prop))
			goto no_msi;
		pbm->msi_first = mrng->first_msi;

		val = of_get_property(pbm->prom_node, "msi-data-mask", &len);
		if (!val || len != 4)
			goto no_msi;
		pbm->msi_data_mask = *val;

		val = of_get_property(pbm->prom_node, "msix-data-width", &len);
		if (!val || len != 4)
			goto no_msi;
		pbm->msix_data_width = *val;

		arng = of_get_property(pbm->prom_node, "msi-address-ranges",
				       &len);
		if (!arng || len != sizeof(struct addr_range_prop))
			goto no_msi;
		pbm->msi32_start = ((u64)arng->msi32_high << 32) |
			(u64) arng->msi32_low;
		pbm->msi64_start = ((u64)arng->msi64_high << 32) |
			(u64) arng->msi64_low;
		pbm->msi32_len = arng->msi32_len;
		pbm->msi64_len = arng->msi64_len;

		if (msi_bitmap_alloc(pbm))
			goto no_msi;

		if (msi_queue_alloc(pbm)) {
			msi_bitmap_free(pbm);
			goto no_msi;
		}

		printk(KERN_INFO "%s: MSI Queue first[%u] num[%u] count[%u] "
		       "devino[0x%x]\n",
		       pbm->name,
		       pbm->msiq_first, pbm->msiq_num,
		       pbm->msiq_ent_count,
		       pbm->msiq_first_devino);
		printk(KERN_INFO "%s: MSI first[%u] num[%u] mask[0x%x] "
		       "width[%u]\n",
		       pbm->name,
		       pbm->msi_first, pbm->msi_num, pbm->msi_data_mask,
		       pbm->msix_data_width);
		printk(KERN_INFO "%s: MSI addr32[0x%lx:0x%x] "
		       "addr64[0x%lx:0x%x]\n",
		       pbm->name,
		       pbm->msi32_start, pbm->msi32_len,
		       pbm->msi64_start, pbm->msi64_len);
		printk(KERN_INFO "%s: MSI queues at RA [%p]\n",
		       pbm->name,
		       pbm->msi_queues);
	}

	return;

no_msi:
	pbm->msiq_num = 0;
	printk(KERN_INFO "%s: No MSI support.\n", pbm->name);
}
static int alloc_msi(struct pci_pbm_info *pbm)
{
	int i;

	for (i = 0; i < pbm->msi_num; i++) {
		if (!test_and_set_bit(i, pbm->msi_bitmap))
			return i + pbm->msi_first;
	}

	return -ENOENT;
}

static void free_msi(struct pci_pbm_info *pbm, int msi_num)
{
	msi_num -= pbm->msi_first;
	clear_bit(msi_num, pbm->msi_bitmap);
}
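/* MSI setup sequence: allocate an MSI number, build a virtual IRQ for
 * the event queue's devino, validate the queue and the MSI with the
 * hypervisor, compose and write the MSI message, and finally install
 * the queue-draining pre-handler.
 */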
static int pci_sun4v_setup_msi_irq(unsigned int *virt_irq_p,
				   struct pci_dev *pdev,
				   struct msi_desc *entry)
{
	struct pci_pbm_info *pbm = pdev->dev.archdata.host_controller;
	unsigned long devino, msiqid;
	struct msi_msg msg;
	int msi_num, err;

	*virt_irq_p = 0;

	msi_num = alloc_msi(pbm);
	if (msi_num < 0)
		return msi_num;

	devino = sun4v_build_msi(pbm->devhandle, virt_irq_p,
				 pbm->msiq_first_devino,
				 (pbm->msiq_first_devino +
				  pbm->msiq_num));
	err = -ENOMEM;
	if (!devino)
		goto out_err;

	set_irq_msi(*virt_irq_p, entry);

	msiqid = ((devino - pbm->msiq_first_devino) +
		  pbm->msiq_first);

	err = -EINVAL;
	if (pci_sun4v_msiq_setstate(pbm->devhandle, msiqid, HV_MSIQSTATE_IDLE))
		goto out_err;

	if (pci_sun4v_msiq_setvalid(pbm->devhandle, msiqid, HV_MSIQ_VALID))
		goto out_err;

	if (pci_sun4v_msi_setmsiq(pbm->devhandle,
				  msi_num, msiqid,
				  (entry->msi_attrib.is_64 ?
				   HV_MSITYPE_MSI64 : HV_MSITYPE_MSI32)))
		goto out_err;

	if (pci_sun4v_msi_setstate(pbm->devhandle, msi_num, HV_MSISTATE_IDLE))
		goto out_err;

	if (pci_sun4v_msi_setvalid(pbm->devhandle, msi_num, HV_MSIVALID_VALID))
		goto out_err;

	pdev->dev.archdata.msi_num = msi_num;

	if (entry->msi_attrib.is_64) {
		msg.address_hi = pbm->msi64_start >> 32;
		msg.address_lo = pbm->msi64_start & 0xffffffff;
	} else {
		msg.address_hi = 0;
		msg.address_lo = pbm->msi32_start;
	}
	msg.data = msi_num;
	write_msi_msg(*virt_irq_p, &msg);

	irq_install_pre_handler(*virt_irq_p,
				pci_sun4v_msi_prehandler,
				pbm, (void *) msiqid);

	return 0;

out_err:
	free_msi(pbm, msi_num);
	sun4v_destroy_msi(*virt_irq_p);
	*virt_irq_p = 0;
	return err;
}
static void pci_sun4v_teardown_msi_irq(unsigned int virt_irq,
				       struct pci_dev *pdev)
{
	struct pci_pbm_info *pbm = pdev->dev.archdata.host_controller;
	unsigned long msiqid, err;
	unsigned int msi_num;

	msi_num = pdev->dev.archdata.msi_num;
	err = pci_sun4v_msi_getmsiq(pbm->devhandle, msi_num, &msiqid);
	if (err) {
		printk(KERN_ERR "%s: getmsiq gives error %lu\n",
		       pbm->name, err);
		return;
	}

	pci_sun4v_msi_setvalid(pbm->devhandle, msi_num, HV_MSIVALID_INVALID);
	pci_sun4v_msiq_setvalid(pbm->devhandle, msiqid, HV_MSIQ_INVALID);

	free_msi(pbm, msi_num);

	/* The sun4v_destroy_msi() will liberate the devino and thus the MSIQ
	 * allocation.
	 */
	sun4v_destroy_msi(virt_irq);
}
#else /* CONFIG_PCI_MSI */
static void pci_sun4v_msi_init(struct pci_pbm_info *pbm)
{
}
#endif /* !(CONFIG_PCI_MSI) */
static void pci_sun4v_pbm_init(struct pci_controller_info *p, struct device_node *dp, u32 devhandle)
{
	struct pci_pbm_info *pbm;

	if (devhandle & 0x40)
		pbm = &p->pbm_B;
	else
		pbm = &p->pbm_A;

	pbm->parent = p;
	pbm->prom_node = dp;
	pbm->pci_first_slot = 1;

	pbm->devhandle = devhandle;

	pbm->name = dp->full_name;

	printk("%s: SUN4V PCI Bus Module\n", pbm->name);

	pci_determine_mem_io_space(pbm);

	pci_sun4v_get_bus_range(pbm);
	pci_sun4v_iommu_init(pbm);
	pci_sun4v_msi_init(pbm);
}
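/* A controller's two PBMs carry devhandles differing only in bit 0x40,
 * so an incoming device node is matched to a half-initialized
 * controller by checking (devhandle ^ 0x40) against the PBM already
 * registered.
 */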
void sun4v_pci_init(struct device_node *dp, char *model_name)
{
	struct pci_controller_info *p;
	struct pci_iommu *iommu;
	struct property *prop;
	struct linux_prom64_registers *regs;
	u32 devhandle;
	int i;

	prop = of_find_property(dp, "reg", NULL);
	regs = prop->value;

	devhandle = (regs->phys_addr >> 32UL) & 0x0fffffff;

	for (p = pci_controller_root; p; p = p->next) {
		struct pci_pbm_info *pbm;

		if (p->pbm_A.prom_node && p->pbm_B.prom_node)
			continue;

		pbm = (p->pbm_A.prom_node ?
		       &p->pbm_A :
		       &p->pbm_B);

		if (pbm->devhandle == (devhandle ^ 0x40)) {
			pci_sun4v_pbm_init(p, dp, devhandle);
			return;
		}
	}

	for_each_possible_cpu(i) {
		unsigned long page = get_zeroed_page(GFP_ATOMIC);

		if (!page)
			goto fatal_memory_error;

		per_cpu(pci_iommu_batch, i).pglist = (u64 *) page;
	}

	p = kzalloc(sizeof(struct pci_controller_info), GFP_ATOMIC);
	if (!p)
		goto fatal_memory_error;

	iommu = kzalloc(sizeof(struct pci_iommu), GFP_ATOMIC);
	if (!iommu)
		goto fatal_memory_error;

	p->pbm_A.iommu = iommu;

	iommu = kzalloc(sizeof(struct pci_iommu), GFP_ATOMIC);
	if (!iommu)
		goto fatal_memory_error;

	p->pbm_B.iommu = iommu;

	p->next = pci_controller_root;
	pci_controller_root = p;

	p->index = pci_num_controllers++;
	p->pbms_same_domain = 0;

	p->scan_bus = pci_sun4v_scan_bus;
	p->base_address_update = pci_sun4v_base_address_update;
#ifdef CONFIG_PCI_MSI
	p->setup_msi_irq = pci_sun4v_setup_msi_irq;
	p->teardown_msi_irq = pci_sun4v_teardown_msi_irq;
#endif
	p->pci_ops = &pci_sun4v_ops;

	/* Like PSYCHO and SCHIZO we have a 2GB aligned area
	 * for memory space.
	 */
	pci_memspace_mask = 0x7fffffffUL;

	pci_sun4v_pbm_init(p, dp, devhandle);
	return;

fatal_memory_error:
	prom_printf("SUN4V_PCI: Fatal memory allocation error.\n");
	prom_halt();
}
1397 prom_printf("SUN4V_PCI: Fatal memory allocation error.\n");