/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 */
#include <drm/drmP.h>
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
/*
 * GPUVM
 * GPUVM is similar to the legacy GART on older ASICs, however
 * rather than there being a single global GART table
 * for the entire GPU, there are multiple VM page tables active
 * at any given time. The VM page tables can contain a mix of
 * VRAM pages and system memory pages, and system memory pages
 * can be mapped as snooped (cached system pages) or unsnooped
 * (uncached system pages).
 * Each VM has an ID associated with it and there is a page table
 * associated with each VMID. When executing a command buffer,
 * the kernel tells the ring what VMID to use for that command
 * buffer. VMIDs are allocated dynamically as commands are submitted.
 * The userspace drivers maintain their own address space and the kernel
 * sets up their page tables accordingly when they submit their
 * command buffers and a VMID is assigned.
 * Cayman/Trinity support up to 8 active VMs at any given time.
 */
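/*
 * Address translation here is a simple two level walk: for a GPU page
 * number the page directory index is (pfn >> amdgpu_vm_block_size) and the
 * index into the selected page table is (pfn & (AMDGPU_VM_PTE_COUNT - 1)),
 * matching the helpers below and the mask used in amdgpu_vm_update_ptes().
 */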
/**
 * amdgpu_vm_num_pdes - return the number of page directory entries
 *
 * @adev: amdgpu_device pointer
 *
 * Calculate the number of page directory entries.
 */
static unsigned amdgpu_vm_num_pdes(struct amdgpu_device *adev)
{
	return adev->vm_manager.max_pfn >> amdgpu_vm_block_size;
}
/**
 * amdgpu_vm_directory_size - returns the size of the page directory in bytes
 *
 * @adev: amdgpu_device pointer
 *
 * Calculate the size of the page directory in bytes.
 */
static unsigned amdgpu_vm_directory_size(struct amdgpu_device *adev)
{
	return AMDGPU_GPU_PAGE_ALIGN(amdgpu_vm_num_pdes(adev) * 8);
}
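/*
 * Each page directory entry is 8 bytes, so the directory needs
 * amdgpu_vm_num_pdes() * 8 bytes, rounded up to the GPU page size by
 * AMDGPU_GPU_PAGE_ALIGN() above; amdgpu_vm_init() allocates the page
 * directory BO with exactly this size.
 */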
/**
 * amdgpu_vm_get_pd_bo - add the VM PD to a validation list
 *
 * @vm: vm providing the BOs
 * @validated: head of validation list
 * @entry: entry to add
 *
 * Add the page directory to the list of BOs to
 * validate for command submission.
 */
void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
			 struct list_head *validated,
			 struct amdgpu_bo_list_entry *entry)
{
	entry->robj = vm->page_directory;
	entry->tv.bo = &vm->page_directory->tbo;
	entry->tv.shared = true;
	list_add(&entry->tv.head, validated);
}
/**
 * amdgpu_vm_get_pt_bos - add the vm page table BOs to a duplicates list
 *
 * @vm: vm providing the BOs
 * @duplicates: head of duplicates list
 *
 * Add the page table BOs to the BO duplicates list
 * for command submission.
 */
void amdgpu_vm_get_pt_bos(struct amdgpu_vm *vm, struct list_head *duplicates)
{
	unsigned i;

	/* add the vm page table to the list */
	for (i = 0; i <= vm->max_pde_used; ++i) {
		struct amdgpu_bo_list_entry *entry = &vm->page_tables[i].entry;

		if (!entry->robj)
			continue;

		list_add(&entry->tv.head, duplicates);
	}
}
/**
 * amdgpu_vm_move_pt_bos_in_lru - move the PT BOs to the LRU tail
 *
 * @adev: amdgpu device instance
 * @vm: vm providing the BOs
 *
 * Move the PT BOs to the tail of the LRU.
 */
void amdgpu_vm_move_pt_bos_in_lru(struct amdgpu_device *adev,
				  struct amdgpu_vm *vm)
{
	struct ttm_bo_global *glob = adev->mman.bdev.glob;
	unsigned i;

	spin_lock(&glob->lru_lock);
	for (i = 0; i <= vm->max_pde_used; ++i) {
		struct amdgpu_bo_list_entry *entry = &vm->page_tables[i].entry;

		if (!entry->robj)
			continue;

		ttm_bo_move_to_lru_tail(&entry->robj->tbo);
	}
	spin_unlock(&glob->lru_lock);
}
/**
 * amdgpu_vm_grab_id - allocate the next free VMID
 *
 * @vm: vm to allocate id for
 * @ring: ring we want to submit job to
 * @sync: sync object where we add dependencies
 * @fence: fence protecting ID from reuse
 *
 * Allocate an id for the vm, adding fences to the sync obj as necessary.
 */
int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
		      struct amdgpu_sync *sync, struct fence *fence)
{
	struct amdgpu_vm_id *vm_id = &vm->ids[ring->idx];
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_vm_manager_id *id;
	int r;

	mutex_lock(&adev->vm_manager.lock);

	/* check if the id is still valid */
	if (vm_id->id) {
		long owner;

		id = &adev->vm_manager.ids[vm_id->id];
		owner = atomic_long_read(&id->owner);
		if (owner == (long)vm) {
			list_move_tail(&id->list, &adev->vm_manager.ids_lru);
			trace_amdgpu_vm_grab_id(vm, vm_id->id, ring->idx);

			fence_put(id->active);
			id->active = fence_get(fence);

			mutex_unlock(&adev->vm_manager.lock);
			return 0;
		}
	}

	/* we definitely need to flush */
	vm_id->pd_gpu_addr = ~0ll;

	id = list_first_entry(&adev->vm_manager.ids_lru,
			      struct amdgpu_vm_manager_id,
			      list);
	list_move_tail(&id->list, &adev->vm_manager.ids_lru);
	atomic_long_set(&id->owner, (long)vm);
	vm_id->id = id - adev->vm_manager.ids;
	trace_amdgpu_vm_grab_id(vm, vm_id->id, ring->idx);

	r = amdgpu_sync_fence(ring->adev, sync, id->active);

	if (!r) {
		fence_put(id->active);
		id->active = fence_get(fence);
	}

	mutex_unlock(&adev->vm_manager.lock);
	return r;
}
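/*
 * VMID handling above: when the VM still owns the id it used last time,
 * that id is simply moved to the LRU tail and protected with the new
 * fence. Otherwise the least recently used id is taken over, pd_gpu_addr
 * is invalidated so the next submission flushes, and the previous owner's
 * active fence is added to the sync object so the new user waits for it.
 */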
/**
 * amdgpu_vm_flush - hardware flush the vm
 *
 * @ring: ring to use for flush
 * @vm: vm we want to flush
 * @updates: last vm update that we waited for
 *
 * Flush the vm.
 */
void amdgpu_vm_flush(struct amdgpu_ring *ring,
		     struct amdgpu_vm *vm,
		     struct fence *updates)
{
	uint64_t pd_addr = amdgpu_bo_gpu_offset(vm->page_directory);
	struct amdgpu_vm_id *vm_id = &vm->ids[ring->idx];
	struct fence *flushed_updates = vm_id->flushed_updates;
	bool is_later;

	if (!flushed_updates)
		is_later = true;
	else
		is_later = fence_is_later(updates, flushed_updates);

	if (pd_addr != vm_id->pd_gpu_addr || is_later) {
		trace_amdgpu_vm_flush(pd_addr, ring->idx, vm_id->id);
		if (is_later) {
			vm_id->flushed_updates = fence_get(updates);
			fence_put(flushed_updates);
		}
		vm_id->pd_gpu_addr = pd_addr;
		amdgpu_ring_emit_vm_flush(ring, vm_id->id, vm_id->pd_gpu_addr);
	}
}
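/*
 * A flush is only emitted when the page directory address changed or when
 * @updates is newer than the last update flushed for this ring, which
 * keeps redundant VM flushes off the ring.
 */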
/**
 * amdgpu_vm_bo_find - find the bo_va for a specific vm & bo
 *
 * @vm: requested vm
 * @bo: requested buffer object
 *
 * Find @bo inside the requested vm.
 * Search inside the @bo's vm list for the requested vm
 * Returns the found bo_va or NULL if none is found
 *
 * Object has to be reserved!
 */
struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
				       struct amdgpu_bo *bo)
{
	struct amdgpu_bo_va *bo_va;

	list_for_each_entry(bo_va, &bo->va, bo_list) {
		if (bo_va->vm == vm)
			return bo_va;
	}

	return NULL;
}
/**
 * amdgpu_vm_update_pages - helper to call the right asic function
 *
 * @adev: amdgpu_device pointer
 * @gtt: GART instance to use for mapping
 * @gtt_flags: GTT hw access flags
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: hw access flags
 *
 * Traces the parameters and calls the right asic functions
 * to setup the page table using the DMA.
 */
static void amdgpu_vm_update_pages(struct amdgpu_device *adev,
				   struct amdgpu_gart *gtt,
				   uint32_t gtt_flags,
				   struct amdgpu_ib *ib,
				   uint64_t pe, uint64_t addr,
				   unsigned count, uint32_t incr,
				   uint32_t flags)
{
	trace_amdgpu_vm_set_page(pe, addr, count, incr, flags);

	if ((gtt == &adev->gart) && (flags == gtt_flags)) {
		uint64_t src = gtt->table_addr + (addr >> 12) * 8;
		amdgpu_vm_copy_pte(adev, ib, pe, src, count);

	} else if (gtt) {
		dma_addr_t *pages_addr = gtt->pages_addr;
		amdgpu_vm_write_pte(adev, ib, pages_addr, pe, addr,
				    count, incr, flags);

	} else if (count < 3) {
		amdgpu_vm_write_pte(adev, ib, NULL, pe, addr,
				    count, incr, flags);

	} else {
		amdgpu_vm_set_pte_pde(adev, ib, pe, addr,
				      count, incr, flags);
	}
}
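/*
 * amdgpu_vm_update_pages() picks one of four update paths: copy PTEs
 * straight out of the GART table when the requested flags match the GTT
 * flags, write PTEs with a per-page DMA address lookup for other GTT
 * mappings, write a handful of PTEs directly, or use set_pte_pde for
 * larger runs of contiguous addresses.
 */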
/**
 * amdgpu_vm_clear_bo - initially clear the page dir/table
 *
 * @adev: amdgpu_device pointer
 * @vm: requested vm
 * @bo: bo to clear
 *
 * need to reserve bo first before calling it.
 */
static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
			      struct amdgpu_vm *vm,
			      struct amdgpu_bo *bo)
{
	struct amdgpu_ring *ring;
	struct fence *fence = NULL;
	struct amdgpu_job *job;
	unsigned entries;
	uint64_t addr;
	int r;

	ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);

	r = reservation_object_reserve_shared(bo->tbo.resv);
	if (r)
		return r;

	r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
	if (r)
		goto error;

	addr = amdgpu_bo_gpu_offset(bo);
	entries = amdgpu_bo_size(bo) / 8;

	r = amdgpu_job_alloc_with_ib(adev, 64, &job);
	if (r)
		goto error;

	amdgpu_vm_update_pages(adev, NULL, 0, &job->ibs[0], addr, 0, entries,
			       0, 0);
	amdgpu_ring_pad_ib(ring, &job->ibs[0]);

	WARN_ON(job->ibs[0].length_dw > 64);
	r = amdgpu_job_submit(job, ring, &vm->entity,
			      AMDGPU_FENCE_OWNER_VM, &fence);
	if (r)
		goto error_free;

	amdgpu_bo_fence(bo, fence, true);
	fence_put(fence);
	return 0;

error_free:
	amdgpu_job_free(job);

error:
	return r;
}
/**
 * amdgpu_vm_map_gart - Resolve gart mapping of addr
 *
 * @pages_addr: optional DMA address to use for lookup
 * @addr: the unmapped addr
 *
 * Look up the physical address of the page that the pte resolves
 * to and return the pointer for the page table entry.
 */
uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr)
{
	uint64_t result;

	if (pages_addr) {
		/* page table offset */
		result = pages_addr[addr >> PAGE_SHIFT];

		/* in case cpu page size != gpu page size */
		result |= addr & (~PAGE_MASK);

	} else {
		/* No mapping required */
		result = addr;
	}

	result &= 0xFFFFFFFFFFFFF000ULL;

	return result;
}
/**
 * amdgpu_vm_update_page_directory - make sure that page directory is valid
 *
 * @adev: amdgpu_device pointer
 * @vm: requested vm
 *
 * Allocates new page tables if necessary
 * and updates the page directory.
 * Returns 0 for success, error for failure.
 */
int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
				    struct amdgpu_vm *vm)
{
	struct amdgpu_ring *ring;
	struct amdgpu_bo *pd = vm->page_directory;
	uint64_t pd_addr = amdgpu_bo_gpu_offset(pd);
	uint32_t incr = AMDGPU_VM_PTE_COUNT * 8;
	uint64_t last_pde = ~0, last_pt = ~0;
	unsigned count = 0, pt_idx, ndw;
	struct amdgpu_job *job;
	struct amdgpu_ib *ib;
	struct fence *fence = NULL;
	int r;

	ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);

	/* padding, etc. */
	ndw = 64;

	/* assume the worst case */
	ndw += vm->max_pde_used * 6;

	r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job);
	if (r)
		return r;

	ib = &job->ibs[0];

	/* walk over the address space and update the page directory */
	for (pt_idx = 0; pt_idx <= vm->max_pde_used; ++pt_idx) {
		struct amdgpu_bo *bo = vm->page_tables[pt_idx].entry.robj;
		uint64_t pde, pt;

		if (bo == NULL)
			continue;

		pt = amdgpu_bo_gpu_offset(bo);
		if (vm->page_tables[pt_idx].addr == pt)
			continue;
		vm->page_tables[pt_idx].addr = pt;

		pde = pd_addr + pt_idx * 8;
		if (((last_pde + 8 * count) != pde) ||
		    ((last_pt + incr * count) != pt)) {

			if (count)
				amdgpu_vm_update_pages(adev, NULL, 0, ib,
						       last_pde, last_pt,
						       count, incr,
						       AMDGPU_PTE_VALID);

			count = 1;
			last_pde = pde;
			last_pt = pt;
		} else {
			++count;
		}
	}

	if (count)
		amdgpu_vm_update_pages(adev, NULL, 0, ib, last_pde, last_pt,
				       count, incr, AMDGPU_PTE_VALID);

	if (ib->length_dw != 0) {
		amdgpu_ring_pad_ib(ring, ib);
		amdgpu_sync_resv(adev, &job->sync, pd->tbo.resv,
				 AMDGPU_FENCE_OWNER_VM);
		WARN_ON(ib->length_dw > ndw);
		r = amdgpu_job_submit(job, ring, &vm->entity,
				      AMDGPU_FENCE_OWNER_VM, &fence);
		if (r)
			goto error_free;

		amdgpu_bo_fence(pd, fence, true);
		fence_put(vm->page_directory_fence);
		vm->page_directory_fence = fence_get(fence);
		fence_put(fence);
	} else {
		amdgpu_job_free(job);
	}

	return 0;

error_free:
	amdgpu_job_free(job);
	return r;
}
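/*
 * The walk above coalesces directory updates: as long as both the PDEs and
 * the page tables they point to stay contiguous only count is increased,
 * and a single amdgpu_vm_update_pages() call then writes the whole run of
 * directory entries at once.
 */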
/**
 * amdgpu_vm_frag_ptes - add fragment information to PTEs
 *
 * @adev: amdgpu_device pointer
 * @gtt: GART instance to use for mapping
 * @gtt_flags: GTT hw mapping flags
 * @ib: IB for the update
 * @pe_start: first PTE to handle
 * @pe_end: last PTE to handle
 * @addr: addr those PTEs should point to
 * @flags: hw mapping flags
 */
static void amdgpu_vm_frag_ptes(struct amdgpu_device *adev,
				struct amdgpu_gart *gtt,
				uint32_t gtt_flags,
				struct amdgpu_ib *ib,
				uint64_t pe_start, uint64_t pe_end,
				uint64_t addr, uint32_t flags)
{
	/*
	 * The MC L1 TLB supports variable sized pages, based on a fragment
	 * field in the PTE. When this field is set to a non-zero value, page
	 * granularity is increased from 4KB to (1 << (12 + frag)). The PTE
	 * flags are considered valid for all PTEs within the fragment range
	 * and corresponding mappings are assumed to be physically contiguous.
	 *
	 * The L1 TLB can store a single PTE for the whole fragment,
	 * significantly increasing the space available for translation
	 * caching. This leads to large improvements in throughput when the
	 * TLB is under pressure.
	 *
	 * The L2 TLB distributes small and large fragments into two
	 * asymmetric partitions. The large fragment cache is significantly
	 * larger. Thus, we try to use large fragments wherever possible.
	 * Userspace can support this by aligning virtual base address and
	 * allocation size to the fragment size.
	 */

	/* SI and newer are optimized for 64KB */
	uint64_t frag_flags = AMDGPU_PTE_FRAG_64KB;
	uint64_t frag_align = 0x80;

	uint64_t frag_start = ALIGN(pe_start, frag_align);
	uint64_t frag_end = pe_end & ~(frag_align - 1);

	unsigned count;

	/* Abort early if there isn't anything to do */
	if (pe_start == pe_end)
		return;

	/* system pages are not contiguous */
	if (gtt || !(flags & AMDGPU_PTE_VALID) || (frag_start >= frag_end)) {

		count = (pe_end - pe_start) / 8;
		amdgpu_vm_update_pages(adev, gtt, gtt_flags, ib, pe_start,
				       addr, count, AMDGPU_GPU_PAGE_SIZE,
				       flags);
		return;
	}

	/* handle the 4K area at the beginning */
	if (pe_start != frag_start) {
		count = (frag_start - pe_start) / 8;
		amdgpu_vm_update_pages(adev, NULL, 0, ib, pe_start, addr,
				       count, AMDGPU_GPU_PAGE_SIZE, flags);
		addr += AMDGPU_GPU_PAGE_SIZE * count;
	}

	/* handle the area in the middle */
	count = (frag_end - frag_start) / 8;
	amdgpu_vm_update_pages(adev, NULL, 0, ib, frag_start, addr, count,
			       AMDGPU_GPU_PAGE_SIZE, flags | frag_flags);

	/* handle the 4K area at the end */
	if (frag_end != pe_end) {
		addr += AMDGPU_GPU_PAGE_SIZE * count;
		count = (pe_end - frag_end) / 8;
		amdgpu_vm_update_pages(adev, NULL, 0, ib, frag_end, addr,
				       count, AMDGPU_GPU_PAGE_SIZE, flags);
	}
}
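/*
 * The fragment alignment of 0x80 above is in bytes of PTEs: 0x80 / 8 = 16
 * PTEs, i.e. 16 * 4KB = 64KB of address space, which matches the
 * AMDGPU_PTE_FRAG_64KB fragment size used for the middle part of the range.
 */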
/**
 * amdgpu_vm_update_ptes - make sure that page tables are valid
 *
 * @adev: amdgpu_device pointer
 * @gtt: GART instance to use for mapping
 * @gtt_flags: GTT hw mapping flags
 * @vm: requested vm
 * @ib: indirect buffer to fill with commands
 * @start: start of GPU address range
 * @end: end of GPU address range
 * @dst: destination address to map to
 * @flags: mapping flags
 *
 * Update the page tables in the range @start - @end.
 */
static void amdgpu_vm_update_ptes(struct amdgpu_device *adev,
				  struct amdgpu_gart *gtt,
				  uint32_t gtt_flags,
				  struct amdgpu_vm *vm,
				  struct amdgpu_ib *ib,
				  uint64_t start, uint64_t end,
				  uint64_t dst, uint32_t flags)
{
	const uint64_t mask = AMDGPU_VM_PTE_COUNT - 1;

	uint64_t last_pe_start = ~0, last_pe_end = ~0, last_dst = ~0;
	uint64_t addr;

	/* walk over the address space and update the page tables */
	for (addr = start; addr < end; ) {
		uint64_t pt_idx = addr >> amdgpu_vm_block_size;
		struct amdgpu_bo *pt = vm->page_tables[pt_idx].entry.robj;
		unsigned nptes;
		uint64_t pe_start;

		if ((addr & ~mask) == (end & ~mask))
			nptes = end - addr;
		else
			nptes = AMDGPU_VM_PTE_COUNT - (addr & mask);

		pe_start = amdgpu_bo_gpu_offset(pt);
		pe_start += (addr & mask) * 8;

		if (last_pe_end != pe_start) {
			amdgpu_vm_frag_ptes(adev, gtt, gtt_flags, ib,
					    last_pe_start, last_pe_end,
					    last_dst, flags);

			last_pe_start = pe_start;
			last_pe_end = pe_start + 8 * nptes;
			last_dst = dst;
		} else {
			last_pe_end += 8 * nptes;
		}

		addr += nptes;
		dst += nptes * AMDGPU_GPU_PAGE_SIZE;
	}

	amdgpu_vm_frag_ptes(adev, gtt, gtt_flags, ib,
			    last_pe_start, last_pe_end,
			    last_dst, flags);
}
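/*
 * amdgpu_vm_update_ptes() walks the range one page table at a time and
 * merges windows whose PTE addresses are adjacent, so amdgpu_vm_frag_ptes()
 * is called once per contiguous run of PTEs rather than once per page table.
 */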
/**
 * amdgpu_vm_bo_update_mapping - update a mapping in the vm page table
 *
 * @adev: amdgpu_device pointer
 * @gtt: GART instance to use for mapping
 * @gtt_flags: flags as they are used for GTT
 * @vm: requested vm
 * @start: start of mapped range
 * @last: last mapped entry
 * @flags: flags for the entries
 * @addr: addr to set the area to
 * @fence: optional resulting fence
 *
 * Fill in the page table entries between @start and @last.
 * Returns 0 for success, -EINVAL for failure.
 */
static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
					struct amdgpu_gart *gtt,
					uint32_t gtt_flags,
					struct amdgpu_vm *vm,
					uint64_t start, uint64_t last,
					uint32_t flags, uint64_t addr,
					struct fence **fence)
{
	struct amdgpu_ring *ring;
	void *owner = AMDGPU_FENCE_OWNER_VM;
	unsigned nptes, ncmds, ndw;
	struct amdgpu_job *job;
	struct amdgpu_ib *ib;
	struct fence *f = NULL;
	int r;

	ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);

	/* sync to everything on unmapping */
	if (!(flags & AMDGPU_PTE_VALID))
		owner = AMDGPU_FENCE_OWNER_UNDEFINED;

	nptes = last - start + 1;

	/*
	 * reserve space for one command every (1 << BLOCK_SIZE)
	 * entries or 2k dwords (whatever is smaller)
	 */
	ncmds = (nptes >> min(amdgpu_vm_block_size, 11)) + 1;

	/* padding, etc. */
	ndw = 64;

	if ((gtt == &adev->gart) && (flags == gtt_flags)) {
		/* only copy commands needed */
		ndw += ncmds * 7;

	} else if (gtt) {
		/* header for write data commands */
		ndw += ncmds * 4;

		/* body of write data command */
		ndw += nptes * 2;

	} else {
		/* set page commands needed */
		ndw += ncmds * 10;

		/* two extra commands for begin/end of fragment */
		ndw += 2 * 10;
	}

	r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job);
	if (r)
		return r;

	ib = &job->ibs[0];

	r = amdgpu_sync_resv(adev, &job->sync, vm->page_directory->tbo.resv,
			     owner);
	if (r)
		goto error_free;

	r = reservation_object_reserve_shared(vm->page_directory->tbo.resv);
	if (r)
		goto error_free;

	amdgpu_vm_update_ptes(adev, gtt, gtt_flags, vm, ib, start, last + 1,
			      addr, flags);

	amdgpu_ring_pad_ib(ring, ib);
	WARN_ON(ib->length_dw > ndw);
	r = amdgpu_job_submit(job, ring, &vm->entity,
			      AMDGPU_FENCE_OWNER_VM, &f);
	if (r)
		goto error_free;

	amdgpu_bo_fence(vm->page_directory, f, true);
	if (fence) {
		fence_put(*fence);
		*fence = fence_get(f);
	}
	fence_put(f);
	return 0;

error_free:
	amdgpu_job_free(job);
	return r;
}
/**
 * amdgpu_vm_bo_split_mapping - split a mapping into smaller chunks
 *
 * @adev: amdgpu_device pointer
 * @gtt: GART instance to use for mapping
 * @gtt_flags: flags as they are used for GTT
 * @vm: requested vm
 * @mapping: mapped range and flags to use for the update
 * @addr: addr to set the area to
 * @fence: optional resulting fence
 *
 * Split the mapping into smaller chunks so that each update fits
 * into a single IB.
 * Returns 0 for success, -EINVAL for failure.
 */
static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
				      struct amdgpu_gart *gtt,
				      uint32_t gtt_flags,
				      struct amdgpu_vm *vm,
				      struct amdgpu_bo_va_mapping *mapping,
				      uint64_t addr, struct fence **fence)
{
	const uint64_t max_size = 64ULL * 1024ULL * 1024ULL / AMDGPU_GPU_PAGE_SIZE;

	uint64_t start = mapping->it.start;
	uint32_t flags = gtt_flags;
	int r;

	/* normally, bo_va->flags only contains the READABLE and WRITEABLE
	 * bits here, but just in case we filter the flags first
	 */
	if (!(mapping->flags & AMDGPU_PTE_READABLE))
		flags &= ~AMDGPU_PTE_READABLE;
	if (!(mapping->flags & AMDGPU_PTE_WRITEABLE))
		flags &= ~AMDGPU_PTE_WRITEABLE;

	trace_amdgpu_vm_bo_update(mapping);

	addr += mapping->offset;

	if (!gtt || ((gtt == &adev->gart) && (flags == gtt_flags)))
		return amdgpu_vm_bo_update_mapping(adev, gtt, gtt_flags, vm,
						   start, mapping->it.last,
						   flags, addr, fence);

	while (start != mapping->it.last + 1) {
		uint64_t last;

		last = min((uint64_t)mapping->it.last, start + max_size);
		r = amdgpu_vm_bo_update_mapping(adev, gtt, gtt_flags, vm,
						start, last, flags, addr,
						fence);
		if (r)
			return r;

		start = last + 1;
		addr += max_size * AMDGPU_GPU_PAGE_SIZE;
	}

	return 0;
}
/**
 * amdgpu_vm_bo_update - update all BO mappings in the vm page table
 *
 * @adev: amdgpu_device pointer
 * @bo_va: requested BO and VM object
 * @mem: ttm mem
 *
 * Fill in the page table entries for @bo_va.
 * Returns 0 for success, -EINVAL for failure.
 *
 * Object has to be reserved and mutex must be locked!
 */
int amdgpu_vm_bo_update(struct amdgpu_device *adev,
			struct amdgpu_bo_va *bo_va,
			struct ttm_mem_reg *mem)
{
	struct amdgpu_vm *vm = bo_va->vm;
	struct amdgpu_bo_va_mapping *mapping;
	struct amdgpu_gart *gtt = NULL;
	uint32_t flags;
	uint64_t addr;
	int r;

	if (mem) {
		addr = (u64)mem->start << PAGE_SHIFT;
		switch (mem->mem_type) {
		case TTM_PL_TT:
			gtt = &bo_va->bo->adev->gart;
			break;

		case TTM_PL_VRAM:
			addr += adev->vm_manager.vram_base_offset;
			break;

		default:
			break;
		}
	} else {
		addr = 0;
	}

	flags = amdgpu_ttm_tt_pte_flags(adev, bo_va->bo->tbo.ttm, mem);

	spin_lock(&vm->status_lock);
	if (!list_empty(&bo_va->vm_status))
		list_splice_init(&bo_va->valids, &bo_va->invalids);
	spin_unlock(&vm->status_lock);

	list_for_each_entry(mapping, &bo_va->invalids, list) {
		r = amdgpu_vm_bo_split_mapping(adev, gtt, flags, vm, mapping, addr,
					       &bo_va->last_pt_update);
		if (r)
			return r;
	}

	if (trace_amdgpu_vm_bo_mapping_enabled()) {
		list_for_each_entry(mapping, &bo_va->valids, list)
			trace_amdgpu_vm_bo_mapping(mapping);

		list_for_each_entry(mapping, &bo_va->invalids, list)
			trace_amdgpu_vm_bo_mapping(mapping);
	}

	spin_lock(&vm->status_lock);
	list_splice_init(&bo_va->invalids, &bo_va->valids);
	list_del_init(&bo_va->vm_status);
	if (!mem)
		list_add(&bo_va->vm_status, &vm->cleared);
	spin_unlock(&vm->status_lock);

	return 0;
}
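/*
 * amdgpu_vm_bo_update() first moves any valid mappings back to the invalid
 * list while the bo_va is on a status list, rewrites all invalid mappings,
 * and then declares them valid again; bo_vas updated without a backing
 * memory location are put on the vm->cleared list instead.
 */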
/**
 * amdgpu_vm_clear_freed - clear freed BOs in the PT
 *
 * @adev: amdgpu_device pointer
 * @vm: requested vm
 *
 * Make sure all freed BOs are cleared in the PT.
 * Returns 0 for success.
 *
 * PTs have to be reserved and mutex must be locked!
 */
int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
			  struct amdgpu_vm *vm)
{
	struct amdgpu_bo_va_mapping *mapping;
	int r;

	spin_lock(&vm->freed_lock);
	while (!list_empty(&vm->freed)) {
		mapping = list_first_entry(&vm->freed,
			struct amdgpu_bo_va_mapping, list);
		list_del(&mapping->list);
		spin_unlock(&vm->freed_lock);
		r = amdgpu_vm_bo_split_mapping(adev, NULL, 0, vm, mapping,
					       0, NULL);
		kfree(mapping);
		if (r)
			return r;

		spin_lock(&vm->freed_lock);
	}
	spin_unlock(&vm->freed_lock);

	return 0;
}
/**
 * amdgpu_vm_clear_invalids - clear invalidated BOs in the PT
 *
 * @adev: amdgpu_device pointer
 * @vm: requested vm
 * @sync: sync object to add fences to
 *
 * Make sure all invalidated BOs are cleared in the PT.
 * Returns 0 for success.
 *
 * PTs have to be reserved and mutex must be locked!
 */
int amdgpu_vm_clear_invalids(struct amdgpu_device *adev,
			     struct amdgpu_vm *vm, struct amdgpu_sync *sync)
{
	struct amdgpu_bo_va *bo_va = NULL;
	int r = 0;

	spin_lock(&vm->status_lock);
	while (!list_empty(&vm->invalidated)) {
		bo_va = list_first_entry(&vm->invalidated,
			struct amdgpu_bo_va, vm_status);
		spin_unlock(&vm->status_lock);
		mutex_lock(&bo_va->mutex);
		r = amdgpu_vm_bo_update(adev, bo_va, NULL);
		mutex_unlock(&bo_va->mutex);
		if (r)
			return r;

		spin_lock(&vm->status_lock);
	}
	spin_unlock(&vm->status_lock);

	if (bo_va)
		r = amdgpu_sync_fence(adev, sync, bo_va->last_pt_update);

	return r;
}
/**
 * amdgpu_vm_bo_add - add a bo to a specific vm
 *
 * @adev: amdgpu_device pointer
 * @vm: requested vm
 * @bo: amdgpu buffer object
 *
 * Add @bo into the requested vm.
 * Add @bo to the list of bos associated with the vm
 * Returns newly added bo_va or NULL for failure
 *
 * Object has to be reserved!
 */
struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
				      struct amdgpu_vm *vm,
				      struct amdgpu_bo *bo)
{
	struct amdgpu_bo_va *bo_va;

	bo_va = kzalloc(sizeof(struct amdgpu_bo_va), GFP_KERNEL);
	if (bo_va == NULL)
		return NULL;

	bo_va->vm = vm;
	bo_va->bo = bo;
	bo_va->ref_count = 1;
	INIT_LIST_HEAD(&bo_va->bo_list);
	INIT_LIST_HEAD(&bo_va->valids);
	INIT_LIST_HEAD(&bo_va->invalids);
	INIT_LIST_HEAD(&bo_va->vm_status);
	mutex_init(&bo_va->mutex);
	list_add_tail(&bo_va->bo_list, &bo->va);

	return bo_va;
}
/**
 * amdgpu_vm_bo_map - map bo inside a vm
 *
 * @adev: amdgpu_device pointer
 * @bo_va: bo_va to store the address
 * @saddr: where to map the BO
 * @offset: requested offset in the BO
 * @size: size of the mapping in bytes
 * @flags: attributes of pages (read/write/valid/etc.)
 *
 * Add a mapping of the BO at the specified addr into the VM.
 * Returns 0 for success, error for failure.
 *
 * Object has to be reserved and unreserved outside!
 */
int amdgpu_vm_bo_map(struct amdgpu_device *adev,
		     struct amdgpu_bo_va *bo_va,
		     uint64_t saddr, uint64_t offset,
		     uint64_t size, uint32_t flags)
{
	struct amdgpu_bo_va_mapping *mapping;
	struct amdgpu_vm *vm = bo_va->vm;
	struct interval_tree_node *it;
	unsigned last_pfn, pt_idx;
	uint64_t eaddr;
	int r;

	/* validate the parameters */
	if (saddr & AMDGPU_GPU_PAGE_MASK || offset & AMDGPU_GPU_PAGE_MASK ||
	    size == 0 || size & AMDGPU_GPU_PAGE_MASK)
		return -EINVAL;

	/* make sure object fit at this offset */
	eaddr = saddr + size - 1;
	if ((saddr >= eaddr) || (offset + size > amdgpu_bo_size(bo_va->bo)))
		return -EINVAL;

	last_pfn = eaddr / AMDGPU_GPU_PAGE_SIZE;
	if (last_pfn >= adev->vm_manager.max_pfn) {
		dev_err(adev->dev, "va above limit (0x%08X >= 0x%08X)\n",
			last_pfn, adev->vm_manager.max_pfn);
		return -EINVAL;
	}

	saddr /= AMDGPU_GPU_PAGE_SIZE;
	eaddr /= AMDGPU_GPU_PAGE_SIZE;

	spin_lock(&vm->it_lock);
	it = interval_tree_iter_first(&vm->va, saddr, eaddr);
	spin_unlock(&vm->it_lock);
	if (it) {
		struct amdgpu_bo_va_mapping *tmp;
		tmp = container_of(it, struct amdgpu_bo_va_mapping, it);
		/* bo and tmp overlap, invalid addr */
		dev_err(adev->dev, "bo %p va 0x%010Lx-0x%010Lx conflict with "
			"0x%010lx-0x%010lx\n", bo_va->bo, saddr, eaddr,
			tmp->it.start, tmp->it.last + 1);
		r = -EINVAL;
		goto error;
	}

	mapping = kmalloc(sizeof(*mapping), GFP_KERNEL);
	if (!mapping) {
		r = -ENOMEM;
		goto error;
	}

	INIT_LIST_HEAD(&mapping->list);
	mapping->it.start = saddr;
	mapping->it.last = eaddr;
	mapping->offset = offset;
	mapping->flags = flags;

	mutex_lock(&bo_va->mutex);
	list_add(&mapping->list, &bo_va->invalids);
	mutex_unlock(&bo_va->mutex);
	spin_lock(&vm->it_lock);
	interval_tree_insert(&mapping->it, &vm->va);
	spin_unlock(&vm->it_lock);
	trace_amdgpu_vm_bo_map(bo_va, mapping);

	/* Make sure the page tables are allocated */
	saddr >>= amdgpu_vm_block_size;
	eaddr >>= amdgpu_vm_block_size;

	BUG_ON(eaddr >= amdgpu_vm_num_pdes(adev));

	if (eaddr > vm->max_pde_used)
		vm->max_pde_used = eaddr;

	/* walk over the address space and allocate the page tables */
	for (pt_idx = saddr; pt_idx <= eaddr; ++pt_idx) {
		struct reservation_object *resv = vm->page_directory->tbo.resv;
		struct amdgpu_bo_list_entry *entry;
		struct amdgpu_bo *pt;

		entry = &vm->page_tables[pt_idx].entry;
		if (entry->robj)
			continue;

		r = amdgpu_bo_create(adev, AMDGPU_VM_PTE_COUNT * 8,
				     AMDGPU_GPU_PAGE_SIZE, true,
				     AMDGPU_GEM_DOMAIN_VRAM,
				     AMDGPU_GEM_CREATE_NO_CPU_ACCESS,
				     NULL, resv, &pt);
		if (r)
			goto error_free;

		/* Keep a reference to the page table to avoid freeing
		 * them up in the wrong order.
		 */
		pt->parent = amdgpu_bo_ref(vm->page_directory);

		r = amdgpu_vm_clear_bo(adev, vm, pt);
		if (r) {
			amdgpu_bo_unref(&pt);
			goto error_free;
		}

		entry->robj = pt;
		entry->priority = 0;
		entry->tv.bo = &entry->robj->tbo;
		entry->tv.shared = true;
		vm->page_tables[pt_idx].addr = 0;
	}

	return 0;

error_free:
	list_del(&mapping->list);
	spin_lock(&vm->it_lock);
	interval_tree_remove(&mapping->it, &vm->va);
	spin_unlock(&vm->it_lock);
	trace_amdgpu_vm_bo_unmap(bo_va, mapping);
	kfree(mapping);

error:
	return r;
}
/**
 * amdgpu_vm_bo_unmap - remove bo mapping from vm
 *
 * @adev: amdgpu_device pointer
 * @bo_va: bo_va to remove the address from
 * @saddr: where the BO is mapped
 *
 * Remove a mapping of the BO at the specified addr from the VM.
 * Returns 0 for success, error for failure.
 *
 * Object has to be reserved and unreserved outside!
 */
int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
		       struct amdgpu_bo_va *bo_va,
		       uint64_t saddr)
{
	struct amdgpu_bo_va_mapping *mapping;
	struct amdgpu_vm *vm = bo_va->vm;
	bool valid = true;

	saddr /= AMDGPU_GPU_PAGE_SIZE;
	mutex_lock(&bo_va->mutex);
	list_for_each_entry(mapping, &bo_va->valids, list) {
		if (mapping->it.start == saddr)
			break;
	}

	if (&mapping->list == &bo_va->valids) {
		valid = false;

		list_for_each_entry(mapping, &bo_va->invalids, list) {
			if (mapping->it.start == saddr)
				break;
		}

		if (&mapping->list == &bo_va->invalids) {
			mutex_unlock(&bo_va->mutex);
			return -ENOENT;
		}
	}
	mutex_unlock(&bo_va->mutex);
	list_del(&mapping->list);
	spin_lock(&vm->it_lock);
	interval_tree_remove(&mapping->it, &vm->va);
	spin_unlock(&vm->it_lock);
	trace_amdgpu_vm_bo_unmap(bo_va, mapping);

	if (valid) {
		spin_lock(&vm->freed_lock);
		list_add(&mapping->list, &vm->freed);
		spin_unlock(&vm->freed_lock);
	} else {
		kfree(mapping);
	}

	return 0;
}
/**
 * amdgpu_vm_bo_rmv - remove a bo from a specific vm
 *
 * @adev: amdgpu_device pointer
 * @bo_va: requested bo_va
 *
 * Remove @bo_va->bo from the requested vm.
 *
 * Object has to be reserved!
 */
void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
		      struct amdgpu_bo_va *bo_va)
{
	struct amdgpu_bo_va_mapping *mapping, *next;
	struct amdgpu_vm *vm = bo_va->vm;

	list_del(&bo_va->bo_list);

	spin_lock(&vm->status_lock);
	list_del(&bo_va->vm_status);
	spin_unlock(&vm->status_lock);

	list_for_each_entry_safe(mapping, next, &bo_va->valids, list) {
		list_del(&mapping->list);
		spin_lock(&vm->it_lock);
		interval_tree_remove(&mapping->it, &vm->va);
		spin_unlock(&vm->it_lock);
		trace_amdgpu_vm_bo_unmap(bo_va, mapping);
		spin_lock(&vm->freed_lock);
		list_add(&mapping->list, &vm->freed);
		spin_unlock(&vm->freed_lock);
	}
	list_for_each_entry_safe(mapping, next, &bo_va->invalids, list) {
		list_del(&mapping->list);
		spin_lock(&vm->it_lock);
		interval_tree_remove(&mapping->it, &vm->va);
		spin_unlock(&vm->it_lock);
		kfree(mapping);
	}
	fence_put(bo_va->last_pt_update);
	mutex_destroy(&bo_va->mutex);
	kfree(bo_va);
}
/**
 * amdgpu_vm_bo_invalidate - mark the bo as invalid
 *
 * @adev: amdgpu_device pointer
 * @bo: amdgpu buffer object
 *
 * Mark @bo as invalid.
 */
void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
			     struct amdgpu_bo *bo)
{
	struct amdgpu_bo_va *bo_va;

	list_for_each_entry(bo_va, &bo->va, bo_list) {
		spin_lock(&bo_va->vm->status_lock);
		if (list_empty(&bo_va->vm_status))
			list_add(&bo_va->vm_status, &bo_va->vm->invalidated);
		spin_unlock(&bo_va->vm->status_lock);
	}
}
/**
 * amdgpu_vm_init - initialize a vm instance
 *
 * @adev: amdgpu_device pointer
 * @vm: requested vm
 *
 * Init @vm fields.
 */
int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
{
	const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE,
		AMDGPU_VM_PTE_COUNT * 8);
	unsigned pd_size, pd_entries;
	unsigned ring_instance;
	struct amdgpu_ring *ring;
	struct amd_sched_rq *rq;
	int i, r;

	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
		vm->ids[i].id = 0;
		vm->ids[i].flushed_updates = NULL;
	}
	vm->va = RB_ROOT;
	spin_lock_init(&vm->status_lock);
	INIT_LIST_HEAD(&vm->invalidated);
	INIT_LIST_HEAD(&vm->cleared);
	INIT_LIST_HEAD(&vm->freed);
	spin_lock_init(&vm->it_lock);
	spin_lock_init(&vm->freed_lock);
	pd_size = amdgpu_vm_directory_size(adev);
	pd_entries = amdgpu_vm_num_pdes(adev);

	/* allocate page table array */
	vm->page_tables = drm_calloc_large(pd_entries, sizeof(struct amdgpu_vm_pt));
	if (vm->page_tables == NULL) {
		DRM_ERROR("Cannot allocate memory for page table array\n");
		return -ENOMEM;
	}

	/* create scheduler entity for page table updates */
	ring_instance = atomic_inc_return(&adev->vm_manager.vm_pte_next_ring);
	ring_instance %= adev->vm_manager.vm_pte_num_rings;
	ring = adev->vm_manager.vm_pte_rings[ring_instance];
	rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_KERNEL];
	r = amd_sched_entity_init(&ring->sched, &vm->entity,
				  rq, amdgpu_sched_jobs);
	if (r)
		return r;

	vm->page_directory_fence = NULL;

	r = amdgpu_bo_create(adev, pd_size, align, true,
			     AMDGPU_GEM_DOMAIN_VRAM,
			     AMDGPU_GEM_CREATE_NO_CPU_ACCESS,
			     NULL, NULL, &vm->page_directory);
	if (r)
		goto error_free_sched_entity;

	r = amdgpu_bo_reserve(vm->page_directory, false);
	if (r)
		goto error_free_page_directory;

	r = amdgpu_vm_clear_bo(adev, vm, vm->page_directory);
	amdgpu_bo_unreserve(vm->page_directory);
	if (r)
		goto error_free_page_directory;

	return 0;

error_free_page_directory:
	amdgpu_bo_unref(&vm->page_directory);
	vm->page_directory = NULL;

error_free_sched_entity:
	amd_sched_entity_fini(&ring->sched, &vm->entity);

	return r;
}
/**
 * amdgpu_vm_fini - tear down a vm instance
 *
 * @adev: amdgpu_device pointer
 * @vm: requested vm
 *
 * Tear down @vm.
 * Unbind the VM and remove all bos from the vm bo list
 */
void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
{
	struct amdgpu_bo_va_mapping *mapping, *tmp;
	int i;

	amd_sched_entity_fini(vm->entity.sched, &vm->entity);

	if (!RB_EMPTY_ROOT(&vm->va)) {
		dev_err(adev->dev, "still active bo inside vm\n");
	}
	rbtree_postorder_for_each_entry_safe(mapping, tmp, &vm->va, it.rb) {
		list_del(&mapping->list);
		interval_tree_remove(&mapping->it, &vm->va);
		kfree(mapping);
	}
	list_for_each_entry_safe(mapping, tmp, &vm->freed, list) {
		list_del(&mapping->list);
		kfree(mapping);
	}

	for (i = 0; i < amdgpu_vm_num_pdes(adev); i++)
		amdgpu_bo_unref(&vm->page_tables[i].entry.robj);
	drm_free_large(vm->page_tables);

	amdgpu_bo_unref(&vm->page_directory);
	fence_put(vm->page_directory_fence);
	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
		unsigned id = vm->ids[i].id;

		atomic_long_cmpxchg(&adev->vm_manager.ids[id].owner,
				    (long)vm, 0);
		fence_put(vm->ids[i].flushed_updates);
	}
}
/**
 * amdgpu_vm_manager_init - init the VM manager
 *
 * @adev: amdgpu_device pointer
 *
 * Initialize the VM manager structures
 */
void amdgpu_vm_manager_init(struct amdgpu_device *adev)
{
	unsigned i;

	INIT_LIST_HEAD(&adev->vm_manager.ids_lru);

	/* skip over VMID 0, since it is the system VM */
	for (i = 1; i < adev->vm_manager.num_ids; ++i)
		list_add_tail(&adev->vm_manager.ids[i].list,
			      &adev->vm_manager.ids_lru);

	atomic_set(&adev->vm_manager.vm_pte_next_ring, 0);
}
/**
 * amdgpu_vm_manager_fini - cleanup VM manager
 *
 * @adev: amdgpu_device pointer
 *
 * Cleanup the VM manager and free resources.
 */
void amdgpu_vm_manager_fini(struct amdgpu_device *adev)
{
	unsigned i;

	for (i = 0; i < AMDGPU_NUM_VM; ++i)
		fence_put(adev->vm_manager.ids[i].active);
}