/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * This module enables machines with Intel VT-x extensions to run virtual
 * machines without emulation or binary translation.
 *
 * Copyright (C) 2006 Qumranet, Inc.
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
 *
 * Authors:
 *   Yaniv Kamay  <yaniv@qumranet.com>
 *   Avi Kivity   <avi@qumranet.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 */
/*
 * We need the mmu code to access both 32-bit and 64-bit guest ptes,
 * so the code in this file is compiled twice, once per pte size.
 */
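/*
 * Illustrative sketch (not part of this file): mmu.c is expected to set
 * PTTYPE and include this template once per guest pte width, roughly
 *
 *	#define PTTYPE 64
 *	#include "paging_tmpl.h"
 *	#undef PTTYPE
 *
 *	#define PTTYPE 32
 *	#include "paging_tmpl.h"
 *	#undef PTTYPE
 *
 * so that every FNAME(x) below expands to paging64_x or paging32_x.
 */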
#if PTTYPE == 64
	#define pt_element_t u64
	#define guest_walker guest_walker64
	#define FNAME(name) paging##64_##name
	#define PT_BASE_ADDR_MASK PT64_BASE_ADDR_MASK
	#define PT_LVL_ADDR_MASK(lvl) PT64_LVL_ADDR_MASK(lvl)
	#define PT_LVL_OFFSET_MASK(lvl) PT64_LVL_OFFSET_MASK(lvl)
	#define PT_INDEX(addr, level) PT64_INDEX(addr, level)
	#define PT_LEVEL_MASK(level) PT64_LEVEL_MASK(level)
	#define PT_LEVEL_BITS PT64_LEVEL_BITS
	#ifdef CONFIG_X86_64
	#define PT_MAX_FULL_LEVELS 4
	#define CMPXCHG cmpxchg
	#else
	#define CMPXCHG cmpxchg64
	#define PT_MAX_FULL_LEVELS 2
	#endif
#elif PTTYPE == 32
	#define pt_element_t u32
	#define guest_walker guest_walker32
	#define FNAME(name) paging##32_##name
	#define PT_BASE_ADDR_MASK PT32_BASE_ADDR_MASK
	#define PT_LVL_ADDR_MASK(lvl) PT32_LVL_ADDR_MASK(lvl)
	#define PT_LVL_OFFSET_MASK(lvl) PT32_LVL_OFFSET_MASK(lvl)
	#define PT_INDEX(addr, level) PT32_INDEX(addr, level)
	#define PT_LEVEL_MASK(level) PT32_LEVEL_MASK(level)
	#define PT_LEVEL_BITS PT32_LEVEL_BITS
	#define PT_MAX_FULL_LEVELS 2
	#define CMPXCHG cmpxchg
#else
	#error Invalid PTTYPE value
#endif

#define gpte_to_gfn_lvl FNAME(gpte_to_gfn_lvl)
#define gpte_to_gfn(pte) gpte_to_gfn_lvl((pte), PT_PAGE_TABLE_LEVEL)
/*
 * The guest_walker structure emulates the behavior of the hardware page
 * table walker.
 */
struct guest_walker {
	int level;
	gfn_t table_gfn[PT_MAX_FULL_LEVELS];
	pt_element_t ptes[PT_MAX_FULL_LEVELS];
	pt_element_t prefetch_ptes[PTE_PREFETCH_NUM];
	gpa_t pte_gpa[PT_MAX_FULL_LEVELS];
	unsigned pt_access;
	unsigned pte_access;
	gfn_t gfn;
	u32 error_code;
};
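/*
 * Note (added for clarity): the per-level arrays above are indexed by
 * walker level minus one, so entry [0] always describes the last (leaf)
 * pte visited and entry [PT_MAX_FULL_LEVELS - 1] the root.
 * walk_addr_generic() below fills table_gfn[]/pte_gpa[] as it descends.
 */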
static gfn_t gpte_to_gfn_lvl(pt_element_t gpte, int lvl)
{
	return (gpte & PT_LVL_ADDR_MASK(lvl)) >> PAGE_SHIFT;
}
static bool FNAME(cmpxchg_gpte)(struct kvm *kvm,
				gfn_t table_gfn, unsigned index,
				pt_element_t orig_pte, pt_element_t new_pte)
{
	pt_element_t ret;
	pt_element_t *table;
	struct page *page;

	page = gfn_to_page(kvm, table_gfn);

	table = kmap_atomic(page, KM_USER0);
	ret = CMPXCHG(&table[index], orig_pte, new_pte);
	kunmap_atomic(table, KM_USER0);

	kvm_release_page_dirty(page);

	return (ret != orig_pte);
}
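/*
 * Intended use, sketched for clarity (see walk_addr_generic() below):
 * atomically fold the accessed or dirty bit into a guest pte without
 * losing a concurrent update made by the guest or by another vcpu:
 *
 *	if (FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn, index,
 *				pte, pte | PT_ACCESSED_MASK))
 *		restart the walk - the pte changed underneath us
 *
 * A "true" return value means the cached pte no longer matches guest
 * memory, so the caller must re-read it before relying on it.
 */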
static unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, pt_element_t gpte)
{
	unsigned access;

	access = (gpte & (PT_WRITABLE_MASK | PT_USER_MASK)) | ACC_EXEC_MASK;
#if PTTYPE == 64
	access &= ~(gpte >> PT64_NX_SHIFT);
#endif
	return access;
}
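/*
 * Note (added for clarity): ACC_EXEC_MASK is bit 0 of the access word and
 * the NX bit sits at bit PT64_NX_SHIFT (bit 63) of the gpte, so
 * "access &= ~(gpte >> PT64_NX_SHIFT)" lands the NX bit on the
 * exec-permission bit: an NX gpte loses ACC_EXEC_MASK, a non-NX gpte
 * keeps it.
 */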
/*
 * Fetch a guest pte for a guest virtual address
 */
static int FNAME(walk_addr_generic)(struct guest_walker *walker,
				    struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
				    gva_t addr, int write_fault,
				    int user_fault, int fetch_fault)
{
	pt_element_t pte;
	gfn_t table_gfn;
	unsigned index, pt_access, uninitialized_var(pte_access);
	gpa_t pte_gpa;
	bool eperm, present, rsvd_fault;
	int offset;

	trace_kvm_mmu_pagetable_walk(addr, write_fault, user_fault,
				     fetch_fault);
walk:
	present = true;
	eperm = rsvd_fault = false;
	walker->level = mmu->root_level;
	pte           = mmu->get_cr3(vcpu);

#if PTTYPE == 64
	if (walker->level == PT32E_ROOT_LEVEL) {
		pte = kvm_pdptr_read(vcpu, (addr >> 30) & 3);
		trace_kvm_mmu_paging_element(pte, walker->level);
		if (!is_present_gpte(pte)) {
			present = false;
			goto error;
		}
		--walker->level;
	}
#endif
	ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) ||
	       (mmu->get_cr3(vcpu) & CR3_NONPAE_RESERVED_BITS) == 0);

	pt_access = ACC_ALL;

	for (;;) {
		index = PT_INDEX(addr, walker->level);

		table_gfn = gpte_to_gfn(pte);
		offset    = index * sizeof(pt_element_t);
		pte_gpa   = gfn_to_gpa(table_gfn) + offset;
		walker->table_gfn[walker->level - 1] = table_gfn;
		walker->pte_gpa[walker->level - 1] = pte_gpa;

		if (kvm_read_guest_page_mmu(vcpu, mmu, table_gfn, &pte,
					    offset, sizeof(pte),
					    PFERR_USER_MASK|PFERR_WRITE_MASK)) {
			present = false;
			break;
		}

		trace_kvm_mmu_paging_element(pte, walker->level);

		if (!is_present_gpte(pte)) {
			present = false;
			break;
		}

		if (is_rsvd_bits_set(&vcpu->arch.mmu, pte, walker->level)) {
			rsvd_fault = true;
			break;
		}

		if (write_fault && !is_writable_pte(pte))
			if (user_fault || is_write_protection(vcpu))
				eperm = true;

		if (user_fault && !(pte & PT_USER_MASK))
			eperm = true;

#if PTTYPE == 64
		if (fetch_fault && (pte & PT64_NX_MASK))
			eperm = true;
#endif

		if (!eperm && !rsvd_fault && !(pte & PT_ACCESSED_MASK)) {
			trace_kvm_mmu_set_accessed_bit(table_gfn, index,
						       sizeof(pte));
			if (FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn,
			    index, pte, pte|PT_ACCESSED_MASK))
				goto walk;
			mark_page_dirty(vcpu->kvm, table_gfn);
			pte |= PT_ACCESSED_MASK;
		}

		pte_access = pt_access & FNAME(gpte_access)(vcpu, pte);

		walker->ptes[walker->level - 1] = pte;

		if ((walker->level == PT_PAGE_TABLE_LEVEL) ||
		    ((walker->level == PT_DIRECTORY_LEVEL) &&
		      is_large_pte(pte) &&
		      (PTTYPE == 64 || is_pse(vcpu))) ||
		    ((walker->level == PT_PDPE_LEVEL) &&
		      is_large_pte(pte) &&
		      mmu->root_level == PT64_ROOT_LEVEL)) {
			int lvl = walker->level;
			gpa_t real_gpa;
			gfn_t gfn;
			u32 access = 0;

			gfn = gpte_to_gfn_lvl(pte, lvl);
			gfn += (addr & PT_LVL_OFFSET_MASK(lvl)) >> PAGE_SHIFT;

			if (PTTYPE == 32 &&
			    walker->level == PT_DIRECTORY_LEVEL &&
			    is_cpuid_PSE36())
				gfn += pse36_gfn_delta(pte);

			access |= write_fault ? PFERR_WRITE_MASK : 0;
			access |= fetch_fault ? PFERR_FETCH_MASK : 0;
			access |= user_fault  ? PFERR_USER_MASK  : 0;

			real_gpa = mmu->translate_gpa(vcpu, gfn_to_gpa(gfn),
						      access);
			if (real_gpa == UNMAPPED_GVA)
				return 0;

			walker->gfn = real_gpa >> PAGE_SHIFT;

			break;
		}

		pt_access = pte_access;
		--walker->level;
	}

	if (!present || eperm || rsvd_fault)
		goto error;

	if (write_fault && !is_dirty_gpte(pte)) {
		bool ret;

		trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte));
		ret = FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn, index, pte,
					  pte|PT_DIRTY_MASK);
		if (ret)
			goto walk;
		mark_page_dirty(vcpu->kvm, table_gfn);
		pte |= PT_DIRTY_MASK;
		walker->ptes[walker->level - 1] = pte;
	}

	walker->pt_access = pt_access;
	walker->pte_access = pte_access;
	pgprintk("%s: pte %llx pte_access %x pt_access %x\n",
		 __func__, (u64)pte, pte_access, pt_access);
	return 1;

error:
	walker->error_code = 0;
	if (present)
		walker->error_code |= PFERR_PRESENT_MASK;
	if (write_fault)
		walker->error_code |= PFERR_WRITE_MASK;
	if (user_fault)
		walker->error_code |= PFERR_USER_MASK;
	if (fetch_fault && is_nx(vcpu))
		walker->error_code |= PFERR_FETCH_MASK;
	if (rsvd_fault)
		walker->error_code |= PFERR_RSVD_MASK;

	vcpu->arch.fault.address    = addr;
	vcpu->arch.fault.error_code = walker->error_code;

	trace_kvm_mmu_walker_error(walker->error_code);
	return 0;
}
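/*
 * Worked example (added for illustration): a guest user-mode write to a
 * present but read-only page leaves the loop with present == true and
 * eperm == true, so the error path builds
 * PFERR_PRESENT_MASK | PFERR_WRITE_MASK | PFERR_USER_MASK, which is
 * exactly the error code the guest expects for that protection fault.
 */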
static int FNAME(walk_addr)(struct guest_walker *walker,
			    struct kvm_vcpu *vcpu, gva_t addr,
			    int write_fault, int user_fault, int fetch_fault)
{
	return FNAME(walk_addr_generic)(walker, vcpu, &vcpu->arch.mmu, addr,
					write_fault, user_fault, fetch_fault);
}
static int FNAME(walk_addr_nested)(struct guest_walker *walker,
				   struct kvm_vcpu *vcpu, gva_t addr,
				   int write_fault, int user_fault,
				   int fetch_fault)
{
	return FNAME(walk_addr_generic)(walker, vcpu, &vcpu->arch.nested_mmu,
					addr, write_fault, user_fault,
					fetch_fault);
}
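/*
 * Note (added for clarity): walk_addr() resolves addresses through the
 * vcpu's primary MMU context (vcpu->arch.mmu), while walk_addr_nested()
 * goes through vcpu->arch.nested_mmu, the context KVM keeps for walking
 * a nested guest's page tables.
 */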
static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
			      u64 *spte, const void *pte)
{
	pt_element_t gpte;
	unsigned pte_access;
	pfn_t pfn;
	u64 new_spte;

	gpte = *(const pt_element_t *)pte;
	if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK)) {
		if (!is_present_gpte(gpte)) {
			if (sp->unsync)
				new_spte = shadow_trap_nonpresent_pte;
			else
				new_spte = shadow_notrap_nonpresent_pte;
			__set_spte(spte, new_spte);
		}
		return;
	}

	pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte);
	pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
	if (gpte_to_gfn(gpte) != vcpu->arch.update_pte.gfn)
		return;
	pfn = vcpu->arch.update_pte.pfn;
	if (is_error_pfn(pfn))
		return;
	if (mmu_notifier_retry(vcpu, vcpu->arch.update_pte.mmu_seq))
		return;
	/*
	 * We call mmu_set_spte() with reset_host_protection = true because
	 * vcpu->arch.update_pte.pfn was fetched from get_user_pages(write = 1).
	 */
	mmu_set_spte(vcpu, spte, sp->role.access, pte_access, 0, 0,
		     is_dirty_gpte(gpte), NULL, PT_PAGE_TABLE_LEVEL,
		     gpte_to_gfn(gpte), pfn, true, true);
}
static bool FNAME(gpte_changed)(struct kvm_vcpu *vcpu,
				struct guest_walker *gw, int level)
{
	pt_element_t curr_pte;
	gpa_t base_gpa, pte_gpa = gw->pte_gpa[level - 1];
	u64 mask;
	int r, index;

	if (level == PT_PAGE_TABLE_LEVEL) {
		mask = PTE_PREFETCH_NUM * sizeof(pt_element_t) - 1;
		base_gpa = pte_gpa & ~mask;
		index = (pte_gpa - base_gpa) / sizeof(pt_element_t);

		r = kvm_read_guest_atomic(vcpu->kvm, base_gpa,
				gw->prefetch_ptes, sizeof(gw->prefetch_ptes));
		curr_pte = gw->prefetch_ptes[index];
	} else
		r = kvm_read_guest_atomic(vcpu->kvm, pte_gpa,
				&curr_pte, sizeof(curr_pte));

	return r || curr_pte != gw->ptes[level - 1];
}
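/*
 * Note (added for clarity): the guest ptes were read without holding
 * mmu_lock, so gpte_changed() re-reads the pte (one entry, or a whole
 * PTE_PREFETCH_NUM-sized batch at the leaf level) and compares it with
 * the copy cached in the walker; a mismatch or a failed read means the
 * guest modified its page tables in the meantime and the caller must
 * bail out rather than install a stale shadow pte.
 */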
static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw,
				u64 *sptep)
{
	struct kvm_mmu_page *sp;
	struct kvm_mmu *mmu = &vcpu->arch.mmu;
	pt_element_t *gptep = gw->prefetch_ptes;
	u64 *spte;
	int i;

	sp = page_header(__pa(sptep));

	if (sp->role.level > PT_PAGE_TABLE_LEVEL)
		return;
	if (sp->role.direct)
		return __direct_pte_prefetch(vcpu, sp, sptep);

	i = (sptep - sp->spt) & ~(PTE_PREFETCH_NUM - 1);
	spte = sp->spt + i;

	for (i = 0; i < PTE_PREFETCH_NUM; i++, spte++) {
		pt_element_t gpte;
		unsigned pte_access;
		gfn_t gfn;
		pfn_t pfn;
		bool dirty;

		if (spte == sptep)
			continue;
		if (*spte != shadow_trap_nonpresent_pte)
			continue;

		gpte = gptep[i];

		if (!is_present_gpte(gpte) ||
		      is_rsvd_bits_set(mmu, gpte, PT_PAGE_TABLE_LEVEL)) {
			if (!sp->unsync)
				__set_spte(spte, shadow_notrap_nonpresent_pte);
			continue;
		}

		if (!(gpte & PT_ACCESSED_MASK))
			continue;

		pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
		gfn = gpte_to_gfn(gpte);
		dirty = is_dirty_gpte(gpte);
		pfn = pte_prefetch_gfn_to_pfn(vcpu, gfn,
				      (pte_access & ACC_WRITE_MASK) && dirty);
		if (is_error_pfn(pfn)) {
			kvm_release_pfn_clean(pfn);
			break;
		}

		mmu_set_spte(vcpu, spte, sp->role.access, pte_access, 0, 0,
			     dirty, NULL, PT_PAGE_TABLE_LEVEL, gfn,
			     pfn, true, true);
	}
}
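/*
 * Note (added for clarity): after a fault is fixed, pte_prefetch()
 * opportunistically maps up to PTE_PREFETCH_NUM neighbouring last-level
 * guest ptes around the faulting one, using the batch cached in
 * gw->prefetch_ptes, trading a little extra work now for fewer shadow
 * page faults on adjacent addresses later.
 */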
/*
 * Fetch a shadow pte for a specific level in the paging hierarchy.
 */
static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
			 struct guest_walker *gw,
			 int user_fault, int write_fault, int hlevel,
			 int *ptwrite, pfn_t pfn)
{
	unsigned access = gw->pt_access;
	struct kvm_mmu_page *sp = NULL;
	bool dirty = is_dirty_gpte(gw->ptes[gw->level - 1]);
	int top_level;
	unsigned direct_access;
	struct kvm_shadow_walk_iterator it;

	if (!is_present_gpte(gw->ptes[gw->level - 1]))
		return NULL;

	direct_access = gw->pt_access & gw->pte_access;
	if (!dirty)
		direct_access &= ~ACC_WRITE_MASK;

	top_level = vcpu->arch.mmu.root_level;
	if (top_level == PT32E_ROOT_LEVEL)
		top_level = PT32_ROOT_LEVEL;
	/*
	 * Verify that the top-level gpte is still there.  Since the page
	 * is a root page, it is either write protected (and cannot be
	 * changed from now on) or it is invalid (in which case, we don't
	 * really care if it changes underneath us after this point).
	 */
	if (FNAME(gpte_changed)(vcpu, gw, top_level))
		goto out_gpte_changed;

	for (shadow_walk_init(&it, vcpu, addr);
	     shadow_walk_okay(&it) && it.level > gw->level;
	     shadow_walk_next(&it)) {
		gfn_t table_gfn;

		drop_large_spte(vcpu, it.sptep);

		sp = NULL;
		if (!is_shadow_present_pte(*it.sptep)) {
			table_gfn = gw->table_gfn[it.level - 2];
			sp = kvm_mmu_get_page(vcpu, table_gfn, addr, it.level-1,
					      false, access, it.sptep);
		}

		/*
		 * Verify that the gpte in the page we've just write
		 * protected is still there.
		 */
		if (FNAME(gpte_changed)(vcpu, gw, it.level - 1))
			goto out_gpte_changed;

		if (sp)
			link_shadow_page(it.sptep, sp);
	}

	for (;
	     shadow_walk_okay(&it) && it.level > hlevel;
	     shadow_walk_next(&it)) {
		gfn_t direct_gfn;

		validate_direct_spte(vcpu, it.sptep, direct_access);

		drop_large_spte(vcpu, it.sptep);

		if (is_shadow_present_pte(*it.sptep))
			continue;

		direct_gfn = gw->gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1);

		sp = kvm_mmu_get_page(vcpu, direct_gfn, addr, it.level-1,
				      true, direct_access, it.sptep);
		link_shadow_page(it.sptep, sp);
	}

	mmu_set_spte(vcpu, it.sptep, access, gw->pte_access & access,
		     user_fault, write_fault, dirty, ptwrite, it.level,
		     gw->gfn, pfn, false, true);
	FNAME(pte_prefetch)(vcpu, gw, it.sptep);

	return it.sptep;

out_gpte_changed:
	if (sp)
		kvm_mmu_put_page(sp, it.sptep);
	kvm_release_pfn_clean(pfn);
	return NULL;
}
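/*
 * Note (added for clarity): fetch() builds the shadow hierarchy in two
 * phases. The first loop materialises indirect shadow pages mirroring
 * the guest's own tables down to gw->level, re-checking after each
 * write-protection that the guest pte did not change; the second loop
 * fills the remaining levels down to hlevel (the level at which the
 * host backs the page) with direct shadow pages, which is how a guest
 * large page gets shadowed when the host backs it with smaller pages.
 */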
/*
 * Page fault handler.  There are several causes for a page fault:
 *   - there is no shadow pte for the guest pte
 *   - write access through a shadow pte marked read only so that we can set
 *     the dirty bit
 *   - write access to a shadow pte marked read only so we can update the page
 *     dirty bitmap, when userspace requests it
 *   - mmio access; in this case we will never install a present shadow pte
 *   - normal guest page fault due to the guest pte marked not present, not
 *     writable, or not executable
 *
 * Returns: 1 if we need to emulate the instruction, 0 otherwise, or
 *          a negative value on error.
 */
static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
			     u32 error_code)
{
	int write_fault = error_code & PFERR_WRITE_MASK;
	int user_fault = error_code & PFERR_USER_MASK;
	int fetch_fault = error_code & PFERR_FETCH_MASK;
	struct guest_walker walker;
	u64 *sptep;
	int write_pt = 0;
	int r;
	pfn_t pfn;
	int level = PT_PAGE_TABLE_LEVEL;
	unsigned long mmu_seq;

	pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code);

	r = mmu_topup_memory_caches(vcpu);
	if (r)
		return r;

	/*
	 * Look up the guest pte for the faulting address.
	 */
	r = FNAME(walk_addr)(&walker, vcpu, addr, write_fault, user_fault,
			     fetch_fault);

	/*
	 * The page is not mapped by the guest.  Let the guest handle it.
	 */
	if (!r) {
		pgprintk("%s: guest page fault\n", __func__);
		inject_page_fault(vcpu);
		vcpu->arch.last_pt_write_count = 0; /* reset fork detector */
		return 0;
	}

	if (walker.level >= PT_DIRECTORY_LEVEL) {
		level = min(walker.level, mapping_level(vcpu, walker.gfn));
		walker.gfn = walker.gfn & ~(KVM_PAGES_PER_HPAGE(level) - 1);
	}

	mmu_seq = vcpu->kvm->mmu_notifier_seq;
	smp_rmb();
	pfn = gfn_to_pfn(vcpu->kvm, walker.gfn);

	/* mmio */
	if (is_error_pfn(pfn))
		return kvm_handle_bad_page(vcpu->kvm, walker.gfn, pfn);

	spin_lock(&vcpu->kvm->mmu_lock);
	if (mmu_notifier_retry(vcpu, mmu_seq))
		goto out_unlock;

	trace_kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT);
	kvm_mmu_free_some_pages(vcpu);
	sptep = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault,
			     level, &write_pt, pfn);
	pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __func__,
		 sptep, *sptep, write_pt);

	if (!write_pt)
		vcpu->arch.last_pt_write_count = 0; /* reset fork detector */

	++vcpu->stat.pf_fixed;
	trace_kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT);
	spin_unlock(&vcpu->kvm->mmu_lock);

	return write_pt;

out_unlock:
	spin_unlock(&vcpu->kvm->mmu_lock);
	kvm_release_pfn_clean(pfn);
	return 0;
}
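/*
 * Note (added for clarity): write_pt (and therefore the return value of
 * page_fault()) is non-zero when the write hit a guest page table that
 * is itself being shadowed, in which case the caller emulates the
 * faulting instruction instead of simply resuming the guest.
 */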
static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
{
	struct kvm_shadow_walk_iterator iterator;
	struct kvm_mmu_page *sp;
	gpa_t pte_gpa = -1;
	int level;
	u64 *sptep;
	int need_flush = 0;

	spin_lock(&vcpu->kvm->mmu_lock);

	for_each_shadow_entry(vcpu, gva, iterator) {
		level = iterator.level;
		sptep = iterator.sptep;

		sp = page_header(__pa(sptep));
		if (is_last_spte(*sptep, level)) {
			int offset, shift;

			if (!sp->unsync)
				break;

			shift = PAGE_SHIFT -
				  (PT_LEVEL_BITS - PT64_LEVEL_BITS) * level;
			offset = sp->role.quadrant << shift;

			pte_gpa = (sp->gfn << PAGE_SHIFT) + offset;
			pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t);

			if (is_shadow_present_pte(*sptep)) {
				if (is_large_pte(*sptep))
					--vcpu->kvm->stat.lpages;
				drop_spte(vcpu->kvm, sptep,
					  shadow_trap_nonpresent_pte);
				need_flush = 1;
			} else
				__set_spte(sptep, shadow_trap_nonpresent_pte);
			break;
		}

		if (!is_shadow_present_pte(*sptep) || !sp->unsync_children)
			break;
	}

	if (need_flush)
		kvm_flush_remote_tlbs(vcpu->kvm);

	atomic_inc(&vcpu->kvm->arch.invlpg_counter);

	spin_unlock(&vcpu->kvm->mmu_lock);

	if (pte_gpa == -1)
		return;
	if (mmu_topup_memory_caches(vcpu))
		return;
	kvm_mmu_pte_write(vcpu, pte_gpa, NULL, sizeof(pt_element_t), 0);
}
static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access,
			       u32 *error)
{
	struct guest_walker walker;
	gpa_t gpa = UNMAPPED_GVA;
	int r;

	r = FNAME(walk_addr)(&walker, vcpu, vaddr,
			     !!(access & PFERR_WRITE_MASK),
			     !!(access & PFERR_USER_MASK),
			     !!(access & PFERR_FETCH_MASK));

	if (r) {
		gpa = gfn_to_gpa(walker.gfn);
		gpa |= vaddr & ~PAGE_MASK;
	} else if (error)
		*error = walker.error_code;

	return gpa;
}
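/*
 * Usage sketch (illustrative, not taken from a caller in this file): the
 * emulator translates a guest virtual address before touching guest
 * memory, roughly
 *
 *	u32 error;
 *	gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, vaddr,
 *					      PFERR_WRITE_MASK, &error);
 *	if (gpa == UNMAPPED_GVA)
 *		inject a page fault built from "error" into the guest;
 *
 * where the access argument uses the same PFERR_* bits as a hardware
 * page fault error code.
 */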
static gpa_t FNAME(gva_to_gpa_nested)(struct kvm_vcpu *vcpu, gva_t vaddr,
				      u32 access, u32 *error)
{
	struct guest_walker walker;
	gpa_t gpa = UNMAPPED_GVA;
	int r;

	r = FNAME(walk_addr_nested)(&walker, vcpu, vaddr,
				    access & PFERR_WRITE_MASK,
				    access & PFERR_USER_MASK,
				    access & PFERR_FETCH_MASK);

	if (r) {
		gpa = gfn_to_gpa(walker.gfn);
		gpa |= vaddr & ~PAGE_MASK;
	} else if (error)
		*error = walker.error_code;

	return gpa;
}
static void FNAME(prefetch_page)(struct kvm_vcpu *vcpu,
				 struct kvm_mmu_page *sp)
{
	int i, j, offset, r;
	pt_element_t pt[256 / sizeof(pt_element_t)];
	gpa_t pte_gpa;

	if (sp->role.direct
	    || (PTTYPE == 32 && sp->role.level > PT_PAGE_TABLE_LEVEL)) {
		nonpaging_prefetch_page(vcpu, sp);
		return;
	}

	pte_gpa = gfn_to_gpa(sp->gfn);
	if (PTTYPE == 32) {
		offset = sp->role.quadrant << PT64_LEVEL_BITS;
		pte_gpa += offset * sizeof(pt_element_t);
	}

	for (i = 0; i < PT64_ENT_PER_PAGE; i += ARRAY_SIZE(pt)) {
		r = kvm_read_guest_atomic(vcpu->kvm, pte_gpa, pt, sizeof pt);
		pte_gpa += ARRAY_SIZE(pt) * sizeof(pt_element_t);
		for (j = 0; j < ARRAY_SIZE(pt); ++j)
			if (r || is_present_gpte(pt[j]))
				sp->spt[i+j] = shadow_trap_nonpresent_pte;
			else
				sp->spt[i+j] = shadow_notrap_nonpresent_pte;
	}
}
/*
 * Using the cached information from sp->gfns is safe because:
 * - The spte has a reference to the struct page, so the pfn for a given gfn
 *   can't change unless all sptes pointing to it are nuked first.
 */
static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
			    bool clear_unsync)
{
	int i, offset, nr_present;
	bool reset_host_protection;
	gpa_t first_pte_gpa;

	offset = nr_present = 0;

	/* A direct kvm_mmu_page cannot be unsync. */
	BUG_ON(sp->role.direct);

	if (PTTYPE == 32)
		offset = sp->role.quadrant << PT64_LEVEL_BITS;

	first_pte_gpa = gfn_to_gpa(sp->gfn) + offset * sizeof(pt_element_t);

	for (i = 0; i < PT64_ENT_PER_PAGE; i++) {
		unsigned pte_access;
		pt_element_t gpte;
		gpa_t pte_gpa;
		gfn_t gfn;

		if (!is_shadow_present_pte(sp->spt[i]))
			continue;

		pte_gpa = first_pte_gpa + i * sizeof(pt_element_t);

		if (kvm_read_guest_atomic(vcpu->kvm, pte_gpa, &gpte,
					  sizeof(pt_element_t)))
			return -EINVAL;

		gfn = gpte_to_gfn(gpte);
		if (is_rsvd_bits_set(&vcpu->arch.mmu, gpte, PT_PAGE_TABLE_LEVEL)
		      || gfn != sp->gfns[i] || !is_present_gpte(gpte)
		      || !(gpte & PT_ACCESSED_MASK)) {
			u64 nonpresent;

			if (is_present_gpte(gpte) || !clear_unsync)
				nonpresent = shadow_trap_nonpresent_pte;
			else
				nonpresent = shadow_notrap_nonpresent_pte;
			drop_spte(vcpu->kvm, &sp->spt[i], nonpresent);
			continue;
		}

		nr_present++;
		pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
		if (!(sp->spt[i] & SPTE_HOST_WRITEABLE)) {
			pte_access &= ~ACC_WRITE_MASK;
			reset_host_protection = 0;
		} else {
			reset_host_protection = 1;
		}
		set_spte(vcpu, &sp->spt[i], pte_access, 0, 0,
			 is_dirty_gpte(gpte), PT_PAGE_TABLE_LEVEL, gfn,
			 spte_to_pfn(sp->spt[i]), true, false,
			 reset_host_protection);
	}

	return !nr_present;
}
#undef pt_element_t
#undef guest_walker
#undef FNAME
#undef PT_BASE_ADDR_MASK
#undef PT_INDEX
#undef PT_LEVEL_MASK
#undef PT_LVL_ADDR_MASK
#undef PT_LVL_OFFSET_MASK
#undef PT_LEVEL_BITS
#undef PT_MAX_FULL_LEVELS
#undef gpte_to_gfn
#undef gpte_to_gfn_lvl
#undef CMPXCHG