Commit | Line | Data |
---|---|---|
d7e28ffe RR |
1 | #ifndef _LGUEST_H |
2 | #define _LGUEST_H | |
3 | ||
4 | #include <asm/desc.h> | |
5 | ||
/* GDT slots used for the lguest code and data segments. */
#define GDT_ENTRY_LGUEST_CS	10
#define GDT_ENTRY_LGUEST_DS	11

/*
 * The corresponding selector values: the GDT index scaled by 8 (an x86
 * selector keeps its table index in bits 3 and up).
 */
#define LGUEST_CS		(GDT_ENTRY_LGUEST_CS * 8)
#define LGUEST_DS		(GDT_ENTRY_LGUEST_DS * 8)
10 | ||
11 | #ifndef __ASSEMBLY__ | |
12 | #include <linux/types.h> | |
13 | #include <linux/init.h> | |
14 | #include <linux/stringify.h> | |
15 | #include <linux/binfmts.h> | |
16 | #include <linux/futex.h> | |
17 | #include <linux/lguest.h> | |
18 | #include <linux/lguest_launcher.h> | |
19 | #include <linux/wait.h> | |
20 | #include <linux/err.h> | |
21 | #include <asm/semaphore.h> | |
22 | #include "irq_vectors.h" | |
23 | ||
/* NOTE(review): presumably the x86 privilege level the Guest runs at -- confirm. */
#define GUEST_PL		1
25 | ||
/*
 * Saved Guest register frame.
 *
 * NOTE(review): the field order presumably has to match the save/restore
 * code that fills this structure in -- do not reorder without checking it.
 */
struct lguest_regs {
	/* First part: saved by hand. */
	unsigned long ebx;
	unsigned long ecx;
	unsigned long edx;
	unsigned long esi;
	unsigned long edi;
	unsigned long ebp;
	unsigned long gs;
	unsigned long eax;
	unsigned long fs;
	unsigned long ds;
	unsigned long es;
	unsigned long trapnum;
	unsigned long errcode;

	/* Second part: pushed by the trap itself. */
	unsigned long eip;
	unsigned long cs;
	unsigned long eflags;
	unsigned long esp;
	unsigned long ss;
};
42 | ||
/* Module-wide page-table teardown and setup; both are defined elsewhere. */
void free_pagetables(void);
int init_pagetables(struct page **switcher_page,
		    unsigned int pages);
45 | ||
/*
 * Flat 4G segment descriptors: one executable (for CS), one plain data
 * segment (for DS).
 *
 * NOTE(review): assuming the two initialisers are the low and high 32-bit
 * words of an x86 descriptor, both describe base 0 with limit 0xfffff in 4K
 * units, and differ only in the type byte (0x9b code vs. 0x93 data) --
 * confirm against struct desc_struct.
 */
#define FULL_EXEC_SEGMENT	((struct desc_struct){0x0000ffff, 0x00cf9b00})
#define FULL_SEGMENT		((struct desc_struct){0x0000ffff, 0x00cf9300})
49 | ||
50 | struct lguest_dma_info | |
51 | { | |
52 | struct list_head list; | |
53 | union futex_key key; | |
54 | unsigned long dmas; | |
55 | u16 next_dma; | |
56 | u16 num_dmas; | |
57 | u16 guestid; | |
58 | u8 interrupt; /* 0 when not registered */ | |
59 | }; | |
60 | ||
bff672e6 RR |
61 | /*H:310 The page-table code owes a great debt of gratitude to Andi Kleen. He |
62 | * reviewed the original code which used "u32" for all page table entries, and | |
63 | * insisted that it would be far clearer with explicit typing. I thought it | |
64 | * was overkill, but he was right: it is much clearer than it was before. | |
65 | * | |
66 | * We have separate types for the Guest's ptes & pgds and the shadow ptes & | |
67 | * pgds. There's already a Linux type for these (pte_t and pgd_t) but they | |
68 | * change depending on kernel config options (PAE). */ | |
69 | ||
70 | /* Each entry is identical: lower 12 bits of flags and upper 20 bits for the | |
71 | * "page frame number" (0 == first physical page, etc). They are different | |
72 | * types so the compiler will warn us if we mix them improperly. */ | |
d7e28ffe RR |
/* Shadow page-directory entry. */
typedef union {
	/* Decoded view: 12 bits of flags, then a 20-bit page frame number. */
	struct {
		unsigned int flags:12;
		unsigned int pfn:20;
	};
	/* The same entry as one undecoded word. */
	struct {
		unsigned long val;
	} raw;
} spgd_t;
/* Shadow page-table entry. */
typedef union {
	/* Decoded view: 12 bits of flags, then a 20-bit page frame number. */
	struct {
		unsigned int flags:12;
		unsigned int pfn:20;
	};
	/* The same entry as one undecoded word. */
	struct {
		unsigned long val;
	} raw;
} spte_t;
/* Guest page-directory entry. */
typedef union {
	/* Decoded view: 12 bits of flags, then a 20-bit page frame number. */
	struct {
		unsigned int flags:12;
		unsigned int pfn:20;
	};
	/* The same entry as one undecoded word. */
	struct {
		unsigned long val;
	} raw;
} gpgd_t;
/* Guest page-table entry. */
typedef union {
	/* Decoded view: 12 bits of flags, then a 20-bit page frame number. */
	struct {
		unsigned int flags:12;
		unsigned int pfn:20;
	};
	/* The same entry as one undecoded word. */
	struct {
		unsigned long val;
	} raw;
} gpte_t;
bff672e6 RR |
89 | |
90 | /* We have two convenient macros to convert a "raw" value as handed to us by | |
91 | * the Guest into the correct Guest PGD or PTE type. */ | |
d7e28ffe RR |
/*
 * Build a gpte_t / gpgd_t from a raw word.  The macro parameter must not be
 * called "raw" or "val": that would also rewrite the designator below.
 */
#define mkgpte(word)	((gpte_t){ .raw.val = (word) })
#define mkgpgd(word)	((gpgd_t){ .raw.val = (word) })
bff672e6 | 94 | /*:*/ |
d7e28ffe RR |
95 | |
96 | struct pgdir | |
97 | { | |
98 | unsigned long cr3; | |
99 | spgd_t *pgdir; | |
100 | }; | |
101 | ||
102 | /* This is a guest-specific page (mapped ro) into the guest. */ | |
103 | struct lguest_ro_state | |
104 | { | |
105 | /* Host information we need to restore when we switch back. */ | |
106 | u32 host_cr3; | |
107 | struct Xgt_desc_struct host_idt_desc; | |
108 | struct Xgt_desc_struct host_gdt_desc; | |
109 | u32 host_sp; | |
110 | ||
111 | /* Fields which are used when guest is running. */ | |
112 | struct Xgt_desc_struct guest_idt_desc; | |
113 | struct Xgt_desc_struct guest_gdt_desc; | |
114 | struct i386_hw_tss guest_tss; | |
115 | struct desc_struct guest_idt[IDT_ENTRIES]; | |
116 | struct desc_struct guest_gdt[GDT_ENTRIES]; | |
117 | }; | |
118 | ||
119 | /* We have two pages shared with guests, per cpu. */ | |
120 | struct lguest_pages | |
121 | { | |
122 | /* This is the stack page mapped rw in guest */ | |
123 | char spare[PAGE_SIZE - sizeof(struct lguest_regs)]; | |
124 | struct lguest_regs regs; | |
125 | ||
126 | /* This is the host state & guest descriptor page, ro in guest */ | |
127 | struct lguest_ro_state state; | |
128 | } __attribute__((aligned(PAGE_SIZE))); | |
129 | ||
/* Bits for the "changed" bitmap in struct lguest. */
#define CHANGED_IDT		1
#define CHANGED_GDT		2
#define CHANGED_GDT_TLS		4	/* Actually a subset of CHANGED_GDT */
/* IDT and GDT together (the TLS bit is not part of this value). */
#define CHANGED_ALL		(CHANGED_IDT | CHANGED_GDT)
134 | ||
135 | /* The private info the thread maintains about the guest. */ | |
136 | struct lguest | |
137 | { | |
138 | /* At end of a page shared mapped over lguest_pages in guest. */ | |
139 | unsigned long regs_page; | |
140 | struct lguest_regs *regs; | |
141 | struct lguest_data __user *lguest_data; | |
142 | struct task_struct *tsk; | |
143 | struct mm_struct *mm; /* == tsk->mm, but that becomes NULL on exit */ | |
144 | u16 guestid; | |
145 | u32 pfn_limit; | |
146 | u32 page_offset; | |
147 | u32 cr2; | |
148 | int halted; | |
149 | int ts; | |
150 | u32 next_hcall; | |
151 | u32 esp1; | |
152 | u8 ss1; | |
153 | ||
154 | /* Do we need to stop what we're doing and return to userspace? */ | |
155 | int break_out; | |
156 | wait_queue_head_t break_wq; | |
157 | ||
158 | /* Bitmap of what has changed: see CHANGED_* above. */ | |
159 | int changed; | |
160 | struct lguest_pages *last_pages; | |
161 | ||
162 | /* We keep a small number of these. */ | |
163 | u32 pgdidx; | |
164 | struct pgdir pgdirs[4]; | |
165 | ||
166 | /* Cached wakeup: we hold a reference to this task. */ | |
167 | struct task_struct *wake; | |
168 | ||
169 | unsigned long noirq_start, noirq_end; | |
170 | int dma_is_pending; | |
171 | unsigned long pending_dma; /* struct lguest_dma */ | |
172 | unsigned long pending_key; /* address they're sending to */ | |
173 | ||
174 | unsigned int stack_pages; | |
175 | u32 tsc_khz; | |
176 | ||
177 | struct lguest_dma_info dma[LGUEST_MAX_DMA]; | |
178 | ||
179 | /* Dead? */ | |
180 | const char *dead; | |
181 | ||
182 | /* The GDT entries copied into lguest_ro_state when running. */ | |
183 | struct desc_struct gdt[GDT_ENTRIES]; | |
184 | ||
185 | /* The IDT entries: some copied into lguest_ro_state when running. */ | |
186 | struct desc_struct idt[FIRST_EXTERNAL_VECTOR+LGUEST_IRQS]; | |
187 | struct desc_struct syscall_idt; | |
188 | ||
189 | /* Virtual clock device */ | |
190 | struct hrtimer hrt; | |
191 | ||
192 | /* Pending virtual interrupts */ | |
193 | DECLARE_BITMAP(irqs_pending, LGUEST_IRQS); | |
194 | }; | |
195 | ||
196 | extern struct lguest lguests[]; | |
197 | extern struct mutex lguest_lock; | |
198 | ||
199 | /* core.c: */ | |
200 | u32 lgread_u32(struct lguest *lg, unsigned long addr); | |
201 | void lgwrite_u32(struct lguest *lg, unsigned long addr, u32 val); | |
202 | void lgread(struct lguest *lg, void *buf, unsigned long addr, unsigned len); | |
203 | void lgwrite(struct lguest *lg, unsigned long, const void *buf, unsigned len); | |
204 | int find_free_guest(void); | |
205 | int lguest_address_ok(const struct lguest *lg, | |
206 | unsigned long addr, unsigned long len); | |
207 | int run_guest(struct lguest *lg, unsigned long __user *user); | |
208 | ||
209 | ||
210 | /* interrupts_and_traps.c: */ | |
211 | void maybe_do_interrupt(struct lguest *lg); | |
212 | int deliver_trap(struct lguest *lg, unsigned int num); | |
213 | void load_guest_idt_entry(struct lguest *lg, unsigned int i, u32 low, u32 hi); | |
214 | void guest_set_stack(struct lguest *lg, u32 seg, u32 esp, unsigned int pages); | |
215 | void pin_stack_pages(struct lguest *lg); | |
216 | void setup_default_idt_entries(struct lguest_ro_state *state, | |
217 | const unsigned long *def); | |
218 | void copy_traps(const struct lguest *lg, struct desc_struct *idt, | |
219 | const unsigned long *def); | |
220 | void guest_set_clockevent(struct lguest *lg, unsigned long delta); | |
221 | void init_clockdev(struct lguest *lg); | |
222 | ||
223 | /* segments.c: */ | |
224 | void setup_default_gdt_entries(struct lguest_ro_state *state); | |
225 | void setup_guest_gdt(struct lguest *lg); | |
226 | void load_guest_gdt(struct lguest *lg, unsigned long table, u32 num); | |
227 | void guest_load_tls(struct lguest *lg, unsigned long tls_array); | |
228 | void copy_gdt(const struct lguest *lg, struct desc_struct *gdt); | |
229 | void copy_gdt_tls(const struct lguest *lg, struct desc_struct *gdt); | |
230 | ||
231 | /* page_tables.c: */ | |
232 | int init_guest_pagetable(struct lguest *lg, unsigned long pgtable); | |
233 | void free_guest_pagetable(struct lguest *lg); | |
234 | void guest_new_pagetable(struct lguest *lg, unsigned long pgtable); | |
235 | void guest_set_pmd(struct lguest *lg, unsigned long cr3, u32 i); | |
236 | void guest_pagetable_clear_all(struct lguest *lg); | |
237 | void guest_pagetable_flush_user(struct lguest *lg); | |
238 | void guest_set_pte(struct lguest *lg, unsigned long cr3, | |
239 | unsigned long vaddr, gpte_t val); | |
240 | void map_switcher_in_guest(struct lguest *lg, struct lguest_pages *pages); | |
241 | int demand_page(struct lguest *info, unsigned long cr2, int errcode); | |
242 | void pin_page(struct lguest *lg, unsigned long vaddr); | |
243 | ||
/* lguest_user.c: device init and its matching removal. */
int lguest_device_init(void);
void lguest_device_remove(void);
247 | ||
248 | /* io.c: */ | |
249 | void lguest_io_init(void); | |
250 | int bind_dma(struct lguest *lg, | |
251 | unsigned long key, unsigned long udma, u16 numdmas, u8 interrupt); | |
252 | void send_dma(struct lguest *info, unsigned long key, unsigned long udma); | |
253 | void release_all_dma(struct lguest *lg); | |
254 | unsigned long get_dma_buffer(struct lguest *lg, unsigned long key, | |
255 | unsigned long *interrupt); | |
256 | ||
/* hypercalls.c: the single entry point exported from that file. */
void do_hypercalls(struct lguest *lg);
259 | ||
dde79789 RR |
260 | /*L:035 |
261 | * Let's step aside for the moment, to study one important routine that's used | |
262 | * widely in the Host code. | |
263 | * | |
264 | * There are many cases where the Guest does something invalid, like pass crap | |
265 | * to a hypercall. Since only the Guest kernel can make hypercalls, it's quite | |
266 | * acceptable to simply terminate the Guest and give the Launcher a nicely | |
267 | * formatted reason. It's also simpler for the Guest itself, which doesn't | |
268 | * need to check most hypercalls for "success"; if you're still running, it | |
269 | * succeeded. | |
270 | * | |
271 | * Once this is called, the Guest will never run again, so most Host code can | |
272 | * call this then continue as if nothing had happened. This means many | |
273 | * functions don't have to explicitly return an error code, which keeps the | |
274 | * code simple. | |
275 | * | |
276 | * It also means that this can be called more than once: only the first one is | |
277 | * remembered. The only trick is that we still need to kill the Guest even if | |
278 | * we can't allocate memory to store the reason. Linux has a neat way of | |
279 | * packing error codes into invalid pointers, so we use that here. | |
280 | * | |
281 | * Like any macro which uses an "if", it is safely wrapped in a run-once "do { | |
282 | * } while(0)". | |
283 | */ | |
d7e28ffe RR |
/*
 * kill_guest(lg, fmt, ...) - record why a Guest died.
 *
 * Only the first call for a given Guest stores a reason: if lg->dead is
 * already set nothing happens.  Otherwise lg->dead receives the formatted
 * message from kasprintf(GFP_ATOMIC, ...), or ERR_PTR(-ENOMEM) when that
 * allocation fails, so the field is always non-NULL afterwards.
 *
 * The lg argument is evaluated exactly once, so an expression with side
 * effects is safe to pass.
 */
#define kill_guest(lg, fmt...)						\
do {									\
	struct lguest *kill_guest_lg_ = (lg);				\
	if (!kill_guest_lg_->dead) {					\
		kill_guest_lg_->dead = kasprintf(GFP_ATOMIC, fmt);	\
		if (!kill_guest_lg_->dead)				\
			kill_guest_lg_->dead = ERR_PTR(-ENOMEM);	\
	}								\
} while (0)
dde79789 | 292 | /* (End of aside) :*/ |
d7e28ffe RR |
293 | |
294 | static inline unsigned long guest_pa(struct lguest *lg, unsigned long vaddr) | |
295 | { | |
296 | return vaddr - lg->page_offset; | |
297 | } | |
298 | #endif /* __ASSEMBLY__ */ | |
299 | #endif /* _LGUEST_H */ |