/*
 *  x86 exception helpers - sysemu code
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include "exec/exec-all.h"
#include "tcg/helper-tcg.h"

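/*
 * Build a PG_MODE_* bitmask describing the current paging configuration
 * from CR0 (write protect), CR4 (PAE/PSE/PKE/PKS/SMEP/LA57), EFER (NXE)
 * and the long-mode hflag.
 */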
int get_pg_mode(CPUX86State *env)
{
    int pg_mode = 0;
    if (env->cr[0] & CR0_WP_MASK) {
        pg_mode |= PG_MODE_WP;
    }
    if (env->cr[4] & CR4_PAE_MASK) {
        pg_mode |= PG_MODE_PAE;
    }
    if (env->cr[4] & CR4_PSE_MASK) {
        pg_mode |= PG_MODE_PSE;
    }
    if (env->cr[4] & CR4_PKE_MASK) {
        pg_mode |= PG_MODE_PKE;
    }
    if (env->cr[4] & CR4_PKS_MASK) {
        pg_mode |= PG_MODE_PKS;
    }
    if (env->cr[4] & CR4_SMEP_MASK) {
        pg_mode |= PG_MODE_SMEP;
    }
    if (env->cr[4] & CR4_LA57_MASK) {
        pg_mode |= PG_MODE_LA57;
    }
    if (env->hflags & HF_LMA_MASK) {
        pg_mode |= PG_MODE_LMA;
    }
    if (env->efer & MSR_EFER_NXE) {
        pg_mode |= PG_MODE_NXE;
    }
    return pg_mode;
}

#define PG_ERROR_OK (-1)

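/*
 * Optional second-stage translation hook: when non-NULL, GET_HPHYS runs
 * each guest-physical address (page-table entries and the final
 * translation) through it, e.g. for SVM nested paging; otherwise the
 * guest-physical address is used unchanged.
 */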
typedef hwaddr (*MMUTranslateFunc)(CPUState *cs, hwaddr gphys, MMUAccessType access_type,
                                   int *prot);

#define GET_HPHYS(cs, gpa, access_type, prot)  \
    (get_hphys_func ? get_hphys_func(cs, gpa, access_type, prot) : gpa)

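/*
 * Walk the guest page tables for 'addr' and fill in the translated
 * physical address, the page size and the allowed protection bits.
 * Returns PG_ERROR_OK on success, or a #PF error code on failure.
 */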
static int mmu_translate(CPUState *cs, hwaddr addr, MMUTranslateFunc get_hphys_func,
                         uint64_t cr3, int is_write1, int mmu_idx, int pg_mode,
                         hwaddr *xlat, int *page_size, int *prot)
{
    X86CPU *cpu = X86_CPU(cs);
    CPUX86State *env = &cpu->env;
    uint64_t ptep, pte;
    int32_t a20_mask;
    target_ulong pde_addr, pte_addr;
    int error_code = 0;
    int is_dirty, is_write, is_user;
    uint64_t rsvd_mask = PG_ADDRESS_MASK & ~MAKE_64BIT_MASK(0, cpu->phys_bits);
    uint32_t page_offset;
    uint32_t pkr;

    is_user = (mmu_idx == MMU_USER_IDX);
    is_write = is_write1 & 1;
    a20_mask = x86_get_a20_mask(env);

    if (!(pg_mode & PG_MODE_NXE)) {
        rsvd_mask |= PG_NX_MASK;
    }

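    /*
     * PAE or long-mode paging: 64-bit entries.  In long mode this is a
     * four-level walk (PML4E -> PDPE -> PDE -> PTE), or five levels when
     * LA57 is enabled; 32-bit PAE uses a four-entry PDPT instead.
     */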
    if (pg_mode & PG_MODE_PAE) {
        uint64_t pde, pdpe;
        target_ulong pdpe_addr;

#ifdef TARGET_X86_64
        if (pg_mode & PG_MODE_LMA) {
            bool la57 = pg_mode & PG_MODE_LA57;
            uint64_t pml5e_addr, pml5e;
            uint64_t pml4e_addr, pml4e;

            if (la57) {
                pml5e_addr = ((cr3 & ~0xfff) +
                        (((addr >> 48) & 0x1ff) << 3)) & a20_mask;
                pml5e_addr = GET_HPHYS(cs, pml5e_addr, MMU_DATA_STORE, NULL);
                pml5e = x86_ldq_phys(cs, pml5e_addr);
                if (!(pml5e & PG_PRESENT_MASK)) {
                    goto do_fault;
                }
                if (pml5e & (rsvd_mask | PG_PSE_MASK)) {
                    goto do_fault_rsvd;
                }
                if (!(pml5e & PG_ACCESSED_MASK)) {
                    pml5e |= PG_ACCESSED_MASK;
                    x86_stl_phys_notdirty(cs, pml5e_addr, pml5e);
                }
                ptep = pml5e ^ PG_NX_MASK;
            } else {
                pml5e = cr3;
                ptep = PG_NX_MASK | PG_USER_MASK | PG_RW_MASK;
            }

            pml4e_addr = ((pml5e & PG_ADDRESS_MASK) +
                    (((addr >> 39) & 0x1ff) << 3)) & a20_mask;
            pml4e_addr = GET_HPHYS(cs, pml4e_addr, MMU_DATA_STORE, NULL);
            pml4e = x86_ldq_phys(cs, pml4e_addr);
            if (!(pml4e & PG_PRESENT_MASK)) {
                goto do_fault;
            }
            if (pml4e & (rsvd_mask | PG_PSE_MASK)) {
                goto do_fault_rsvd;
            }
            if (!(pml4e & PG_ACCESSED_MASK)) {
                pml4e |= PG_ACCESSED_MASK;
                x86_stl_phys_notdirty(cs, pml4e_addr, pml4e);
            }
            ptep &= pml4e ^ PG_NX_MASK;
            pdpe_addr = ((pml4e & PG_ADDRESS_MASK) + (((addr >> 30) & 0x1ff) << 3)) &
                a20_mask;
            pdpe_addr = GET_HPHYS(cs, pdpe_addr, MMU_DATA_STORE, NULL);
            pdpe = x86_ldq_phys(cs, pdpe_addr);
            if (!(pdpe & PG_PRESENT_MASK)) {
                goto do_fault;
            }
            if (pdpe & rsvd_mask) {
                goto do_fault_rsvd;
            }
            ptep &= pdpe ^ PG_NX_MASK;
            if (!(pdpe & PG_ACCESSED_MASK)) {
                pdpe |= PG_ACCESSED_MASK;
                x86_stl_phys_notdirty(cs, pdpe_addr, pdpe);
            }
            if (pdpe & PG_PSE_MASK) {
                /* 1 GB page */
                *page_size = 1024 * 1024 * 1024;
                pte_addr = pdpe_addr;
                pte = pdpe;
                goto do_check_protect;
            }
        } else
#endif
        {
            /* XXX: load them when cr3 is loaded ? */
            pdpe_addr = ((cr3 & ~0x1f) + ((addr >> 27) & 0x18)) &
                a20_mask;
            pdpe_addr = GET_HPHYS(cs, pdpe_addr, MMU_DATA_STORE, NULL);
            pdpe = x86_ldq_phys(cs, pdpe_addr);
            if (!(pdpe & PG_PRESENT_MASK)) {
                goto do_fault;
            }
            rsvd_mask |= PG_HI_USER_MASK;
            if (pdpe & (rsvd_mask | PG_NX_MASK)) {
                goto do_fault_rsvd;
            }
            ptep = PG_NX_MASK | PG_USER_MASK | PG_RW_MASK;
        }

        pde_addr = ((pdpe & PG_ADDRESS_MASK) + (((addr >> 21) & 0x1ff) << 3)) &
            a20_mask;
        pde_addr = GET_HPHYS(cs, pde_addr, MMU_DATA_STORE, NULL);
        pde = x86_ldq_phys(cs, pde_addr);
        if (!(pde & PG_PRESENT_MASK)) {
            goto do_fault;
        }
        if (pde & rsvd_mask) {
            goto do_fault_rsvd;
        }
        ptep &= pde ^ PG_NX_MASK;
        if (pde & PG_PSE_MASK) {
            /* 2 MB page */
            *page_size = 2048 * 1024;
            pte_addr = pde_addr;
            pte = pde;
            goto do_check_protect;
        }
        /* 4 KB page */
        if (!(pde & PG_ACCESSED_MASK)) {
            pde |= PG_ACCESSED_MASK;
            x86_stl_phys_notdirty(cs, pde_addr, pde);
        }
        pte_addr = ((pde & PG_ADDRESS_MASK) + (((addr >> 12) & 0x1ff) << 3)) &
            a20_mask;
        pte_addr = GET_HPHYS(cs, pte_addr, MMU_DATA_STORE, NULL);
        pte = x86_ldq_phys(cs, pte_addr);
        if (!(pte & PG_PRESENT_MASK)) {
            goto do_fault;
        }
        if (pte & rsvd_mask) {
            goto do_fault_rsvd;
        }
        /* combine pde and pte nx, user and rw protections */
        ptep &= pte ^ PG_NX_MASK;
        *page_size = 4096;
    } else {
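        /*
         * Legacy 32-bit paging (CR4.PAE = 0): a two-level walk with
         * 32-bit entries, optionally using 4MB pages when CR4.PSE is set.
         */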
        uint32_t pde;

        /* page directory entry */
        pde_addr = ((cr3 & ~0xfff) + ((addr >> 20) & 0xffc)) &
            a20_mask;
        pde_addr = GET_HPHYS(cs, pde_addr, MMU_DATA_STORE, NULL);
        pde = x86_ldl_phys(cs, pde_addr);
        if (!(pde & PG_PRESENT_MASK)) {
            goto do_fault;
        }
        ptep = pde | PG_NX_MASK;

        /* if PSE bit is set, then we use a 4MB page */
        if ((pde & PG_PSE_MASK) && (pg_mode & PG_MODE_PSE)) {
            *page_size = 4096 * 1024;
            pte_addr = pde_addr;

            /* Bits 20-13 provide bits 39-32 of the address, bit 21 is reserved.
             * Leave bits 20-13 in place for setting accessed/dirty bits below.
             */
            pte = pde | ((pde & 0x1fe000LL) << (32 - 13));
            rsvd_mask = 0x200000;
            goto do_check_protect_pse36;
        }

        if (!(pde & PG_ACCESSED_MASK)) {
            pde |= PG_ACCESSED_MASK;
            x86_stl_phys_notdirty(cs, pde_addr, pde);
        }

        /* page table entry */
        pte_addr = ((pde & ~0xfff) + ((addr >> 10) & 0xffc)) &
            a20_mask;
        pte_addr = GET_HPHYS(cs, pte_addr, MMU_DATA_STORE, NULL);
        pte = x86_ldl_phys(cs, pte_addr);
        if (!(pte & PG_PRESENT_MASK)) {
            goto do_fault;
        }
        /* combine pde and pte user and rw protections */
        ptep &= pte | PG_NX_MASK;
        *page_size = 4096;
        rsvd_mask = 0;
    }

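    /*
     * Common protection checking for all page sizes: validate reserved
     * bits, derive the user/write/exec permissions from the accumulated
     * ptep bits, and apply protection keys (PKU/PKS) when enabled.
     */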
do_check_protect:
    rsvd_mask |= (*page_size - 1) & PG_ADDRESS_MASK & ~PG_PSE_PAT_MASK;
do_check_protect_pse36:
    if (pte & rsvd_mask) {
        goto do_fault_rsvd;
    }
    ptep ^= PG_NX_MASK;

    /* can the page be put in the TLB?  prot will tell us */
    if (is_user && !(ptep & PG_USER_MASK)) {
        goto do_fault_protect;
    }

    *prot = 0;
    if (mmu_idx != MMU_KSMAP_IDX || !(ptep & PG_USER_MASK)) {
        *prot |= PAGE_READ;
        if ((ptep & PG_RW_MASK) || !(is_user || (pg_mode & PG_MODE_WP))) {
            *prot |= PAGE_WRITE;
        }
    }
    if (!(ptep & PG_NX_MASK) &&
        (mmu_idx == MMU_USER_IDX ||
         !((pg_mode & PG_MODE_SMEP) && (ptep & PG_USER_MASK)))) {
        *prot |= PAGE_EXEC;
    }

    if (!(pg_mode & PG_MODE_LMA)) {
        pkr = 0;
    } else if (ptep & PG_USER_MASK) {
        pkr = pg_mode & PG_MODE_PKE ? env->pkru : 0;
    } else {
        pkr = pg_mode & PG_MODE_PKS ? env->pkrs : 0;
    }
    if (pkr) {
        uint32_t pk = (pte & PG_PKRU_MASK) >> PG_PKRU_BIT;
        uint32_t pkr_ad = (pkr >> pk * 2) & 1;
        uint32_t pkr_wd = (pkr >> pk * 2) & 2;
        uint32_t pkr_prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;

        if (pkr_ad) {
            pkr_prot &= ~(PAGE_READ | PAGE_WRITE);
        } else if (pkr_wd && (is_user || (pg_mode & PG_MODE_WP))) {
            pkr_prot &= ~PAGE_WRITE;
        }

        *prot &= pkr_prot;
        if ((pkr_prot & (1 << is_write1)) == 0) {
            assert(is_write1 != 2);
            error_code |= PG_ERROR_PK_MASK;
            goto do_fault_protect;
        }
    }

    if ((*prot & (1 << is_write1)) == 0) {
        goto do_fault_protect;
    }

    /* yes, it can! */
    is_dirty = is_write && !(pte & PG_DIRTY_MASK);
    if (!(pte & PG_ACCESSED_MASK) || is_dirty) {
        pte |= PG_ACCESSED_MASK;
        if (is_dirty) {
            pte |= PG_DIRTY_MASK;
        }
        x86_stl_phys_notdirty(cs, pte_addr, pte);
    }

    if (!(pte & PG_DIRTY_MASK)) {
        /* only set write access if already dirty... otherwise wait
           for dirty access */
        assert(!is_write);
        *prot &= ~PAGE_WRITE;
    }

    pte = pte & a20_mask;

    /* align to page_size */
    pte &= PG_ADDRESS_MASK & ~(*page_size - 1);
    page_offset = addr & (*page_size - 1);
    *xlat = GET_HPHYS(cs, pte + page_offset, is_write1, prot);
    return PG_ERROR_OK;

 do_fault_rsvd:
    error_code |= PG_ERROR_RSVD_MASK;
 do_fault_protect:
    error_code |= PG_ERROR_P_MASK;
 do_fault:
    error_code |= (is_write << PG_ERROR_W_BIT);
    if (is_user) {
        error_code |= PG_ERROR_U_MASK;
    }
    if (is_write1 == 2 &&
        (((pg_mode & PG_MODE_NXE) && (pg_mode & PG_MODE_PAE)) ||
         (pg_mode & PG_MODE_SMEP))) {
        error_code |= PG_ERROR_I_D_MASK;
    }
    return error_code;
}

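/*
 * Second-stage (nested) translation for SVM: convert a guest-physical
 * address into a host-physical address via the nested page tables.  On a
 * nested-walk fault this raises a #VMEXIT(NPF) and does not return.
 */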
hwaddr get_hphys(CPUState *cs, hwaddr gphys, MMUAccessType access_type,
                        int *prot)
{
    CPUX86State *env = &X86_CPU(cs)->env;
    uint64_t exit_info_1;
    int page_size;
    int next_prot;
    hwaddr hphys;

    if (likely(!(env->hflags2 & HF2_NPT_MASK))) {
        return gphys;
    }

    exit_info_1 = mmu_translate(cs, gphys, NULL, env->nested_cr3,
                                access_type, MMU_USER_IDX, env->nested_pg_mode,
                                &hphys, &page_size, &next_prot);
    if (exit_info_1 == PG_ERROR_OK) {
        if (prot) {
            *prot &= next_prot;
        }
        return hphys;
    }

    x86_stq_phys(cs, env->vm_vmcb + offsetof(struct vmcb, control.exit_info_2),
                 gphys);
    if (prot) {
        exit_info_1 |= SVM_NPTEXIT_GPA;
    } else { /* page table access */
        exit_info_1 |= SVM_NPTEXIT_GPT;
    }
    cpu_vmexit(env, SVM_EXIT_NPF, exit_info_1, env->retaddr);
}

/* return value:
 * -1 = cannot handle fault
 * 0  = nothing more to do
 * 1  = generate PF fault
 */
static int handle_mmu_fault(CPUState *cs, vaddr addr, int size,
                            int is_write1, int mmu_idx)
{
    X86CPU *cpu = X86_CPU(cs);
    CPUX86State *env = &cpu->env;
    int error_code = PG_ERROR_OK;
    int pg_mode, prot, page_size;
    hwaddr paddr;
    hwaddr vaddr;

#if defined(DEBUG_MMU)
    printf("MMU fault: addr=%" VADDR_PRIx " w=%d mmu=%d eip=" TARGET_FMT_lx "\n",
           addr, is_write1, mmu_idx, env->eip);
#endif

    if (!(env->cr[0] & CR0_PG_MASK)) {
        paddr = addr;
#ifdef TARGET_X86_64
        if (!(env->hflags & HF_LMA_MASK)) {
            /* Without long mode we can only address 32 bits in real mode */
            paddr = (uint32_t)paddr;
        }
#endif
        prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
        page_size = 4096;
    } else {
        pg_mode = get_pg_mode(env);
        if (pg_mode & PG_MODE_LMA) {
            int32_t sext;

            /* test virtual address sign extension */
            sext = (int64_t)addr >> (pg_mode & PG_MODE_LA57 ? 56 : 47);
            if (sext != 0 && sext != -1) {
                env->error_code = 0;
                cs->exception_index = EXCP0D_GPF;
                return 1;
            }
        }

        error_code = mmu_translate(cs, addr, get_hphys, env->cr[3], is_write1,
                                   mmu_idx, pg_mode,
                                   &paddr, &page_size, &prot);
    }

    if (error_code == PG_ERROR_OK) {
        /* Even with 4MB pages, we map only one 4KB page in the cache to
           avoid filling it too fast */
        vaddr = addr & TARGET_PAGE_MASK;
        paddr &= TARGET_PAGE_MASK;

        assert(prot & (1 << is_write1));
        tlb_set_page_with_attrs(cs, vaddr, paddr, cpu_get_mem_attrs(env),
                                prot, mmu_idx, page_size);
        return 0;
    } else {
        if (env->intercept_exceptions & (1 << EXCP0E_PAGE)) {
            /* cr2 is not modified in case of exceptions */
            x86_stq_phys(cs,
                     env->vm_vmcb + offsetof(struct vmcb, control.exit_info_2),
                     addr);
        } else {
            env->cr[2] = addr;
        }
        env->error_code = error_code;
        cs->exception_index = EXCP0E_PAGE;
        return 1;
    }
}

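/*
 * TCG tlb_fill hook: translate 'addr' and install the mapping in the
 * TLB, or raise the pending #GP/#PF recorded by handle_mmu_fault.
 */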
bool x86_cpu_tlb_fill(CPUState *cs, vaddr addr, int size,
                      MMUAccessType access_type, int mmu_idx,
                      bool probe, uintptr_t retaddr)
{
    X86CPU *cpu = X86_CPU(cs);
    CPUX86State *env = &cpu->env;

    env->retaddr = retaddr;
    if (handle_mmu_fault(cs, addr, size, access_type, mmu_idx)) {
        /* FIXME: On error in get_hphys we have already jumped out.  */
        g_assert(!probe);
        raise_exception_err_ra(env, cs->exception_index,
                               env->error_code, retaddr);
    }
    return true;
}