1 /*
2  *  x86 exception helpers - sysemu code
3  *
4  *  Copyright (c) 2003 Fabrice Bellard
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #include "qemu/osdep.h"
21 #include "cpu.h"
22 #include "tcg/helper-tcg.h"
23 
24 int get_pg_mode(CPUX86State *env)
25 {
26     int pg_mode = 0;
27     if (env->cr[0] & CR0_WP_MASK) {
28         pg_mode |= PG_MODE_WP;
29     }
30     if (env->cr[4] & CR4_PAE_MASK) {
31         pg_mode |= PG_MODE_PAE;
32     }
33     if (env->cr[4] & CR4_PSE_MASK) {
34         pg_mode |= PG_MODE_PSE;
35     }
36     if (env->cr[4] & CR4_PKE_MASK) {
37         pg_mode |= PG_MODE_PKE;
38     }
39     if (env->cr[4] & CR4_PKS_MASK) {
40         pg_mode |= PG_MODE_PKS;
41     }
42     if (env->cr[4] & CR4_SMEP_MASK) {
43         pg_mode |= PG_MODE_SMEP;
44     }
45     if (env->cr[4] & CR4_LA57_MASK) {
46         pg_mode |= PG_MODE_LA57;
47     }
48     if (env->hflags & HF_LMA_MASK) {
49         pg_mode |= PG_MODE_LMA;
50     }
51     if (env->efer & MSR_EFER_NXE) {
52         pg_mode |= PG_MODE_NXE;
53     }
54     return pg_mode;
55 }
56 
57 #define PG_ERROR_OK (-1)
58 
59 typedef hwaddr (*MMUTranslateFunc)(CPUState *cs, hwaddr gphys, MMUAccessType access_type,
60 				int *prot);
61 
62 #define GET_HPHYS(cs, gpa, access_type, prot)  \
63 	(get_hphys_func ? get_hphys_func(cs, gpa, access_type, prot) : gpa)
64 
65 static int mmu_translate(CPUState *cs, hwaddr addr, MMUTranslateFunc get_hphys_func,
66                          uint64_t cr3, int is_write1, int mmu_idx, int pg_mode,
67                          hwaddr *xlat, int *page_size, int *prot)
68 {
69     X86CPU *cpu = X86_CPU(cs);
70     CPUX86State *env = &cpu->env;
71     uint64_t ptep, pte;
72     int32_t a20_mask;
73     target_ulong pde_addr, pte_addr;
74     int error_code = 0;
75     int is_dirty, is_write, is_user;
76     uint64_t rsvd_mask = PG_ADDRESS_MASK & ~MAKE_64BIT_MASK(0, cpu->phys_bits);
77     uint32_t page_offset;
78     uint32_t pkr;
79 
80     is_user = (mmu_idx == MMU_USER_IDX);
81     is_write = is_write1 & 1;
82     a20_mask = x86_get_a20_mask(env);
83 
84     if (!(pg_mode & PG_MODE_NXE)) {
85         rsvd_mask |= PG_NX_MASK;
86     }
87 
88     if (pg_mode & PG_MODE_PAE) {
89         uint64_t pde, pdpe;
90         target_ulong pdpe_addr;
91 
92 #ifdef TARGET_X86_64
93         if (pg_mode & PG_MODE_LMA) {
94             bool la57 = pg_mode & PG_MODE_LA57;
95             uint64_t pml5e_addr, pml5e;
96             uint64_t pml4e_addr, pml4e;
97 
98             if (la57) {
99                 pml5e_addr = ((cr3 & ~0xfff) +
100                         (((addr >> 48) & 0x1ff) << 3)) & a20_mask;
101                 pml5e_addr = GET_HPHYS(cs, pml5e_addr, MMU_DATA_STORE, NULL);
102                 pml5e = x86_ldq_phys(cs, pml5e_addr);
103                 if (!(pml5e & PG_PRESENT_MASK)) {
104                     goto do_fault;
105                 }
106                 if (pml5e & (rsvd_mask | PG_PSE_MASK)) {
107                     goto do_fault_rsvd;
108                 }
109                 if (!(pml5e & PG_ACCESSED_MASK)) {
110                     pml5e |= PG_ACCESSED_MASK;
111                     x86_stl_phys_notdirty(cs, pml5e_addr, pml5e);
112                 }
113                 ptep = pml5e ^ PG_NX_MASK;
114             } else {
115                 pml5e = cr3;
116                 ptep = PG_NX_MASK | PG_USER_MASK | PG_RW_MASK;
117             }
118 
119             pml4e_addr = ((pml5e & PG_ADDRESS_MASK) +
120                     (((addr >> 39) & 0x1ff) << 3)) & a20_mask;
121             pml4e_addr = GET_HPHYS(cs, pml4e_addr, MMU_DATA_STORE, NULL);
122             pml4e = x86_ldq_phys(cs, pml4e_addr);
123             if (!(pml4e & PG_PRESENT_MASK)) {
124                 goto do_fault;
125             }
126             if (pml4e & (rsvd_mask | PG_PSE_MASK)) {
127                 goto do_fault_rsvd;
128             }
129             if (!(pml4e & PG_ACCESSED_MASK)) {
130                 pml4e |= PG_ACCESSED_MASK;
131                 x86_stl_phys_notdirty(cs, pml4e_addr, pml4e);
132             }
133             ptep &= pml4e ^ PG_NX_MASK;
134             pdpe_addr = ((pml4e & PG_ADDRESS_MASK) + (((addr >> 30) & 0x1ff) << 3)) &
135                 a20_mask;
136             pdpe_addr = GET_HPHYS(cs, pdpe_addr, MMU_DATA_STORE, NULL);
137             pdpe = x86_ldq_phys(cs, pdpe_addr);
138             if (!(pdpe & PG_PRESENT_MASK)) {
139                 goto do_fault;
140             }
141             if (pdpe & rsvd_mask) {
142                 goto do_fault_rsvd;
143             }
144             ptep &= pdpe ^ PG_NX_MASK;
145             if (!(pdpe & PG_ACCESSED_MASK)) {
146                 pdpe |= PG_ACCESSED_MASK;
147                 x86_stl_phys_notdirty(cs, pdpe_addr, pdpe);
148             }
149             if (pdpe & PG_PSE_MASK) {
150                 /* 1 GB page */
151                 *page_size = 1024 * 1024 * 1024;
152                 pte_addr = pdpe_addr;
153                 pte = pdpe;
154                 goto do_check_protect;
155             }
156         } else
157 #endif
158         {
159             /* XXX: load them when cr3 is loaded ? */
160             pdpe_addr = ((cr3 & ~0x1f) + ((addr >> 27) & 0x18)) &
161                 a20_mask;
162             pdpe_addr = GET_HPHYS(cs, pdpe_addr, MMU_DATA_STORE, NULL);
163             pdpe = x86_ldq_phys(cs, pdpe_addr);
164             if (!(pdpe & PG_PRESENT_MASK)) {
165                 goto do_fault;
166             }
167             rsvd_mask |= PG_HI_USER_MASK;
168             if (pdpe & (rsvd_mask | PG_NX_MASK)) {
169                 goto do_fault_rsvd;
170             }
171             ptep = PG_NX_MASK | PG_USER_MASK | PG_RW_MASK;
172         }
173 
174         pde_addr = ((pdpe & PG_ADDRESS_MASK) + (((addr >> 21) & 0x1ff) << 3)) &
175             a20_mask;
176         pde_addr = GET_HPHYS(cs, pde_addr, MMU_DATA_STORE, NULL);
177         pde = x86_ldq_phys(cs, pde_addr);
178         if (!(pde & PG_PRESENT_MASK)) {
179             goto do_fault;
180         }
181         if (pde & rsvd_mask) {
182             goto do_fault_rsvd;
183         }
184         ptep &= pde ^ PG_NX_MASK;
185         if (pde & PG_PSE_MASK) {
186             /* 2 MB page */
187             *page_size = 2048 * 1024;
188             pte_addr = pde_addr;
189             pte = pde;
190             goto do_check_protect;
191         }
192         /* 4 KB page */
193         if (!(pde & PG_ACCESSED_MASK)) {
194             pde |= PG_ACCESSED_MASK;
195             x86_stl_phys_notdirty(cs, pde_addr, pde);
196         }
197         pte_addr = ((pde & PG_ADDRESS_MASK) + (((addr >> 12) & 0x1ff) << 3)) &
198             a20_mask;
199         pte_addr = GET_HPHYS(cs, pte_addr, MMU_DATA_STORE, NULL);
200         pte = x86_ldq_phys(cs, pte_addr);
201         if (!(pte & PG_PRESENT_MASK)) {
202             goto do_fault;
203         }
204         if (pte & rsvd_mask) {
205             goto do_fault_rsvd;
206         }
207         /* combine pde and pte nx, user and rw protections */
208         ptep &= pte ^ PG_NX_MASK;
209         *page_size = 4096;
210     } else {
211         uint32_t pde;
212 
213         /* page directory entry */
214         pde_addr = ((cr3 & ~0xfff) + ((addr >> 20) & 0xffc)) &
215             a20_mask;
216         pde_addr = GET_HPHYS(cs, pde_addr, MMU_DATA_STORE, NULL);
217         pde = x86_ldl_phys(cs, pde_addr);
218         if (!(pde & PG_PRESENT_MASK)) {
219             goto do_fault;
220         }
221         ptep = pde | PG_NX_MASK;
222 
223         /* if PSE bit is set, then we use a 4MB page */
224         if ((pde & PG_PSE_MASK) && (pg_mode & PG_MODE_PSE)) {
225             *page_size = 4096 * 1024;
226             pte_addr = pde_addr;
227 
228             /* Bits 20-13 provide bits 39-32 of the address, bit 21 is reserved.
229              * Leave bits 20-13 in place for setting accessed/dirty bits below.
230              */
231             pte = pde | ((pde & 0x1fe000LL) << (32 - 13));
232             rsvd_mask = 0x200000;
233             goto do_check_protect_pse36;
234         }
235 
236         if (!(pde & PG_ACCESSED_MASK)) {
237             pde |= PG_ACCESSED_MASK;
238             x86_stl_phys_notdirty(cs, pde_addr, pde);
239         }
240 
241         /* page directory entry */
242         pte_addr = ((pde & ~0xfff) + ((addr >> 10) & 0xffc)) &
243             a20_mask;
244         pte_addr = GET_HPHYS(cs, pte_addr, MMU_DATA_STORE, NULL);
245         pte = x86_ldl_phys(cs, pte_addr);
246         if (!(pte & PG_PRESENT_MASK)) {
247             goto do_fault;
248         }
249         /* combine pde and pte user and rw protections */
250         ptep &= pte | PG_NX_MASK;
251         *page_size = 4096;
252         rsvd_mask = 0;
253     }
254 
255 do_check_protect:
256     rsvd_mask |= (*page_size - 1) & PG_ADDRESS_MASK & ~PG_PSE_PAT_MASK;
257 do_check_protect_pse36:
258     if (pte & rsvd_mask) {
259         goto do_fault_rsvd;
260     }
261     ptep ^= PG_NX_MASK;
262 
263     /* can the page can be put in the TLB?  prot will tell us */
264     if (is_user && !(ptep & PG_USER_MASK)) {
265         goto do_fault_protect;
266     }
267 
268     *prot = 0;
269     if (mmu_idx != MMU_KSMAP_IDX || !(ptep & PG_USER_MASK)) {
270         *prot |= PAGE_READ;
271         if ((ptep & PG_RW_MASK) || !(is_user || (pg_mode & PG_MODE_WP))) {
272             *prot |= PAGE_WRITE;
273         }
274     }
275     if (!(ptep & PG_NX_MASK) &&
276         (mmu_idx == MMU_USER_IDX ||
277          !((pg_mode & PG_MODE_SMEP) && (ptep & PG_USER_MASK)))) {
278         *prot |= PAGE_EXEC;
279     }
280 
281     if (!(pg_mode & PG_MODE_LMA)) {
282         pkr = 0;
283     } else if (ptep & PG_USER_MASK) {
284         pkr = pg_mode & PG_MODE_PKE ? env->pkru : 0;
285     } else {
286         pkr = pg_mode & PG_MODE_PKS ? env->pkrs : 0;
287     }
288     if (pkr) {
289         uint32_t pk = (pte & PG_PKRU_MASK) >> PG_PKRU_BIT;
290         uint32_t pkr_ad = (pkr >> pk * 2) & 1;
291         uint32_t pkr_wd = (pkr >> pk * 2) & 2;
292         uint32_t pkr_prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
293 
294         if (pkr_ad) {
295             pkr_prot &= ~(PAGE_READ | PAGE_WRITE);
296         } else if (pkr_wd && (is_user || (pg_mode & PG_MODE_WP))) {
297             pkr_prot &= ~PAGE_WRITE;
298         }
299 
300         *prot &= pkr_prot;
301         if ((pkr_prot & (1 << is_write1)) == 0) {
302             assert(is_write1 != 2);
303             error_code |= PG_ERROR_PK_MASK;
304             goto do_fault_protect;
305         }
306     }
307 
308     if ((*prot & (1 << is_write1)) == 0) {
309         goto do_fault_protect;
310     }
311 
312     /* yes, it can! */
313     is_dirty = is_write && !(pte & PG_DIRTY_MASK);
314     if (!(pte & PG_ACCESSED_MASK) || is_dirty) {
315         pte |= PG_ACCESSED_MASK;
316         if (is_dirty) {
317             pte |= PG_DIRTY_MASK;
318         }
319         x86_stl_phys_notdirty(cs, pte_addr, pte);
320     }
321 
322     if (!(pte & PG_DIRTY_MASK)) {
323         /* only set write access if already dirty... otherwise wait
324            for dirty access */
325         assert(!is_write);
326         *prot &= ~PAGE_WRITE;
327     }
328 
329     pte = pte & a20_mask;
330 
331     /* align to page_size */
332     pte &= PG_ADDRESS_MASK & ~(*page_size - 1);
333     page_offset = addr & (*page_size - 1);
334     *xlat = GET_HPHYS(cs, pte + page_offset, is_write1, prot);
335     return PG_ERROR_OK;
336 
337  do_fault_rsvd:
338     error_code |= PG_ERROR_RSVD_MASK;
339  do_fault_protect:
340     error_code |= PG_ERROR_P_MASK;
341  do_fault:
342     error_code |= (is_write << PG_ERROR_W_BIT);
343     if (is_user)
344         error_code |= PG_ERROR_U_MASK;
345     if (is_write1 == 2 &&
346         (((pg_mode & PG_MODE_NXE) && (pg_mode & PG_MODE_PAE)) ||
347          (pg_mode & PG_MODE_SMEP)))
348         error_code |= PG_ERROR_I_D_MASK;
349     return error_code;
350 }
351 
352 hwaddr get_hphys(CPUState *cs, hwaddr gphys, MMUAccessType access_type,
353                         int *prot)
354 {
355     CPUX86State *env = &X86_CPU(cs)->env;
356     uint64_t exit_info_1;
357     int page_size;
358     int next_prot;
359     hwaddr hphys;
360 
361     if (likely(!(env->hflags2 & HF2_NPT_MASK))) {
362         return gphys;
363     }
364 
365     exit_info_1 = mmu_translate(cs, gphys, NULL, env->nested_cr3,
366                                access_type, MMU_USER_IDX, env->nested_pg_mode,
367                                &hphys, &page_size, &next_prot);
368     if (exit_info_1 == PG_ERROR_OK) {
369         if (prot) {
370             *prot &= next_prot;
371         }
372         return hphys;
373     }
374 
375     x86_stq_phys(cs, env->vm_vmcb + offsetof(struct vmcb, control.exit_info_2),
376                  gphys);
377     if (prot) {
378         exit_info_1 |= SVM_NPTEXIT_GPA;
379     } else { /* page table access */
380         exit_info_1 |= SVM_NPTEXIT_GPT;
381     }
382     cpu_vmexit(env, SVM_EXIT_NPF, exit_info_1, env->retaddr);
383 }
384 
385 /* return value:
386  * -1 = cannot handle fault
387  * 0  = nothing more to do
388  * 1  = generate PF fault
389  */
390 static int handle_mmu_fault(CPUState *cs, vaddr addr, int size,
391                             int is_write1, int mmu_idx)
392 {
393     X86CPU *cpu = X86_CPU(cs);
394     CPUX86State *env = &cpu->env;
395     int error_code = PG_ERROR_OK;
396     int pg_mode, prot, page_size;
397     hwaddr paddr;
398     hwaddr vaddr;
399 
400 #if defined(DEBUG_MMU)
401     printf("MMU fault: addr=%" VADDR_PRIx " w=%d mmu=%d eip=" TARGET_FMT_lx "\n",
402            addr, is_write1, mmu_idx, env->eip);
403 #endif
404 
405     if (!(env->cr[0] & CR0_PG_MASK)) {
406         paddr = addr;
407 #ifdef TARGET_X86_64
408         if (!(env->hflags & HF_LMA_MASK)) {
409             /* Without long mode we can only address 32bits in real mode */
410             paddr = (uint32_t)paddr;
411         }
412 #endif
413         prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
414         page_size = 4096;
415     } else {
416         pg_mode = get_pg_mode(env);
417         if (pg_mode & PG_MODE_LMA) {
418             int32_t sext;
419 
420             /* test virtual address sign extension */
421             sext = (int64_t)addr >> (pg_mode & PG_MODE_LA57 ? 56 : 47);
422             if (sext != 0 && sext != -1) {
423                 env->error_code = 0;
424                 cs->exception_index = EXCP0D_GPF;
425                 return 1;
426             }
427         }
428 
429         error_code = mmu_translate(cs, addr, get_hphys, env->cr[3], is_write1,
430                                    mmu_idx, pg_mode,
431                                    &paddr, &page_size, &prot);
432     }
433 
434     if (error_code == PG_ERROR_OK) {
435         /* Even if 4MB pages, we map only one 4KB page in the cache to
436            avoid filling it too fast */
437         vaddr = addr & TARGET_PAGE_MASK;
438         paddr &= TARGET_PAGE_MASK;
439 
440         assert(prot & (1 << is_write1));
441         tlb_set_page_with_attrs(cs, vaddr, paddr, cpu_get_mem_attrs(env),
442                                 prot, mmu_idx, page_size);
443         return 0;
444     } else {
445         if (env->intercept_exceptions & (1 << EXCP0E_PAGE)) {
446             /* cr2 is not modified in case of exceptions */
447             x86_stq_phys(cs,
448                      env->vm_vmcb + offsetof(struct vmcb, control.exit_info_2),
449                      addr);
450         } else {
451             env->cr[2] = addr;
452         }
453         env->error_code = error_code;
454         cs->exception_index = EXCP0E_PAGE;
455         return 1;
456     }
457 }
458 
459 bool x86_cpu_tlb_fill(CPUState *cs, vaddr addr, int size,
460                       MMUAccessType access_type, int mmu_idx,
461                       bool probe, uintptr_t retaddr)
462 {
463     X86CPU *cpu = X86_CPU(cs);
464     CPUX86State *env = &cpu->env;
465 
466     env->retaddr = retaddr;
467     if (handle_mmu_fault(cs, addr, size, access_type, mmu_idx)) {
468         /* FIXME: On error in get_hphys we have already jumped out.  */
469         g_assert(!probe);
470         raise_exception_err_ra(env, cs->exception_index,
471                                env->error_code, retaddr);
472     }
473     return true;
474 }
475