/*
 *  x86 exception helpers - sysemu code
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include "tcg/helper-tcg.h"

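/*
 * Gather the current paging mode (CR0.WP, the relevant CR4 bits, EFER.NXE
 * and long-mode state) into a single PG_MODE_* bitmask for the page-table
 * walker below.
 */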
int get_pg_mode(CPUX86State *env)
{
    int pg_mode = 0;
    if (env->cr[0] & CR0_WP_MASK) {
        pg_mode |= PG_MODE_WP;
    }
    if (env->cr[4] & CR4_PAE_MASK) {
        pg_mode |= PG_MODE_PAE;
    }
    if (env->cr[4] & CR4_PSE_MASK) {
        pg_mode |= PG_MODE_PSE;
    }
    if (env->cr[4] & CR4_PKE_MASK) {
        pg_mode |= PG_MODE_PKE;
    }
    if (env->cr[4] & CR4_PKS_MASK) {
        pg_mode |= PG_MODE_PKS;
    }
    if (env->cr[4] & CR4_SMEP_MASK) {
        pg_mode |= PG_MODE_SMEP;
    }
    if (env->cr[4] & CR4_LA57_MASK) {
        pg_mode |= PG_MODE_LA57;
    }
    if (env->hflags & HF_LMA_MASK) {
        pg_mode |= PG_MODE_LMA;
    }
    if (env->efer & MSR_EFER_NXE) {
        pg_mode |= PG_MODE_NXE;
    }
    return pg_mode;
}

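/* Returned by mmu_translate() when the walk completes without a fault. */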
#define PG_ERROR_OK (-1)

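/*
 * Optional hook used to translate a guest-physical address through the
 * nested page tables.  When no hook is supplied (get_hphys_func == NULL),
 * the guest-physical address is used as-is.
 */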
typedef hwaddr (*MMUTranslateFunc)(CPUState *cs, hwaddr gphys, MMUAccessType access_type,
                                   int *prot);

#define GET_HPHYS(cs, gpa, access_type, prot)  \
    (get_hphys_func ? get_hphys_func(cs, gpa, access_type, prot) : gpa)

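/*
 * Walk the page tables rooted at cr3 for the access described by is_write1
 * (MMU_DATA_LOAD, MMU_DATA_STORE or MMU_INST_FETCH) and mmu_idx, honouring
 * the PG_MODE_* bits in pg_mode.  On success, return PG_ERROR_OK and fill
 * in *xlat, *page_size and *prot; on a fault, return the #PF error code
 * (except for a non-canonical address, which sets up #GP and returns 1).
 */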
static int mmu_translate(CPUState *cs, hwaddr addr, MMUTranslateFunc get_hphys_func,
                         uint64_t cr3, int is_write1, int mmu_idx, int pg_mode,
                         hwaddr *xlat, int *page_size, int *prot)
{
    X86CPU *cpu = X86_CPU(cs);
    CPUX86State *env = &cpu->env;
    uint64_t ptep, pte;
    int32_t a20_mask;
    target_ulong pde_addr, pte_addr;
    int error_code = 0;
    int is_dirty, is_write, is_user;
    uint64_t rsvd_mask = PG_ADDRESS_MASK & ~MAKE_64BIT_MASK(0, cpu->phys_bits);
    uint32_t page_offset;
    uint32_t pkr;

    is_user = (mmu_idx == MMU_USER_IDX);
    is_write = is_write1 & 1;
    a20_mask = x86_get_a20_mask(env);

    if (!(pg_mode & PG_MODE_NXE)) {
        rsvd_mask |= PG_NX_MASK;
    }

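    /*
     * PAE or long-mode paging: entries are 64 bits wide and the walk has
     * three levels (PAE), four levels (long mode) or five levels (LA57).
     */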
    if (pg_mode & PG_MODE_PAE) {
        uint64_t pde, pdpe;
        target_ulong pdpe_addr;

#ifdef TARGET_X86_64
        if (env->hflags & HF_LMA_MASK) {
            bool la57 = pg_mode & PG_MODE_LA57;
            uint64_t pml5e_addr, pml5e;
            uint64_t pml4e_addr, pml4e;
            int32_t sext;

            /* test virtual address sign extension */
            sext = la57 ? (int64_t)addr >> 56 : (int64_t)addr >> 47;
            if (get_hphys_func && sext != 0 && sext != -1) {
                env->error_code = 0;
                cs->exception_index = EXCP0D_GPF;
                return 1;
            }

            if (la57) {
                pml5e_addr = ((cr3 & ~0xfff) +
                        (((addr >> 48) & 0x1ff) << 3)) & a20_mask;
                pml5e_addr = GET_HPHYS(cs, pml5e_addr, MMU_DATA_STORE, NULL);
                pml5e = x86_ldq_phys(cs, pml5e_addr);
                if (!(pml5e & PG_PRESENT_MASK)) {
                    goto do_fault;
                }
                if (pml5e & (rsvd_mask | PG_PSE_MASK)) {
                    goto do_fault_rsvd;
                }
                if (!(pml5e & PG_ACCESSED_MASK)) {
                    pml5e |= PG_ACCESSED_MASK;
                    x86_stl_phys_notdirty(cs, pml5e_addr, pml5e);
                }
                ptep = pml5e ^ PG_NX_MASK;
            } else {
                pml5e = cr3;
                ptep = PG_NX_MASK | PG_USER_MASK | PG_RW_MASK;
            }

            pml4e_addr = ((pml5e & PG_ADDRESS_MASK) +
                    (((addr >> 39) & 0x1ff) << 3)) & a20_mask;
            pml4e_addr = GET_HPHYS(cs, pml4e_addr, MMU_DATA_STORE, NULL);
            pml4e = x86_ldq_phys(cs, pml4e_addr);
            if (!(pml4e & PG_PRESENT_MASK)) {
                goto do_fault;
            }
            if (pml4e & (rsvd_mask | PG_PSE_MASK)) {
                goto do_fault_rsvd;
            }
            if (!(pml4e & PG_ACCESSED_MASK)) {
                pml4e |= PG_ACCESSED_MASK;
                x86_stl_phys_notdirty(cs, pml4e_addr, pml4e);
            }
            ptep &= pml4e ^ PG_NX_MASK;
            pdpe_addr = ((pml4e & PG_ADDRESS_MASK) + (((addr >> 30) & 0x1ff) << 3)) &
                a20_mask;
            pdpe_addr = GET_HPHYS(cs, pdpe_addr, MMU_DATA_STORE, NULL);
            pdpe = x86_ldq_phys(cs, pdpe_addr);
            if (!(pdpe & PG_PRESENT_MASK)) {
                goto do_fault;
            }
            if (pdpe & rsvd_mask) {
                goto do_fault_rsvd;
            }
            ptep &= pdpe ^ PG_NX_MASK;
            if (!(pdpe & PG_ACCESSED_MASK)) {
                pdpe |= PG_ACCESSED_MASK;
                x86_stl_phys_notdirty(cs, pdpe_addr, pdpe);
            }
            if (pdpe & PG_PSE_MASK) {
                /* 1 GB page */
                *page_size = 1024 * 1024 * 1024;
                pte_addr = pdpe_addr;
                pte = pdpe;
                goto do_check_protect;
            }
        } else
#endif
        {
            /* XXX: load them when cr3 is loaded ? */
            pdpe_addr = ((cr3 & ~0x1f) + ((addr >> 27) & 0x18)) &
                a20_mask;
            pdpe_addr = GET_HPHYS(cs, pdpe_addr, MMU_DATA_STORE, NULL);
            pdpe = x86_ldq_phys(cs, pdpe_addr);
            if (!(pdpe & PG_PRESENT_MASK)) {
                goto do_fault;
            }
            rsvd_mask |= PG_HI_USER_MASK;
            if (pdpe & (rsvd_mask | PG_NX_MASK)) {
                goto do_fault_rsvd;
            }
            ptep = PG_NX_MASK | PG_USER_MASK | PG_RW_MASK;
        }

        pde_addr = ((pdpe & PG_ADDRESS_MASK) + (((addr >> 21) & 0x1ff) << 3)) &
            a20_mask;
        pde_addr = GET_HPHYS(cs, pde_addr, MMU_DATA_STORE, NULL);
        pde = x86_ldq_phys(cs, pde_addr);
        if (!(pde & PG_PRESENT_MASK)) {
            goto do_fault;
        }
        if (pde & rsvd_mask) {
            goto do_fault_rsvd;
        }
        ptep &= pde ^ PG_NX_MASK;
        if (pde & PG_PSE_MASK) {
            /* 2 MB page */
            *page_size = 2048 * 1024;
            pte_addr = pde_addr;
            pte = pde;
            goto do_check_protect;
        }
        /* 4 KB page */
        if (!(pde & PG_ACCESSED_MASK)) {
            pde |= PG_ACCESSED_MASK;
            x86_stl_phys_notdirty(cs, pde_addr, pde);
        }
        pte_addr = ((pde & PG_ADDRESS_MASK) + (((addr >> 12) & 0x1ff) << 3)) &
            a20_mask;
        pte_addr = GET_HPHYS(cs, pte_addr, MMU_DATA_STORE, NULL);
        pte = x86_ldq_phys(cs, pte_addr);
        if (!(pte & PG_PRESENT_MASK)) {
            goto do_fault;
        }
        if (pte & rsvd_mask) {
            goto do_fault_rsvd;
        }
        /* combine pde and pte nx, user and rw protections */
        ptep &= pte ^ PG_NX_MASK;
        *page_size = 4096;
    } else {
        uint32_t pde;

        /* page directory entry */
        pde_addr = ((cr3 & ~0xfff) + ((addr >> 20) & 0xffc)) &
            a20_mask;
        pde_addr = GET_HPHYS(cs, pde_addr, MMU_DATA_STORE, NULL);
        pde = x86_ldl_phys(cs, pde_addr);
        if (!(pde & PG_PRESENT_MASK)) {
            goto do_fault;
        }
        ptep = pde | PG_NX_MASK;

        /* if PSE bit is set, then we use a 4MB page */
        if ((pde & PG_PSE_MASK) && (pg_mode & PG_MODE_PSE)) {
            *page_size = 4096 * 1024;
            pte_addr = pde_addr;

            /* Bits 20-13 provide bits 39-32 of the address, bit 21 is reserved.
             * Leave bits 20-13 in place for setting accessed/dirty bits below.
             */
            pte = pde | ((pde & 0x1fe000LL) << (32 - 13));
            rsvd_mask = 0x200000;
            goto do_check_protect_pse36;
        }

        if (!(pde & PG_ACCESSED_MASK)) {
            pde |= PG_ACCESSED_MASK;
            x86_stl_phys_notdirty(cs, pde_addr, pde);
        }

        /* page table entry */
        pte_addr = ((pde & ~0xfff) + ((addr >> 10) & 0xffc)) &
            a20_mask;
        pte_addr = GET_HPHYS(cs, pte_addr, MMU_DATA_STORE, NULL);
        pte = x86_ldl_phys(cs, pte_addr);
        if (!(pte & PG_PRESENT_MASK)) {
            goto do_fault;
        }
        /* combine pde and pte user and rw protections */
        ptep &= pte | PG_NX_MASK;
        *page_size = 4096;
        rsvd_mask = 0;
    }

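    /*
     * Common exit for all page sizes: check the reserved bits of the leaf
     * entry, then turn the user/rw/nx bits accumulated in ptep into page
     * protections.
     */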
do_check_protect:
    rsvd_mask |= (*page_size - 1) & PG_ADDRESS_MASK & ~PG_PSE_PAT_MASK;
do_check_protect_pse36:
    if (pte & rsvd_mask) {
        goto do_fault_rsvd;
    }
    ptep ^= PG_NX_MASK;

    /* can the page be put in the TLB?  prot will tell us */
    if (is_user && !(ptep & PG_USER_MASK)) {
        goto do_fault_protect;
    }

    *prot = 0;
    if (mmu_idx != MMU_KSMAP_IDX || !(ptep & PG_USER_MASK)) {
        *prot |= PAGE_READ;
        if ((ptep & PG_RW_MASK) || !(is_user || (pg_mode & PG_MODE_WP))) {
            *prot |= PAGE_WRITE;
        }
    }
    if (!(ptep & PG_NX_MASK) &&
        (mmu_idx == MMU_USER_IDX ||
         !((pg_mode & PG_MODE_SMEP) && (ptep & PG_USER_MASK)))) {
        *prot |= PAGE_EXEC;
    }

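    /*
     * Protection keys (PKU for user pages, PKS for supervisor pages) may
     * further restrict the access: the key field of the leaf entry
     * (PG_PKRU_MASK) selects an access-disable/write-disable bit pair in
     * PKRU or PKRS.
     */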
    if (!(env->hflags & HF_LMA_MASK)) {
        pkr = 0;
    } else if (ptep & PG_USER_MASK) {
        pkr = pg_mode & PG_MODE_PKE ? env->pkru : 0;
    } else {
        pkr = pg_mode & PG_MODE_PKS ? env->pkrs : 0;
    }
    if (pkr) {
        uint32_t pk = (pte & PG_PKRU_MASK) >> PG_PKRU_BIT;
        uint32_t pkr_ad = (pkr >> pk * 2) & 1;
        uint32_t pkr_wd = (pkr >> pk * 2) & 2;
        uint32_t pkr_prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;

        if (pkr_ad) {
            pkr_prot &= ~(PAGE_READ | PAGE_WRITE);
        } else if (pkr_wd && (is_user || (pg_mode & PG_MODE_WP))) {
            pkr_prot &= ~PAGE_WRITE;
        }

        *prot &= pkr_prot;
        if ((pkr_prot & (1 << is_write1)) == 0) {
            assert(is_write1 != 2);
            error_code |= PG_ERROR_PK_MASK;
            goto do_fault_protect;
        }
    }

    if ((*prot & (1 << is_write1)) == 0) {
        goto do_fault_protect;
    }

    /* yes, it can! */
    is_dirty = is_write && !(pte & PG_DIRTY_MASK);
    if (!(pte & PG_ACCESSED_MASK) || is_dirty) {
        pte |= PG_ACCESSED_MASK;
        if (is_dirty) {
            pte |= PG_DIRTY_MASK;
        }
        x86_stl_phys_notdirty(cs, pte_addr, pte);
    }

    if (!(pte & PG_DIRTY_MASK)) {
        /* only set write access if already dirty... otherwise wait
           for dirty access */
        assert(!is_write);
        *prot &= ~PAGE_WRITE;
    }

    pte = pte & a20_mask;

    /* align to page_size */
    pte &= PG_ADDRESS_MASK & ~(*page_size - 1);
    page_offset = addr & (*page_size - 1);
    *xlat = GET_HPHYS(cs, pte + page_offset, is_write1, prot);
    return PG_ERROR_OK;

 do_fault_rsvd:
    error_code |= PG_ERROR_RSVD_MASK;
 do_fault_protect:
    error_code |= PG_ERROR_P_MASK;
 do_fault:
    error_code |= (is_write << PG_ERROR_W_BIT);
    if (is_user) {
        error_code |= PG_ERROR_U_MASK;
    }
    if (is_write1 == 2 &&
        (((pg_mode & PG_MODE_NXE) && (pg_mode & PG_MODE_PAE)) ||
         (pg_mode & PG_MODE_SMEP))) {
        error_code |= PG_ERROR_I_D_MASK;
    }
    return error_code;
}

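/*
 * Translate a guest-physical address to a host-physical address through the
 * nested page tables.  If nested paging is not active the address is
 * returned unchanged; on a nested fault a #VMEXIT(NPF) is raised and this
 * function does not return.
 */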
hwaddr get_hphys(CPUState *cs, hwaddr gphys, MMUAccessType access_type,
                        int *prot)
{
    CPUX86State *env = &X86_CPU(cs)->env;
    uint64_t exit_info_1;
    int page_size;
    int next_prot;
    hwaddr hphys;

    if (likely(!(env->hflags2 & HF2_NPT_MASK))) {
        return gphys;
    }

    exit_info_1 = mmu_translate(cs, gphys, NULL, env->nested_cr3,
                               access_type, MMU_USER_IDX, env->nested_pg_mode,
                               &hphys, &page_size, &next_prot);
    if (exit_info_1 == PG_ERROR_OK) {
        if (prot) {
            *prot &= next_prot;
        }
        return hphys;
    }

    x86_stq_phys(cs, env->vm_vmcb + offsetof(struct vmcb, control.exit_info_2),
                 gphys);
    if (prot) {
        exit_info_1 |= SVM_NPTEXIT_GPA;
    } else { /* page table access */
        exit_info_1 |= SVM_NPTEXIT_GPT;
    }
    cpu_vmexit(env, SVM_EXIT_NPF, exit_info_1, env->retaddr);
}


/* return value:
 * -1 = cannot handle fault
 * 0  = nothing more to do
 * 1  = generate PF fault
 */
static int handle_mmu_fault(CPUState *cs, vaddr addr, int size,
                            int is_write1, int mmu_idx)
{
    X86CPU *cpu = X86_CPU(cs);
    CPUX86State *env = &cpu->env;
    int error_code = PG_ERROR_OK;
    int pg_mode, prot, page_size;
    hwaddr paddr;
    hwaddr vaddr;

#if defined(DEBUG_MMU)
    printf("MMU fault: addr=%" VADDR_PRIx " w=%d mmu=%d eip=" TARGET_FMT_lx "\n",
           addr, is_write1, mmu_idx, env->eip);
#endif

    if (!(env->cr[0] & CR0_PG_MASK)) {
        paddr = addr;
#ifdef TARGET_X86_64
        if (!(env->hflags & HF_LMA_MASK)) {
            /* Without long mode we can only address 32 bits in real mode */
            paddr = (uint32_t)paddr;
        }
#endif
        prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
        page_size = 4096;
    } else {
        pg_mode = get_pg_mode(env);
        error_code = mmu_translate(cs, addr, get_hphys, env->cr[3], is_write1,
                                   mmu_idx, pg_mode,
                                   &paddr, &page_size, &prot);
    }

    if (error_code == PG_ERROR_OK) {
        /* Even with 4 MB pages, we map only one 4 KB page in the TLB to
           avoid filling it too quickly */
        vaddr = addr & TARGET_PAGE_MASK;
        paddr &= TARGET_PAGE_MASK;

        assert(prot & (1 << is_write1));
        tlb_set_page_with_attrs(cs, vaddr, paddr, cpu_get_mem_attrs(env),
                                prot, mmu_idx, page_size);
        return 0;
    } else {
        if (env->intercept_exceptions & (1 << EXCP0E_PAGE)) {
            /* cr2 is not modified in case of exceptions */
            x86_stq_phys(cs,
                     env->vm_vmcb + offsetof(struct vmcb, control.exit_info_2),
                     addr);
        } else {
            env->cr[2] = addr;
        }
        env->error_code = error_code;
        cs->exception_index = EXCP0E_PAGE;
        return 1;
    }
}

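/*
 * TCG TLB-fill hook: install a TLB entry for 'addr' and return true, or
 * raise a page-fault exception at 'retaddr' if the access is not allowed.
 */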
bool x86_cpu_tlb_fill(CPUState *cs, vaddr addr, int size,
                      MMUAccessType access_type, int mmu_idx,
                      bool probe, uintptr_t retaddr)
{
    X86CPU *cpu = X86_CPU(cs);
    CPUX86State *env = &cpu->env;

    env->retaddr = retaddr;
    if (handle_mmu_fault(cs, addr, size, access_type, mmu_idx)) {
        /* FIXME: On error in get_hphys we have already jumped out.  */
        g_assert(!probe);
        raise_exception_err_ra(env, cs->exception_index,
                               env->error_code, retaddr);
    }
    return true;
}