xref: /openbmc/qemu/target/i386/tcg/system/excp_helper.c (revision 63e7af2035242dda6e2460f4eadbbe6f58c67614)
1 /*
2  *  x86 exception helpers - system code
3  *
4  *  Copyright (c) 2003 Fabrice Bellard
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #include "qemu/osdep.h"
21 #include "cpu.h"
22 #include "accel/tcg/cpu-ldst.h"
23 #include "accel/tcg/probe.h"
24 #include "exec/cputlb.h"
25 #include "exec/page-protection.h"
26 #include "exec/target_page.h"
27 #include "exec/tlb-flags.h"
28 #include "exec/tswap.h"
29 #include "tcg/helper-tcg.h"
30 
/* Inputs describing one page table walk performed by mmu_translate(). */
typedef struct TranslateParams {
    target_ulong addr;          /* address to be translated */
    target_ulong cr3;           /* base of the top-level paging structure */
    int pg_mode;                /* PG_MODE_* flags selecting the paging format */
    int mmu_idx;                /* MMU index of the access (user/SMAP checks) */
    int ptw_idx;                /* MMU index used to reach the page tables */
    MMUAccessType access_type;  /* kind of access being translated */
} TranslateParams;
39 
/* Outcome of a successful translation. */
typedef struct TranslateResult {
    hwaddr paddr;       /* translated physical address */
    int prot;           /* PAGE_READ / PAGE_WRITE / PAGE_EXEC bits granted */
    int page_size;      /* size in bytes of the mapping that was found */
} TranslateResult;
45 
/*
 * Which part of a nested (second stage) translation failed, if any.
 * raise_stage2() turns S2_GPA / S2_GPT into the matching SVM_NPTEXIT_* bit.
 */
typedef enum TranslateFaultStage2 {
    S2_NONE,    /* not a second-stage fault */
    S2_GPA,     /* fault while translating the final guest address */
    S2_GPT,     /* fault while translating a page table address */
} TranslateFaultStage2;
51 
/* Description of a failed translation, filled in by the walker. */
typedef struct TranslateFault {
    int exception_index;            /* exception to raise (e.g. EXCP0E_PAGE) */
    int error_code;                 /* error code delivered with the fault */
    target_ulong cr2;               /* faulting address to be reported */
    TranslateFaultStage2 stage2;    /* S2_NONE unless a nested walk failed */
} TranslateFault;
58 
/* State for accessing one page table entry during a walk. */
typedef struct PTETranslate {
    CPUX86State *env;       /* CPU performing the walk */
    TranslateFault *err;    /* where ptw_translate() reports a failure */
    int ptw_idx;            /* MMU index used for page table accesses */
    void *haddr;            /* host pointer to the entry; NULL => slow path */
    hwaddr gaddr;           /* address of the entry, used by the slow path */
} PTETranslate;
66 
67 static bool ptw_translate(PTETranslate *inout, hwaddr addr)
68 {
69     int flags;
70 
71     inout->gaddr = addr;
72     flags = probe_access_full_mmu(inout->env, addr, 0, MMU_DATA_STORE,
73                                   inout->ptw_idx, &inout->haddr, NULL);
74 
75     if (unlikely(flags & TLB_INVALID_MASK)) {
76         TranslateFault *err = inout->err;
77 
78         assert(inout->ptw_idx == MMU_NESTED_IDX);
79         *err = (TranslateFault){
80             .error_code = inout->env->error_code,
81             .cr2 = addr,
82             .stage2 = S2_GPT,
83         };
84         return false;
85     }
86     return true;
87 }
88 
89 static inline uint32_t ptw_ldl(const PTETranslate *in, uint64_t ra)
90 {
91     if (likely(in->haddr)) {
92         return ldl_p(in->haddr);
93     }
94     return cpu_ldl_mmuidx_ra(in->env, in->gaddr, in->ptw_idx, ra);
95 }
96 
97 static inline uint64_t ptw_ldq(const PTETranslate *in, uint64_t ra)
98 {
99     if (likely(in->haddr)) {
100         return ldq_p(in->haddr);
101     }
102     return cpu_ldq_mmuidx_ra(in->env, in->gaddr, in->ptw_idx, ra);
103 }
104 
105 /*
106  * Note that we can use a 32-bit cmpxchg for all page table entries,
107  * even 64-bit ones, because PG_PRESENT_MASK, PG_ACCESSED_MASK and
108  * PG_DIRTY_MASK are all in the low 32 bits.
109  */
110 static bool ptw_setl_slow(const PTETranslate *in, uint32_t old, uint32_t new)
111 {
112     uint32_t cmp;
113 
114     CPUState *cpu = env_cpu(in->env);
115     /* We are in cpu_exec, and start_exclusive can't be called directly.*/
116     g_assert(cpu->running);
117     cpu_exec_end(cpu);
118     /* Does x86 really perform a rmw cycle on mmio for ptw? */
119     start_exclusive();
120     cmp = cpu_ldl_mmuidx_ra(in->env, in->gaddr, in->ptw_idx, 0);
121     if (cmp == old) {
122         cpu_stl_mmuidx_ra(in->env, in->gaddr, new, in->ptw_idx, 0);
123     }
124     end_exclusive();
125     cpu_exec_start(cpu);
126     return cmp == old;
127 }
128 
129 static inline bool ptw_setl(const PTETranslate *in, uint32_t old, uint32_t set)
130 {
131     if (set & ~old) {
132         uint32_t new = old | set;
133         if (likely(in->haddr)) {
134             old = cpu_to_le32(old);
135             new = cpu_to_le32(new);
136             return qatomic_cmpxchg((uint32_t *)in->haddr, old, new) == old;
137         }
138         return ptw_setl_slow(in, old, new);
139     }
140     return true;
141 }
142 
/*
 * Walk the page tables described by @in and translate in->addr.
 *
 * On success, fill @out with the physical address, the PAGE_* protection
 * bits and the page size, and return true.  On failure, fill @err and
 * return false: either with a page fault (EXCP0E_PAGE, with the error
 * code and in->addr as faulting address), or with the second-stage fault
 * recorded by ptw_translate() or by the final nested probe.
 *
 * Page table entries are accessed through in->ptw_idx.  When that is
 * MMU_NESTED_IDX, the address produced by the walk is itself probed
 * through MMU_NESTED_IDX and the protections and page sizes of both
 * stages are merged.  @ra is handed to the slow-path entry loads.
 */
static bool mmu_translate(CPUX86State *env, const TranslateParams *in,
                          TranslateResult *out, TranslateFault *err,
                          uint64_t ra)
{
    const target_ulong addr = in->addr;
    const int pg_mode = in->pg_mode;
    const bool is_user = is_mmu_index_user(in->mmu_idx);
    const MMUAccessType access_type = in->access_type;
    uint64_t ptep, pte, rsvd_mask;
    PTETranslate pte_trans = {
        .env = env,
        .err = err,
        .ptw_idx = in->ptw_idx,
    };
    hwaddr pte_addr, paddr;
    uint32_t pkr;
    int page_size;
    int error_code;
    int prot;

 restart_all:
    /*
     * Address bits at or above phys_bits are reserved in every entry;
     * the NX bit is reserved as well unless PG_MODE_NXE is set.
     */
    rsvd_mask = ~MAKE_64BIT_MASK(0, env_archcpu(env)->phys_bits);
    rsvd_mask &= PG_ADDRESS_MASK;
    if (!(pg_mode & PG_MODE_NXE)) {
        rsvd_mask |= PG_NX_MASK;
    }

    if (pg_mode & PG_MODE_PAE) {
#ifdef TARGET_X86_64
        if (pg_mode & PG_MODE_LMA) {
            if (pg_mode & PG_MODE_LA57) {
                /*
                 * Page table level 5
                 */
                pte_addr = (in->cr3 & ~0xfff) + (((addr >> 48) & 0x1ff) << 3);
                if (!ptw_translate(&pte_trans, pte_addr)) {
                    return false;
                }
            restart_5:
                pte = ptw_ldq(&pte_trans, ra);
                if (!(pte & PG_PRESENT_MASK)) {
                    goto do_fault;
                }
                if (pte & (rsvd_mask | PG_PSE_MASK)) {
                    goto do_fault_rsvd;
                }
                /* Re-read the entry if it changed under the A-bit update. */
                if (!ptw_setl(&pte_trans, pte, PG_ACCESSED_MASK)) {
                    goto restart_5;
                }
                /*
                 * ptep accumulates the permission bits of all levels with
                 * the NX bit inverted, so that AND-ing in each level keeps
                 * only what every level allows.
                 */
                ptep = pte ^ PG_NX_MASK;
            } else {
                /* No level 5: level 4 is rooted directly at cr3. */
                pte = in->cr3;
                ptep = PG_NX_MASK | PG_USER_MASK | PG_RW_MASK;
            }

            /*
             * Page table level 4
             */
            pte_addr = (pte & PG_ADDRESS_MASK) + (((addr >> 39) & 0x1ff) << 3);
            if (!ptw_translate(&pte_trans, pte_addr)) {
                return false;
            }
        restart_4:
            pte = ptw_ldq(&pte_trans, ra);
            if (!(pte & PG_PRESENT_MASK)) {
                goto do_fault;
            }
            if (pte & (rsvd_mask | PG_PSE_MASK)) {
                goto do_fault_rsvd;
            }
            if (!ptw_setl(&pte_trans, pte, PG_ACCESSED_MASK)) {
                goto restart_4;
            }
            ptep &= pte ^ PG_NX_MASK;

            /*
             * Page table level 3
             */
            pte_addr = (pte & PG_ADDRESS_MASK) + (((addr >> 30) & 0x1ff) << 3);
            if (!ptw_translate(&pte_trans, pte_addr)) {
                return false;
            }
        restart_3_lma:
            pte = ptw_ldq(&pte_trans, ra);
            if (!(pte & PG_PRESENT_MASK)) {
                goto do_fault;
            }
            if (pte & rsvd_mask) {
                goto do_fault_rsvd;
            }
            if (!ptw_setl(&pte_trans, pte, PG_ACCESSED_MASK)) {
                goto restart_3_lma;
            }
            ptep &= pte ^ PG_NX_MASK;
            if (pte & PG_PSE_MASK) {
                /* 1 GB page */
                page_size = 1024 * 1024 * 1024;
                goto do_check_protect;
            }
        } else
#endif
        {
            /*
             * Page table level 3
             */
            pte_addr = (in->cr3 & 0xffffffe0ULL) + ((addr >> 27) & 0x18);
            if (!ptw_translate(&pte_trans, pte_addr)) {
                return false;
            }
            rsvd_mask |= PG_HI_USER_MASK;
        restart_3_nolma:
            pte = ptw_ldq(&pte_trans, ra);
            if (!(pte & PG_PRESENT_MASK)) {
                goto do_fault;
            }
            if (pte & (rsvd_mask | PG_NX_MASK)) {
                goto do_fault_rsvd;
            }
            if (!ptw_setl(&pte_trans, pte, PG_ACCESSED_MASK)) {
                goto restart_3_nolma;
            }
            /* This level contributes no permission restrictions. */
            ptep = PG_NX_MASK | PG_USER_MASK | PG_RW_MASK;
        }

        /*
         * Page table level 2
         */
        pte_addr = (pte & PG_ADDRESS_MASK) + (((addr >> 21) & 0x1ff) << 3);
        if (!ptw_translate(&pte_trans, pte_addr)) {
            return false;
        }
    restart_2_pae:
        pte = ptw_ldq(&pte_trans, ra);
        if (!(pte & PG_PRESENT_MASK)) {
            goto do_fault;
        }
        if (pte & rsvd_mask) {
            goto do_fault_rsvd;
        }
        if (pte & PG_PSE_MASK) {
            /*
             * 2 MB page: this is the final entry, so its accessed bit is
             * set together with the dirty bit once protection is checked.
             */
            page_size = 2048 * 1024;
            ptep &= pte ^ PG_NX_MASK;
            goto do_check_protect;
        }
        if (!ptw_setl(&pte_trans, pte, PG_ACCESSED_MASK)) {
            goto restart_2_pae;
        }
        ptep &= pte ^ PG_NX_MASK;

        /*
         * Page table level 1
         */
        pte_addr = (pte & PG_ADDRESS_MASK) + (((addr >> 12) & 0x1ff) << 3);
        if (!ptw_translate(&pte_trans, pte_addr)) {
            return false;
        }
        pte = ptw_ldq(&pte_trans, ra);
        if (!(pte & PG_PRESENT_MASK)) {
            goto do_fault;
        }
        if (pte & rsvd_mask) {
            goto do_fault_rsvd;
        }
        /* combine pde and pte nx, user and rw protections */
        ptep &= pte ^ PG_NX_MASK;
        page_size = 4096;
    } else if (pg_mode & PG_MODE_PG) {
        /*
         * Page table level 2
         */
        pte_addr = (in->cr3 & 0xfffff000ULL) + ((addr >> 20) & 0xffc);
        if (!ptw_translate(&pte_trans, pte_addr)) {
            return false;
        }
    restart_2_nopae:
        pte = ptw_ldl(&pte_trans, ra);
        if (!(pte & PG_PRESENT_MASK)) {
            goto do_fault;
        }
        /*
         * 32-bit entries have no NX bit: set the (inverted) NX bit in ptep
         * so that the page ends up executable.
         */
        ptep = pte | PG_NX_MASK;

        /* if PSE bit is set, then we use a 4MB page */
        if ((pte & PG_PSE_MASK) && (pg_mode & PG_MODE_PSE)) {
            page_size = 4096 * 1024;
            /*
             * Bits 20-13 provide bits 39-32 of the address, bit 21 is reserved.
             * Leave bits 20-13 in place for setting accessed/dirty bits below.
             */
            pte = (uint32_t)pte | ((pte & 0x1fe000LL) << (32 - 13));
            rsvd_mask = 0x200000;
            goto do_check_protect_pse36;
        }
        if (!ptw_setl(&pte_trans, pte, PG_ACCESSED_MASK)) {
            goto restart_2_nopae;
        }

        /*
         * Page table level 1
         */
        pte_addr = (pte & ~0xfffu) + ((addr >> 10) & 0xffc);
        if (!ptw_translate(&pte_trans, pte_addr)) {
            return false;
        }
        pte = ptw_ldl(&pte_trans, ra);
        if (!(pte & PG_PRESENT_MASK)) {
            goto do_fault;
        }
        /* combine pde and pte user and rw protections */
        ptep &= pte | PG_NX_MASK;
        page_size = 4096;
        rsvd_mask = 0;
    } else {
        /*
         * No paging (real mode), let's tentatively resolve the address as 1:1
         * here, but conditionally still perform an NPT walk on it later.
         */
        page_size = 0x40000000;
        paddr = in->addr;
        prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
        goto stage2;
    }

do_check_protect:
    /*
     * For a large page, the address bits below the page size are reserved
     * too, with the exception of the PAT bit.
     */
    rsvd_mask |= (page_size - 1) & PG_ADDRESS_MASK & ~PG_PSE_PAT_MASK;
do_check_protect_pse36:
    if (pte & rsvd_mask) {
        goto do_fault_rsvd;
    }
    /* Undo the inversion: from here on PG_NX_MASK in ptep means no-execute. */
    ptep ^= PG_NX_MASK;

    /* can the page be put in the TLB?  prot will tell us */
    if (is_user && !(ptep & PG_USER_MASK)) {
        goto do_fault_protect;
    }

    prot = 0;
    /* Data access is denied to user pages when the MMU index enforces SMAP. */
    if (!is_mmu_index_smap(in->mmu_idx) || !(ptep & PG_USER_MASK)) {
        prot |= PAGE_READ;
        /* Supervisor accesses ignore the RW bit unless PG_MODE_WP is set. */
        if ((ptep & PG_RW_MASK) || !(is_user || (pg_mode & PG_MODE_WP))) {
            prot |= PAGE_WRITE;
        }
    }
    /* With PG_MODE_SMEP, supervisor code cannot execute from user pages. */
    if (!(ptep & PG_NX_MASK) &&
        (is_user ||
         !((pg_mode & PG_MODE_SMEP) && (ptep & PG_USER_MASK)))) {
        prot |= PAGE_EXEC;
    }

    /* Select the protection key register matching the page's user bit. */
    if (ptep & PG_USER_MASK) {
        pkr = pg_mode & PG_MODE_PKE ? env->pkru : 0;
    } else {
        pkr = pg_mode & PG_MODE_PKS ? env->pkrs : 0;
    }
    if (pkr) {
        /* Two bits per key: bit 0 disables access, bit 1 disables writes. */
        uint32_t pk = (pte & PG_PKRU_MASK) >> PG_PKRU_BIT;
        uint32_t pkr_ad = (pkr >> pk * 2) & 1;
        uint32_t pkr_wd = (pkr >> pk * 2) & 2;
        uint32_t pkr_prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;

        if (pkr_ad) {
            pkr_prot &= ~(PAGE_READ | PAGE_WRITE);
        } else if (pkr_wd && (is_user || (pg_mode & PG_MODE_WP))) {
            pkr_prot &= ~PAGE_WRITE;
        }
        if ((pkr_prot & (1 << access_type)) == 0) {
            goto do_fault_pk_protect;
        }
        prot &= pkr_prot;
    }

    if ((prot & (1 << access_type)) == 0) {
        goto do_fault_protect;
    }

    /* yes, it can! */
    {
        uint32_t set = PG_ACCESSED_MASK;
        if (access_type == MMU_DATA_STORE) {
            set |= PG_DIRTY_MASK;
        } else if (!(pte & PG_DIRTY_MASK)) {
            /*
             * Only set write access if already dirty...
             * otherwise wait for dirty access.
             */
            prot &= ~PAGE_WRITE;
        }
        if (!ptw_setl(&pte_trans, pte, set)) {
            /*
             * We can arrive here from any of 3 levels and 2 formats.
             * The only safe thing is to restart the entire lookup.
             */
            goto restart_all;
        }
    }

    /* merge offset within page */
    paddr = (pte & PG_ADDRESS_MASK & ~(page_size - 1)) | (addr & (page_size - 1));
 stage2:

    /*
     * Note that NPT is walked (for both paging structures and final guest
     * addresses) using the address with the A20 bit set.
     */
    if (in->ptw_idx == MMU_NESTED_IDX) {
        CPUTLBEntryFull *full;
        int flags, nested_page_size;

        flags = probe_access_full_mmu(env, paddr, 0, access_type,
                                      MMU_NESTED_IDX, &pte_trans.haddr, &full);
        if (unlikely(flags & TLB_INVALID_MASK)) {
            /* The final guest address itself failed the nested probe. */
            *err = (TranslateFault){
                .error_code = env->error_code,
                .cr2 = paddr,
                .stage2 = S2_GPA,
            };
            return false;
        }

        /* Merge stage1 & stage2 protection bits. */
        prot &= full->prot;

        /* Re-verify resulting protection. */
        if ((prot & (1 << access_type)) == 0) {
            goto do_fault_protect;
        }

        /* Merge stage1 & stage2 addresses to final physical address. */
        nested_page_size = 1 << full->lg_page_size;
        paddr = (full->phys_addr & ~(nested_page_size - 1))
              | (paddr & (nested_page_size - 1));

        /*
         * Use the larger of stage1 & stage2 page sizes, so that
         * invalidation works.
         */
        if (nested_page_size > page_size) {
            page_size = nested_page_size;
        }
    }

    /* The A20 mask is only applied to the final physical address. */
    out->paddr = paddr & x86_get_a20_mask(env);
    out->prot = prot;
    out->page_size = page_size;
    return true;

    /*
     * Fault exits: each label picks the cause-specific error code bits and
     * falls into do_fault_cont, which adds the bits describing the access.
     */
 do_fault_rsvd:
    error_code = PG_ERROR_RSVD_MASK;
    goto do_fault_cont;
 do_fault_protect:
    error_code = PG_ERROR_P_MASK;
    goto do_fault_cont;
 do_fault_pk_protect:
    /* pkr_prot always keeps PAGE_EXEC, so a fetch cannot get here. */
    assert(access_type != MMU_INST_FETCH);
    error_code = PG_ERROR_PK_MASK | PG_ERROR_P_MASK;
    goto do_fault_cont;
 do_fault:
    error_code = 0;
 do_fault_cont:
    if (is_user) {
        error_code |= PG_ERROR_U_MASK;
    }
    switch (access_type) {
    case MMU_DATA_LOAD:
        break;
    case MMU_DATA_STORE:
        error_code |= PG_ERROR_W_MASK;
        break;
    case MMU_INST_FETCH:
        /* The I/D bit is only reported when NXE or SMEP is enabled. */
        if (pg_mode & (PG_MODE_NXE | PG_MODE_SMEP)) {
            error_code |= PG_ERROR_I_D_MASK;
        }
        break;
    }
    *err = (TranslateFault){
        .exception_index = EXCP0E_PAGE,
        .error_code = error_code,
        .cr2 = addr,
    };
    return false;
}
524 
525 static G_NORETURN void raise_stage2(CPUX86State *env, TranslateFault *err,
526                                     uintptr_t retaddr)
527 {
528     uint64_t exit_info_1 = err->error_code;
529 
530     switch (err->stage2) {
531     case S2_GPT:
532         exit_info_1 |= SVM_NPTEXIT_GPT;
533         break;
534     case S2_GPA:
535         exit_info_1 |= SVM_NPTEXIT_GPA;
536         break;
537     default:
538         g_assert_not_reached();
539     }
540 
541     x86_stq_phys(env_cpu(env),
542                  env->vm_vmcb + offsetof(struct vmcb, control.exit_info_2),
543                  err->cr2);
544     cpu_vmexit(env, SVM_EXIT_NPF, exit_info_1, retaddr);
545 }
546 
547 static bool get_physical_address(CPUX86State *env, vaddr addr,
548                                  MMUAccessType access_type, int mmu_idx,
549                                  TranslateResult *out, TranslateFault *err,
550                                  uint64_t ra)
551 {
552     TranslateParams in;
553     bool use_stage2 = env->hflags2 & HF2_NPT_MASK;
554 
555     in.addr = addr;
556     in.access_type = access_type;
557 
558     switch (mmu_idx) {
559     case MMU_PHYS_IDX:
560         break;
561 
562     case MMU_NESTED_IDX:
563         if (likely(use_stage2)) {
564             in.cr3 = env->nested_cr3;
565             in.pg_mode = env->nested_pg_mode;
566             in.mmu_idx =
567                 env->nested_pg_mode & PG_MODE_LMA ? MMU_USER64_IDX : MMU_USER32_IDX;
568             in.ptw_idx = MMU_PHYS_IDX;
569 
570             if (!mmu_translate(env, &in, out, err, ra)) {
571                 err->stage2 = S2_GPA;
572                 return false;
573             }
574             return true;
575         }
576         break;
577 
578     default:
579         if (is_mmu_index_32(mmu_idx)) {
580             addr = (uint32_t)addr;
581         }
582 
583         if (likely(env->cr[0] & CR0_PG_MASK || use_stage2)) {
584             in.cr3 = env->cr[3];
585             in.mmu_idx = mmu_idx;
586             in.ptw_idx = use_stage2 ? MMU_NESTED_IDX : MMU_PHYS_IDX;
587             in.pg_mode = get_pg_mode(env);
588 
589             if (in.pg_mode & PG_MODE_LMA) {
590                 /* test virtual address sign extension */
591                 int shift = in.pg_mode & PG_MODE_LA57 ? 56 : 47;
592                 int64_t sext = (int64_t)addr >> shift;
593                 if (sext != 0 && sext != -1) {
594                     *err = (TranslateFault){
595                         .exception_index = EXCP0D_GPF,
596                         .cr2 = addr,
597                     };
598                     return false;
599                 }
600             }
601             return mmu_translate(env, &in, out, err, ra);
602         }
603         break;
604     }
605 
606     /* No translation needed. */
607     out->paddr = addr & x86_get_a20_mask(env);
608     out->prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
609     out->page_size = TARGET_PAGE_SIZE;
610     return true;
611 }
612 
613 bool x86_cpu_tlb_fill(CPUState *cs, vaddr addr, int size,
614                       MMUAccessType access_type, int mmu_idx,
615                       bool probe, uintptr_t retaddr)
616 {
617     CPUX86State *env = cpu_env(cs);
618     TranslateResult out;
619     TranslateFault err;
620 
621     if (get_physical_address(env, addr, access_type, mmu_idx, &out, &err,
622                              retaddr)) {
623         /*
624          * Even if 4MB pages, we map only one 4KB page in the cache to
625          * avoid filling it too fast.
626          */
627         assert(out.prot & (1 << access_type));
628         tlb_set_page_with_attrs(cs, addr & TARGET_PAGE_MASK,
629                                 out.paddr & TARGET_PAGE_MASK,
630                                 cpu_get_mem_attrs(env),
631                                 out.prot, mmu_idx, out.page_size);
632         return true;
633     }
634 
635     if (probe) {
636         /* This will be used if recursing for stage2 translation. */
637         env->error_code = err.error_code;
638         return false;
639     }
640 
641     if (err.stage2 != S2_NONE) {
642         raise_stage2(env, &err, retaddr);
643     }
644 
645     if (env->intercept_exceptions & (1 << err.exception_index)) {
646         /* cr2 is not modified in case of exceptions */
647         x86_stq_phys(cs, env->vm_vmcb +
648                      offsetof(struct vmcb, control.exit_info_2),
649                      err.cr2);
650     } else {
651         env->cr[2] = err.cr2;
652     }
653     raise_exception_err_ra(env, err.exception_index, err.error_code, retaddr);
654 }
655 
656 G_NORETURN void x86_cpu_do_unaligned_access(CPUState *cs, vaddr vaddr,
657                                             MMUAccessType access_type,
658                                             int mmu_idx, uintptr_t retaddr)
659 {
660     X86CPU *cpu = X86_CPU(cs);
661     handle_unaligned_access(&cpu->env, vaddr, access_type, retaddr);
662 }
663