xref: /openbmc/qemu/target/s390x/mmu_helper.c (revision b43047a2)
1 /*
2  * S390x MMU related functions
3  *
4  * Copyright (c) 2011 Alexander Graf
5  * Copyright (c) 2015 Thomas Huth, IBM Corporation
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU General Public License for more details.
16  */
17 
18 #include "qemu/osdep.h"
19 #include "qemu/error-report.h"
20 #include "exec/address-spaces.h"
21 #include "cpu.h"
22 #include "internal.h"
23 #include "kvm_s390x.h"
24 #include "sysemu/kvm.h"
25 #include "sysemu/tcg.h"
26 #include "exec/exec-all.h"
27 #include "trace.h"
28 #include "hw/hw.h"
29 #include "hw/s390x/storage-keys.h"
30 
/*
 * Compile-time debug switches. Uncomment a line to define the macro:
 *  - DEBUG_S390        enables DPRINTF() output,
 *  - DEBUG_S390_PTE    additionally routes PTE_DPRINTF() to DPRINTF(),
 *  - DEBUG_S390_STDOUT makes DPRINTF() also write to stderr.
 */
/* #define DEBUG_S390 */
/* #define DEBUG_S390_PTE */
/* #define DEBUG_S390_STDOUT */

/*
 * DPRINTF(fmt, ...): printf-style debug output.
 *
 * With DEBUG_S390 and DEBUG_S390_STDOUT the message goes to stderr and,
 * if qemu_log_separate() reports true, to qemu_log() as well. With only
 * DEBUG_S390 it goes to qemu_log(). Without DEBUG_S390 the macro expands
 * to an empty statement, so its arguments are neither evaluated nor
 * type-checked.
 */
#ifdef DEBUG_S390
#ifdef DEBUG_S390_STDOUT
#define DPRINTF(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); \
         if (qemu_log_separate()) qemu_log(fmt, ##__VA_ARGS__); } while (0)
#else
#define DPRINTF(fmt, ...) \
    do { qemu_log(fmt, ## __VA_ARGS__); } while (0)
#endif
#else
#define DPRINTF(fmt, ...) \
    do { } while (0)
#endif
48 
/*
 * PTE_DPRINTF(fmt, ...): debug output for the table-walking code.
 * It is an alias of DPRINTF() when DEBUG_S390_PTE is defined and an
 * empty statement otherwise.
 */
#ifdef DEBUG_S390_PTE
#define PTE_DPRINTF DPRINTF
#else
#define PTE_DPRINTF(fmt, ...) \
    do { } while (0)
#endif
55 
/*
 * Fetch/store bits in the translation exception code:
 * the fault helpers in this file OR in FS_WRITE for MMU_DATA_STORE
 * accesses and FS_READ for every other access type.
 */
#define FS_READ  0x800
#define FS_WRITE 0x400
59 
60 static void trigger_access_exception(CPUS390XState *env, uint32_t type,
61                                      uint32_t ilen, uint64_t tec)
62 {
63     S390CPU *cpu = env_archcpu(env);
64 
65     if (kvm_enabled()) {
66         kvm_s390_access_exception(cpu, type, tec);
67     } else {
68         CPUState *cs = env_cpu(env);
69         if (type != PGM_ADDRESSING) {
70             stq_phys(cs->as, env->psa + offsetof(LowCore, trans_exc_code), tec);
71         }
72         trigger_pgm_exception(env, type, ilen);
73     }
74 }
75 
76 static void trigger_prot_fault(CPUS390XState *env, target_ulong vaddr,
77                                uint64_t asc, int rw, bool exc)
78 {
79     uint64_t tec;
80 
81     tec = vaddr | (rw == MMU_DATA_STORE ? FS_WRITE : FS_READ) | 4 | asc >> 46;
82 
83     DPRINTF("%s: trans_exc_code=%016" PRIx64 "\n", __func__, tec);
84 
85     if (!exc) {
86         return;
87     }
88 
89     trigger_access_exception(env, PGM_PROTECTION, ILEN_AUTO, tec);
90 }
91 
92 static void trigger_page_fault(CPUS390XState *env, target_ulong vaddr,
93                                uint32_t type, uint64_t asc, int rw, bool exc)
94 {
95     int ilen = ILEN_AUTO;
96     uint64_t tec;
97 
98     tec = vaddr | (rw == MMU_DATA_STORE ? FS_WRITE : FS_READ) | asc >> 46;
99 
100     DPRINTF("%s: trans_exc_code=%016" PRIx64 "\n", __func__, tec);
101 
102     if (!exc) {
103         return;
104     }
105 
106     /* Code accesses have an undefined ilc.  */
107     if (rw == MMU_INST_FETCH) {
108         ilen = 2;
109     }
110 
111     trigger_access_exception(env, type, ilen, tec);
112 }
113 
/*
 * Check whether the address would be protected by Low-Address Protection,
 * i.e. whether it lies in [0, 511] or in [4096, 4607].
 */
static bool is_low_address(uint64_t addr)
{
    if (addr < 512) {
        return true;
    }
    return addr >= 4096 && addr < 4608;
}
119 
120 /* check whether Low-Address Protection is enabled for mmu_translate() */
121 static bool lowprot_enabled(const CPUS390XState *env, uint64_t asc)
122 {
123     if (!(env->cregs[0] & CR0_LOWPROT)) {
124         return false;
125     }
126     if (!(env->psw.mask & PSW_MASK_DAT)) {
127         return true;
128     }
129 
130     /* Check the private-space control bit */
131     switch (asc) {
132     case PSW_ASC_PRIMARY:
133         return !(env->cregs[1] & ASCE_PRIVATE_SPACE);
134     case PSW_ASC_SECONDARY:
135         return !(env->cregs[7] & ASCE_PRIVATE_SPACE);
136     case PSW_ASC_HOME:
137         return !(env->cregs[13] & ASCE_PRIVATE_SPACE);
138     default:
139         /* We don't support access register mode */
140         error_report("unsupported addressing mode");
141         exit(1);
142     }
143 }
144 
145 /**
146  * Translate real address to absolute (= physical)
147  * address by taking care of the prefix mapping.
148  */
149 target_ulong mmu_real2abs(CPUS390XState *env, target_ulong raddr)
150 {
151     if (raddr < 0x2000) {
152         return raddr + env->psa;    /* Map the lowcore. */
153     } else if (raddr >= env->psa && raddr < env->psa + 0x2000) {
154         return raddr - env->psa;    /* Map the 0 page. */
155     }
156     return raddr;
157 }
158 
159 /* Decode page table entry (normal 4KB page) */
160 static int mmu_translate_pte(CPUS390XState *env, target_ulong vaddr,
161                              uint64_t asc, uint64_t pt_entry,
162                              target_ulong *raddr, int *flags, int rw, bool exc)
163 {
164     if (pt_entry & PAGE_INVALID) {
165         DPRINTF("%s: PTE=0x%" PRIx64 " invalid\n", __func__, pt_entry);
166         trigger_page_fault(env, vaddr, PGM_PAGE_TRANS, asc, rw, exc);
167         return -1;
168     }
169     if (pt_entry & PAGE_RES0) {
170         trigger_page_fault(env, vaddr, PGM_TRANS_SPEC, asc, rw, exc);
171         return -1;
172     }
173     if (pt_entry & PAGE_RO) {
174         *flags &= ~PAGE_WRITE;
175     }
176 
177     *raddr = pt_entry & ASCE_ORIGIN;
178 
179     PTE_DPRINTF("%s: PTE=0x%" PRIx64 "\n", __func__, pt_entry);
180 
181     return 0;
182 }
183 
184 /* Decode segment table entry */
185 static int mmu_translate_segment(CPUS390XState *env, target_ulong vaddr,
186                                  uint64_t asc, uint64_t st_entry,
187                                  target_ulong *raddr, int *flags, int rw,
188                                  bool exc)
189 {
190     CPUState *cs = env_cpu(env);
191     uint64_t origin, offs, pt_entry;
192 
193     if (st_entry & SEGMENT_ENTRY_RO) {
194         *flags &= ~PAGE_WRITE;
195     }
196 
197     if ((st_entry & SEGMENT_ENTRY_FC) && (env->cregs[0] & CR0_EDAT)) {
198         /* Decode EDAT1 segment frame absolute address (1MB page) */
199         *raddr = (st_entry & 0xfffffffffff00000ULL) | (vaddr & 0xfffff);
200         PTE_DPRINTF("%s: SEG=0x%" PRIx64 "\n", __func__, st_entry);
201         return 0;
202     }
203 
204     /* Look up 4KB page entry */
205     origin = st_entry & SEGMENT_ENTRY_ORIGIN;
206     offs  = (vaddr & VADDR_PX) >> 9;
207     pt_entry = ldq_phys(cs->as, origin + offs);
208     PTE_DPRINTF("%s: 0x%" PRIx64 " + 0x%" PRIx64 " => 0x%016" PRIx64 "\n",
209                 __func__, origin, offs, pt_entry);
210     return mmu_translate_pte(env, vaddr, asc, pt_entry, raddr, flags, rw, exc);
211 }
212 
213 /* Decode region table entries */
214 static int mmu_translate_region(CPUS390XState *env, target_ulong vaddr,
215                                 uint64_t asc, uint64_t entry, int level,
216                                 target_ulong *raddr, int *flags, int rw,
217                                 bool exc)
218 {
219     CPUState *cs = env_cpu(env);
220     uint64_t origin, offs, new_entry;
221     const int pchks[4] = {
222         PGM_SEGMENT_TRANS, PGM_REG_THIRD_TRANS,
223         PGM_REG_SEC_TRANS, PGM_REG_FIRST_TRANS
224     };
225 
226     PTE_DPRINTF("%s: 0x%" PRIx64 "\n", __func__, entry);
227 
228     origin = entry & REGION_ENTRY_ORIGIN;
229     offs = (vaddr >> (17 + 11 * level / 4)) & 0x3ff8;
230 
231     new_entry = ldq_phys(cs->as, origin + offs);
232     PTE_DPRINTF("%s: 0x%" PRIx64 " + 0x%" PRIx64 " => 0x%016" PRIx64 "\n",
233                 __func__, origin, offs, new_entry);
234 
235     if ((new_entry & REGION_ENTRY_INV) != 0) {
236         DPRINTF("%s: invalid region\n", __func__);
237         trigger_page_fault(env, vaddr, pchks[level / 4], asc, rw, exc);
238         return -1;
239     }
240 
241     if ((new_entry & REGION_ENTRY_TYPE_MASK) != level) {
242         trigger_page_fault(env, vaddr, PGM_TRANS_SPEC, asc, rw, exc);
243         return -1;
244     }
245 
246     if (level == ASCE_TYPE_SEGMENT) {
247         return mmu_translate_segment(env, vaddr, asc, new_entry, raddr, flags,
248                                      rw, exc);
249     }
250 
251     /* Check region table offset and length */
252     offs = (vaddr >> (28 + 11 * (level - 4) / 4)) & 3;
253     if (offs < ((new_entry & REGION_ENTRY_TF) >> 6)
254         || offs > (new_entry & REGION_ENTRY_LENGTH)) {
255         DPRINTF("%s: invalid offset or len (%lx)\n", __func__, new_entry);
256         trigger_page_fault(env, vaddr, pchks[level / 4 - 1], asc, rw, exc);
257         return -1;
258     }
259 
260     if ((env->cregs[0] & CR0_EDAT) && (new_entry & REGION_ENTRY_RO)) {
261         *flags &= ~PAGE_WRITE;
262     }
263 
264     /* yet another region */
265     return mmu_translate_region(env, vaddr, asc, new_entry, level - 4,
266                                 raddr, flags, rw, exc);
267 }
268 
269 static int mmu_translate_asce(CPUS390XState *env, target_ulong vaddr,
270                               uint64_t asc, uint64_t asce, target_ulong *raddr,
271                               int *flags, int rw, bool exc)
272 {
273     int level;
274     int r;
275 
276     if (asce & ASCE_REAL_SPACE) {
277         /* direct mapping */
278         *raddr = vaddr;
279         return 0;
280     }
281 
282     level = asce & ASCE_TYPE_MASK;
283     switch (level) {
284     case ASCE_TYPE_REGION1:
285         if ((vaddr >> 62) > (asce & ASCE_TABLE_LENGTH)) {
286             trigger_page_fault(env, vaddr, PGM_REG_FIRST_TRANS, asc, rw, exc);
287             return -1;
288         }
289         break;
290     case ASCE_TYPE_REGION2:
291         if (vaddr & 0xffe0000000000000ULL) {
292             DPRINTF("%s: vaddr doesn't fit 0x%16" PRIx64
293                     " 0xffe0000000000000ULL\n", __func__, vaddr);
294             trigger_page_fault(env, vaddr, PGM_ASCE_TYPE, asc, rw, exc);
295             return -1;
296         }
297         if ((vaddr >> 51 & 3) > (asce & ASCE_TABLE_LENGTH)) {
298             trigger_page_fault(env, vaddr, PGM_REG_SEC_TRANS, asc, rw, exc);
299             return -1;
300         }
301         break;
302     case ASCE_TYPE_REGION3:
303         if (vaddr & 0xfffffc0000000000ULL) {
304             DPRINTF("%s: vaddr doesn't fit 0x%16" PRIx64
305                     " 0xfffffc0000000000ULL\n", __func__, vaddr);
306             trigger_page_fault(env, vaddr, PGM_ASCE_TYPE, asc, rw, exc);
307             return -1;
308         }
309         if ((vaddr >> 40 & 3) > (asce & ASCE_TABLE_LENGTH)) {
310             trigger_page_fault(env, vaddr, PGM_REG_THIRD_TRANS, asc, rw, exc);
311             return -1;
312         }
313         break;
314     case ASCE_TYPE_SEGMENT:
315         if (vaddr & 0xffffffff80000000ULL) {
316             DPRINTF("%s: vaddr doesn't fit 0x%16" PRIx64
317                     " 0xffffffff80000000ULL\n", __func__, vaddr);
318             trigger_page_fault(env, vaddr, PGM_ASCE_TYPE, asc, rw, exc);
319             return -1;
320         }
321         if ((vaddr >> 29 & 3) > (asce & ASCE_TABLE_LENGTH)) {
322             trigger_page_fault(env, vaddr, PGM_SEGMENT_TRANS, asc, rw, exc);
323             return -1;
324         }
325         break;
326     }
327 
328     r = mmu_translate_region(env, vaddr, asc, asce, level, raddr, flags, rw,
329                              exc);
330     if (!r && rw == MMU_DATA_STORE && !(*flags & PAGE_WRITE)) {
331         trigger_prot_fault(env, vaddr, asc, rw, exc);
332         return -1;
333     }
334 
335     return r;
336 }
337 
338 static void mmu_handle_skey(target_ulong addr, int rw, int *flags)
339 {
340     static S390SKeysClass *skeyclass;
341     static S390SKeysState *ss;
342     uint8_t key;
343     int rc;
344 
345     if (unlikely(addr >= ram_size)) {
346         return;
347     }
348 
349     if (unlikely(!ss)) {
350         ss = s390_get_skeys_device();
351         skeyclass = S390_SKEYS_GET_CLASS(ss);
352     }
353 
354     /*
355      * Whenever we create a new TLB entry, we set the storage key reference
356      * bit. In case we allow write accesses, we set the storage key change
357      * bit. Whenever the guest changes the storage key, we have to flush the
358      * TLBs of all CPUs (the whole TLB or all affected entries), so that the
359      * next reference/change will result in an MMU fault and make us properly
360      * update the storage key here.
361      *
362      * Note 1: "record of references ... is not necessarily accurate",
363      *         "change bit may be set in case no storing has occurred".
364      *         -> We can set reference/change bits even on exceptions.
365      * Note 2: certain accesses seem to ignore storage keys. For example,
366      *         DAT translation does not set reference bits for table accesses.
367      *
368      * TODO: key-controlled protection. Only CPU accesses make use of the
369      *       PSW key. CSS accesses are different - we have to pass in the key.
370      *
371      * TODO: we have races between getting and setting the key.
372      */
373     rc = skeyclass->get_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
374     if (rc) {
375         trace_get_skeys_nonzero(rc);
376         return;
377     }
378 
379     switch (rw) {
380     case MMU_DATA_LOAD:
381     case MMU_INST_FETCH:
382         /*
383          * The TLB entry has to remain write-protected on read-faults if
384          * the storage key does not indicate a change already. Otherwise
385          * we might miss setting the change bit on write accesses.
386          */
387         if (!(key & SK_C)) {
388             *flags &= ~PAGE_WRITE;
389         }
390         break;
391     case MMU_DATA_STORE:
392         key |= SK_C;
393         break;
394     default:
395         g_assert_not_reached();
396     }
397 
398     /* Any store/fetch sets the reference bit */
399     key |= SK_R;
400 
401     rc = skeyclass->set_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
402     if (rc) {
403         trace_set_skeys_nonzero(rc);
404     }
405 }
406 
407 /**
408  * Translate a virtual (logical) address into a physical (absolute) address.
409  * @param vaddr  the virtual address
410  * @param rw     0 = read, 1 = write, 2 = code fetch
411  * @param asc    address space control (one of the PSW_ASC_* modes)
412  * @param raddr  the translated address is stored to this pointer
413  * @param flags  the PAGE_READ/WRITE/EXEC flags are stored to this pointer
414  * @param exc    true = inject a program check if a fault occurred
415  * @return       0 if the translation was successful, -1 if a fault occurred
416  */
417 int mmu_translate(CPUS390XState *env, target_ulong vaddr, int rw, uint64_t asc,
418                   target_ulong *raddr, int *flags, bool exc)
419 {
420     uint64_t asce;
421     int r;
422 
423 
424     *flags = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
425     if (is_low_address(vaddr & TARGET_PAGE_MASK) && lowprot_enabled(env, asc)) {
426         /*
427          * If any part of this page is currently protected, make sure the
428          * TLB entry will not be reused.
429          *
430          * As the protected range is always the first 512 bytes of the
431          * two first pages, we are able to catch all writes to these areas
432          * just by looking at the start address (triggering the tlb miss).
433          */
434         *flags |= PAGE_WRITE_INV;
435         if (is_low_address(vaddr) && rw == MMU_DATA_STORE) {
436             if (exc) {
437                 trigger_access_exception(env, PGM_PROTECTION, ILEN_AUTO, 0);
438             }
439             return -EACCES;
440         }
441     }
442 
443     vaddr &= TARGET_PAGE_MASK;
444 
445     if (!(env->psw.mask & PSW_MASK_DAT)) {
446         *raddr = vaddr;
447         goto nodat;
448     }
449 
450     switch (asc) {
451     case PSW_ASC_PRIMARY:
452         PTE_DPRINTF("%s: asc=primary\n", __func__);
453         asce = env->cregs[1];
454         break;
455     case PSW_ASC_HOME:
456         PTE_DPRINTF("%s: asc=home\n", __func__);
457         asce = env->cregs[13];
458         break;
459     case PSW_ASC_SECONDARY:
460         PTE_DPRINTF("%s: asc=secondary\n", __func__);
461         asce = env->cregs[7];
462         break;
463     case PSW_ASC_ACCREG:
464     default:
465         hw_error("guest switched to unknown asc mode\n");
466         break;
467     }
468 
469     /* perform the DAT translation */
470     r = mmu_translate_asce(env, vaddr, asc, asce, raddr, flags, rw, exc);
471     if (r) {
472         return r;
473     }
474 
475 nodat:
476     /* Convert real address -> absolute address */
477     *raddr = mmu_real2abs(env, *raddr);
478 
479     mmu_handle_skey(*raddr, rw, flags);
480     return 0;
481 }
482 
483 /**
484  * translate_pages: Translate a set of consecutive logical page addresses
485  * to absolute addresses. This function is used for TCG and old KVM without
486  * the MEMOP interface.
487  */
488 static int translate_pages(S390CPU *cpu, vaddr addr, int nr_pages,
489                            target_ulong *pages, bool is_write)
490 {
491     uint64_t asc = cpu->env.psw.mask & PSW_MASK_ASC;
492     CPUS390XState *env = &cpu->env;
493     int ret, i, pflags;
494 
495     for (i = 0; i < nr_pages; i++) {
496         ret = mmu_translate(env, addr, is_write, asc, &pages[i], &pflags, true);
497         if (ret) {
498             return ret;
499         }
500         if (!address_space_access_valid(&address_space_memory, pages[i],
501                                         TARGET_PAGE_SIZE, is_write,
502                                         MEMTXATTRS_UNSPECIFIED)) {
503             trigger_access_exception(env, PGM_ADDRESSING, ILEN_AUTO, 0);
504             return -EFAULT;
505         }
506         addr += TARGET_PAGE_SIZE;
507     }
508 
509     return 0;
510 }
511 
512 /**
513  * s390_cpu_virt_mem_rw:
514  * @laddr:     the logical start address
515  * @ar:        the access register number
516  * @hostbuf:   buffer in host memory. NULL = do only checks w/o copying
517  * @len:       length that should be transferred
518  * @is_write:  true = write, false = read
519  * Returns:    0 on success, non-zero if an exception occurred
520  *
521  * Copy from/to guest memory using logical addresses. Note that we inject a
522  * program interrupt in case there is an error while accessing the memory.
523  *
524  * This function will always return (also for TCG), make sure to call
525  * s390_cpu_virt_mem_handle_exc() to properly exit the CPU loop.
526  */
527 int s390_cpu_virt_mem_rw(S390CPU *cpu, vaddr laddr, uint8_t ar, void *hostbuf,
528                          int len, bool is_write)
529 {
530     int currlen, nr_pages, i;
531     target_ulong *pages;
532     int ret;
533 
534     if (kvm_enabled()) {
535         ret = kvm_s390_mem_op(cpu, laddr, ar, hostbuf, len, is_write);
536         if (ret >= 0) {
537             return ret;
538         }
539     }
540 
541     nr_pages = (((laddr & ~TARGET_PAGE_MASK) + len - 1) >> TARGET_PAGE_BITS)
542                + 1;
543     pages = g_malloc(nr_pages * sizeof(*pages));
544 
545     ret = translate_pages(cpu, laddr, nr_pages, pages, is_write);
546     if (ret == 0 && hostbuf != NULL) {
547         /* Copy data by stepping through the area page by page */
548         for (i = 0; i < nr_pages; i++) {
549             currlen = MIN(len, TARGET_PAGE_SIZE - (laddr % TARGET_PAGE_SIZE));
550             cpu_physical_memory_rw(pages[i] | (laddr & ~TARGET_PAGE_MASK),
551                                    hostbuf, currlen, is_write);
552             laddr += currlen;
553             hostbuf += currlen;
554             len -= currlen;
555         }
556     }
557 
558     g_free(pages);
559     return ret;
560 }
561 
562 void s390_cpu_virt_mem_handle_exc(S390CPU *cpu, uintptr_t ra)
563 {
564     /* KVM will handle the interrupt automatically, TCG has to exit the TB */
565 #ifdef CONFIG_TCG
566     if (tcg_enabled()) {
567         cpu_loop_exit_restore(CPU(cpu), ra);
568     }
569 #endif
570 }
571 
572 /**
573  * Translate a real address into a physical (absolute) address.
574  * @param raddr  the real address
575  * @param rw     0 = read, 1 = write, 2 = code fetch
576  * @param addr   the translated address is stored to this pointer
577  * @param flags  the PAGE_READ/WRITE/EXEC flags are stored to this pointer
578  * @return       0 if the translation was successful, < 0 if a fault occurred
579  */
580 int mmu_translate_real(CPUS390XState *env, target_ulong raddr, int rw,
581                        target_ulong *addr, int *flags)
582 {
583     const bool lowprot_enabled = env->cregs[0] & CR0_LOWPROT;
584 
585     *flags = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
586     if (is_low_address(raddr & TARGET_PAGE_MASK) && lowprot_enabled) {
587         /* see comment in mmu_translate() how this works */
588         *flags |= PAGE_WRITE_INV;
589         if (is_low_address(raddr) && rw == MMU_DATA_STORE) {
590             trigger_access_exception(env, PGM_PROTECTION, ILEN_AUTO, 0);
591             return -EACCES;
592         }
593     }
594 
595     *addr = mmu_real2abs(env, raddr & TARGET_PAGE_MASK);
596 
597     mmu_handle_skey(*addr, rw, flags);
598     return 0;
599 }
600