// SPDX-License-Identifier: GPL-2.0
/*
 * Tests Memory Protection Keys (see Documentation/core-api/protection-keys.rst)
 *
 * There are examples in here of:
 *  * how to set protection keys on memory
 *  * how to set/clear bits in pkey registers (the rights register)
 *  * how to handle SEGV_PKUERR signals and extract pkey-relevant
 *    information from the siginfo
 *
 * Things to add:
 *	make sure KSM and KSM COW breaking works
 *	prefault pages in at malloc, or not
 *	protect MPX bounds tables with protection keys?
 *	make sure VMA splitting/merging is working correctly
 *	OOMs can destroy mm->mmap (see exit_mmap()), so make sure it is immune to pkeys
 *	look for pkey "leaks" where it is still set on a VMA but "freed" back to the kernel
 *	do a plain mprotect() to a mprotect_pkey() area and make sure the pkey sticks
 *
 * Compile like this:
 *	gcc -mxsave -o protection_keys -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm
 *	gcc -mxsave -m32 -o protection_keys_32 -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm
 */
#define _GNU_SOURCE
#define __SANE_USERSPACE_TYPES__
#include <errno.h>
#include <linux/elf.h>
#include <linux/futex.h>
#include <time.h>
#include <sys/time.h>
#include <sys/syscall.h> 32*baa489faSSeongJae Park #include <string.h> 33*baa489faSSeongJae Park #include <stdio.h> 34*baa489faSSeongJae Park #include <stdint.h> 35*baa489faSSeongJae Park #include <stdbool.h> 36*baa489faSSeongJae Park #include <signal.h> 37*baa489faSSeongJae Park #include <assert.h> 38*baa489faSSeongJae Park #include <stdlib.h> 39*baa489faSSeongJae Park #include <ucontext.h> 40*baa489faSSeongJae Park #include <sys/mman.h> 41*baa489faSSeongJae Park #include <sys/types.h> 42*baa489faSSeongJae Park #include <sys/wait.h> 43*baa489faSSeongJae Park #include <sys/stat.h> 44*baa489faSSeongJae Park #include <fcntl.h> 45*baa489faSSeongJae Park #include <unistd.h> 46*baa489faSSeongJae Park #include <sys/ptrace.h> 47*baa489faSSeongJae Park #include <setjmp.h> 48*baa489faSSeongJae Park 49*baa489faSSeongJae Park #include "pkey-helpers.h" 50*baa489faSSeongJae Park 51*baa489faSSeongJae Park int iteration_nr = 1; 52*baa489faSSeongJae Park int test_nr; 53*baa489faSSeongJae Park 54*baa489faSSeongJae Park u64 shadow_pkey_reg; 55*baa489faSSeongJae Park int dprint_in_signal; 56*baa489faSSeongJae Park char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE]; 57*baa489faSSeongJae Park 58*baa489faSSeongJae Park void cat_into_file(char *str, char *file) 59*baa489faSSeongJae Park { 60*baa489faSSeongJae Park int fd = open(file, O_RDWR); 61*baa489faSSeongJae Park int ret; 62*baa489faSSeongJae Park 63*baa489faSSeongJae Park dprintf2("%s(): writing '%s' to '%s'\n", __func__, str, file); 64*baa489faSSeongJae Park /* 65*baa489faSSeongJae Park * these need to be raw because they are called under 66*baa489faSSeongJae Park * pkey_assert() 67*baa489faSSeongJae Park */ 68*baa489faSSeongJae Park if (fd < 0) { 69*baa489faSSeongJae Park fprintf(stderr, "error opening '%s'\n", str); 70*baa489faSSeongJae Park perror("error: "); 71*baa489faSSeongJae Park exit(__LINE__); 72*baa489faSSeongJae Park } 73*baa489faSSeongJae Park 74*baa489faSSeongJae Park ret = write(fd, str, strlen(str)); 
75*baa489faSSeongJae Park if (ret != strlen(str)) { 76*baa489faSSeongJae Park perror("write to file failed"); 77*baa489faSSeongJae Park fprintf(stderr, "filename: '%s' str: '%s'\n", file, str); 78*baa489faSSeongJae Park exit(__LINE__); 79*baa489faSSeongJae Park } 80*baa489faSSeongJae Park close(fd); 81*baa489faSSeongJae Park } 82*baa489faSSeongJae Park 83*baa489faSSeongJae Park #if CONTROL_TRACING > 0 84*baa489faSSeongJae Park static int warned_tracing; 85*baa489faSSeongJae Park int tracing_root_ok(void) 86*baa489faSSeongJae Park { 87*baa489faSSeongJae Park if (geteuid() != 0) { 88*baa489faSSeongJae Park if (!warned_tracing) 89*baa489faSSeongJae Park fprintf(stderr, "WARNING: not run as root, " 90*baa489faSSeongJae Park "can not do tracing control\n"); 91*baa489faSSeongJae Park warned_tracing = 1; 92*baa489faSSeongJae Park return 0; 93*baa489faSSeongJae Park } 94*baa489faSSeongJae Park return 1; 95*baa489faSSeongJae Park } 96*baa489faSSeongJae Park #endif 97*baa489faSSeongJae Park 98*baa489faSSeongJae Park void tracing_on(void) 99*baa489faSSeongJae Park { 100*baa489faSSeongJae Park #if CONTROL_TRACING > 0 101*baa489faSSeongJae Park #define TRACEDIR "/sys/kernel/debug/tracing" 102*baa489faSSeongJae Park char pidstr[32]; 103*baa489faSSeongJae Park 104*baa489faSSeongJae Park if (!tracing_root_ok()) 105*baa489faSSeongJae Park return; 106*baa489faSSeongJae Park 107*baa489faSSeongJae Park sprintf(pidstr, "%d", getpid()); 108*baa489faSSeongJae Park cat_into_file("0", TRACEDIR "/tracing_on"); 109*baa489faSSeongJae Park cat_into_file("\n", TRACEDIR "/trace"); 110*baa489faSSeongJae Park if (1) { 111*baa489faSSeongJae Park cat_into_file("function_graph", TRACEDIR "/current_tracer"); 112*baa489faSSeongJae Park cat_into_file("1", TRACEDIR "/options/funcgraph-proc"); 113*baa489faSSeongJae Park } else { 114*baa489faSSeongJae Park cat_into_file("nop", TRACEDIR "/current_tracer"); 115*baa489faSSeongJae Park } 116*baa489faSSeongJae Park cat_into_file(pidstr, TRACEDIR 
"/set_ftrace_pid"); 117*baa489faSSeongJae Park cat_into_file("1", TRACEDIR "/tracing_on"); 118*baa489faSSeongJae Park dprintf1("enabled tracing\n"); 119*baa489faSSeongJae Park #endif 120*baa489faSSeongJae Park } 121*baa489faSSeongJae Park 122*baa489faSSeongJae Park void tracing_off(void) 123*baa489faSSeongJae Park { 124*baa489faSSeongJae Park #if CONTROL_TRACING > 0 125*baa489faSSeongJae Park if (!tracing_root_ok()) 126*baa489faSSeongJae Park return; 127*baa489faSSeongJae Park cat_into_file("0", "/sys/kernel/debug/tracing/tracing_on"); 128*baa489faSSeongJae Park #endif 129*baa489faSSeongJae Park } 130*baa489faSSeongJae Park 131*baa489faSSeongJae Park void abort_hooks(void) 132*baa489faSSeongJae Park { 133*baa489faSSeongJae Park fprintf(stderr, "running %s()...\n", __func__); 134*baa489faSSeongJae Park tracing_off(); 135*baa489faSSeongJae Park #ifdef SLEEP_ON_ABORT 136*baa489faSSeongJae Park sleep(SLEEP_ON_ABORT); 137*baa489faSSeongJae Park #endif 138*baa489faSSeongJae Park } 139*baa489faSSeongJae Park 140*baa489faSSeongJae Park /* 141*baa489faSSeongJae Park * This attempts to have roughly a page of instructions followed by a few 142*baa489faSSeongJae Park * instructions that do a write, and another page of instructions. That 143*baa489faSSeongJae Park * way, we are pretty sure that the write is in the second page of 144*baa489faSSeongJae Park * instructions and has at least a page of padding behind it. 145*baa489faSSeongJae Park * 146*baa489faSSeongJae Park * *That* lets us be sure to madvise() away the write instruction, which 147*baa489faSSeongJae Park * will then fault, which makes sure that the fault code handles 148*baa489faSSeongJae Park * execute-only memory properly. 
149*baa489faSSeongJae Park */ 150*baa489faSSeongJae Park #ifdef __powerpc64__ 151*baa489faSSeongJae Park /* This way, both 4K and 64K alignment are maintained */ 152*baa489faSSeongJae Park __attribute__((__aligned__(65536))) 153*baa489faSSeongJae Park #else 154*baa489faSSeongJae Park __attribute__((__aligned__(PAGE_SIZE))) 155*baa489faSSeongJae Park #endif 156*baa489faSSeongJae Park void lots_o_noops_around_write(int *write_to_me) 157*baa489faSSeongJae Park { 158*baa489faSSeongJae Park dprintf3("running %s()\n", __func__); 159*baa489faSSeongJae Park __page_o_noops(); 160*baa489faSSeongJae Park /* Assume this happens in the second page of instructions: */ 161*baa489faSSeongJae Park *write_to_me = __LINE__; 162*baa489faSSeongJae Park /* pad out by another page: */ 163*baa489faSSeongJae Park __page_o_noops(); 164*baa489faSSeongJae Park dprintf3("%s() done\n", __func__); 165*baa489faSSeongJae Park } 166*baa489faSSeongJae Park 167*baa489faSSeongJae Park void dump_mem(void *dumpme, int len_bytes) 168*baa489faSSeongJae Park { 169*baa489faSSeongJae Park char *c = (void *)dumpme; 170*baa489faSSeongJae Park int i; 171*baa489faSSeongJae Park 172*baa489faSSeongJae Park for (i = 0; i < len_bytes; i += sizeof(u64)) { 173*baa489faSSeongJae Park u64 *ptr = (u64 *)(c + i); 174*baa489faSSeongJae Park dprintf1("dump[%03d][@%p]: %016llx\n", i, ptr, *ptr); 175*baa489faSSeongJae Park } 176*baa489faSSeongJae Park } 177*baa489faSSeongJae Park 178*baa489faSSeongJae Park static u32 hw_pkey_get(int pkey, unsigned long flags) 179*baa489faSSeongJae Park { 180*baa489faSSeongJae Park u64 pkey_reg = __read_pkey_reg(); 181*baa489faSSeongJae Park 182*baa489faSSeongJae Park dprintf1("%s(pkey=%d, flags=%lx) = %x / %d\n", 183*baa489faSSeongJae Park __func__, pkey, flags, 0, 0); 184*baa489faSSeongJae Park dprintf2("%s() raw pkey_reg: %016llx\n", __func__, pkey_reg); 185*baa489faSSeongJae Park 186*baa489faSSeongJae Park return (u32) get_pkey_bits(pkey_reg, pkey); 187*baa489faSSeongJae Park } 
188*baa489faSSeongJae Park 189*baa489faSSeongJae Park static int hw_pkey_set(int pkey, unsigned long rights, unsigned long flags) 190*baa489faSSeongJae Park { 191*baa489faSSeongJae Park u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE); 192*baa489faSSeongJae Park u64 old_pkey_reg = __read_pkey_reg(); 193*baa489faSSeongJae Park u64 new_pkey_reg; 194*baa489faSSeongJae Park 195*baa489faSSeongJae Park /* make sure that 'rights' only contains the bits we expect: */ 196*baa489faSSeongJae Park assert(!(rights & ~mask)); 197*baa489faSSeongJae Park 198*baa489faSSeongJae Park /* modify bits accordingly in old pkey_reg and assign it */ 199*baa489faSSeongJae Park new_pkey_reg = set_pkey_bits(old_pkey_reg, pkey, rights); 200*baa489faSSeongJae Park 201*baa489faSSeongJae Park __write_pkey_reg(new_pkey_reg); 202*baa489faSSeongJae Park 203*baa489faSSeongJae Park dprintf3("%s(pkey=%d, rights=%lx, flags=%lx) = %x" 204*baa489faSSeongJae Park " pkey_reg now: %016llx old_pkey_reg: %016llx\n", 205*baa489faSSeongJae Park __func__, pkey, rights, flags, 0, __read_pkey_reg(), 206*baa489faSSeongJae Park old_pkey_reg); 207*baa489faSSeongJae Park return 0; 208*baa489faSSeongJae Park } 209*baa489faSSeongJae Park 210*baa489faSSeongJae Park void pkey_disable_set(int pkey, int flags) 211*baa489faSSeongJae Park { 212*baa489faSSeongJae Park unsigned long syscall_flags = 0; 213*baa489faSSeongJae Park int ret; 214*baa489faSSeongJae Park int pkey_rights; 215*baa489faSSeongJae Park u64 orig_pkey_reg = read_pkey_reg(); 216*baa489faSSeongJae Park 217*baa489faSSeongJae Park dprintf1("START->%s(%d, 0x%x)\n", __func__, 218*baa489faSSeongJae Park pkey, flags); 219*baa489faSSeongJae Park pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE)); 220*baa489faSSeongJae Park 221*baa489faSSeongJae Park pkey_rights = hw_pkey_get(pkey, syscall_flags); 222*baa489faSSeongJae Park 223*baa489faSSeongJae Park dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__, 224*baa489faSSeongJae Park pkey, pkey, pkey_rights); 
225*baa489faSSeongJae Park 226*baa489faSSeongJae Park pkey_assert(pkey_rights >= 0); 227*baa489faSSeongJae Park 228*baa489faSSeongJae Park pkey_rights |= flags; 229*baa489faSSeongJae Park 230*baa489faSSeongJae Park ret = hw_pkey_set(pkey, pkey_rights, syscall_flags); 231*baa489faSSeongJae Park assert(!ret); 232*baa489faSSeongJae Park /* pkey_reg and flags have the same format */ 233*baa489faSSeongJae Park shadow_pkey_reg = set_pkey_bits(shadow_pkey_reg, pkey, pkey_rights); 234*baa489faSSeongJae Park dprintf1("%s(%d) shadow: 0x%016llx\n", 235*baa489faSSeongJae Park __func__, pkey, shadow_pkey_reg); 236*baa489faSSeongJae Park 237*baa489faSSeongJae Park pkey_assert(ret >= 0); 238*baa489faSSeongJae Park 239*baa489faSSeongJae Park pkey_rights = hw_pkey_get(pkey, syscall_flags); 240*baa489faSSeongJae Park dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__, 241*baa489faSSeongJae Park pkey, pkey, pkey_rights); 242*baa489faSSeongJae Park 243*baa489faSSeongJae Park dprintf1("%s(%d) pkey_reg: 0x%016llx\n", 244*baa489faSSeongJae Park __func__, pkey, read_pkey_reg()); 245*baa489faSSeongJae Park if (flags) 246*baa489faSSeongJae Park pkey_assert(read_pkey_reg() >= orig_pkey_reg); 247*baa489faSSeongJae Park dprintf1("END<---%s(%d, 0x%x)\n", __func__, 248*baa489faSSeongJae Park pkey, flags); 249*baa489faSSeongJae Park } 250*baa489faSSeongJae Park 251*baa489faSSeongJae Park void pkey_disable_clear(int pkey, int flags) 252*baa489faSSeongJae Park { 253*baa489faSSeongJae Park unsigned long syscall_flags = 0; 254*baa489faSSeongJae Park int ret; 255*baa489faSSeongJae Park int pkey_rights = hw_pkey_get(pkey, syscall_flags); 256*baa489faSSeongJae Park u64 orig_pkey_reg = read_pkey_reg(); 257*baa489faSSeongJae Park 258*baa489faSSeongJae Park pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE)); 259*baa489faSSeongJae Park 260*baa489faSSeongJae Park dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__, 261*baa489faSSeongJae Park pkey, pkey, pkey_rights); 262*baa489faSSeongJae Park 
pkey_assert(pkey_rights >= 0); 263*baa489faSSeongJae Park 264*baa489faSSeongJae Park pkey_rights &= ~flags; 265*baa489faSSeongJae Park 266*baa489faSSeongJae Park ret = hw_pkey_set(pkey, pkey_rights, 0); 267*baa489faSSeongJae Park shadow_pkey_reg = set_pkey_bits(shadow_pkey_reg, pkey, pkey_rights); 268*baa489faSSeongJae Park pkey_assert(ret >= 0); 269*baa489faSSeongJae Park 270*baa489faSSeongJae Park pkey_rights = hw_pkey_get(pkey, syscall_flags); 271*baa489faSSeongJae Park dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__, 272*baa489faSSeongJae Park pkey, pkey, pkey_rights); 273*baa489faSSeongJae Park 274*baa489faSSeongJae Park dprintf1("%s(%d) pkey_reg: 0x%016llx\n", __func__, 275*baa489faSSeongJae Park pkey, read_pkey_reg()); 276*baa489faSSeongJae Park if (flags) 277*baa489faSSeongJae Park assert(read_pkey_reg() <= orig_pkey_reg); 278*baa489faSSeongJae Park } 279*baa489faSSeongJae Park 280*baa489faSSeongJae Park void pkey_write_allow(int pkey) 281*baa489faSSeongJae Park { 282*baa489faSSeongJae Park pkey_disable_clear(pkey, PKEY_DISABLE_WRITE); 283*baa489faSSeongJae Park } 284*baa489faSSeongJae Park void pkey_write_deny(int pkey) 285*baa489faSSeongJae Park { 286*baa489faSSeongJae Park pkey_disable_set(pkey, PKEY_DISABLE_WRITE); 287*baa489faSSeongJae Park } 288*baa489faSSeongJae Park void pkey_access_allow(int pkey) 289*baa489faSSeongJae Park { 290*baa489faSSeongJae Park pkey_disable_clear(pkey, PKEY_DISABLE_ACCESS); 291*baa489faSSeongJae Park } 292*baa489faSSeongJae Park void pkey_access_deny(int pkey) 293*baa489faSSeongJae Park { 294*baa489faSSeongJae Park pkey_disable_set(pkey, PKEY_DISABLE_ACCESS); 295*baa489faSSeongJae Park } 296*baa489faSSeongJae Park 297*baa489faSSeongJae Park /* Failed address bound checks: */ 298*baa489faSSeongJae Park #ifndef SEGV_BNDERR 299*baa489faSSeongJae Park # define SEGV_BNDERR 3 300*baa489faSSeongJae Park #endif 301*baa489faSSeongJae Park 302*baa489faSSeongJae Park #ifndef SEGV_PKUERR 303*baa489faSSeongJae Park # define 
SEGV_PKUERR 4 304*baa489faSSeongJae Park #endif 305*baa489faSSeongJae Park 306*baa489faSSeongJae Park static char *si_code_str(int si_code) 307*baa489faSSeongJae Park { 308*baa489faSSeongJae Park if (si_code == SEGV_MAPERR) 309*baa489faSSeongJae Park return "SEGV_MAPERR"; 310*baa489faSSeongJae Park if (si_code == SEGV_ACCERR) 311*baa489faSSeongJae Park return "SEGV_ACCERR"; 312*baa489faSSeongJae Park if (si_code == SEGV_BNDERR) 313*baa489faSSeongJae Park return "SEGV_BNDERR"; 314*baa489faSSeongJae Park if (si_code == SEGV_PKUERR) 315*baa489faSSeongJae Park return "SEGV_PKUERR"; 316*baa489faSSeongJae Park return "UNKNOWN"; 317*baa489faSSeongJae Park } 318*baa489faSSeongJae Park 319*baa489faSSeongJae Park int pkey_faults; 320*baa489faSSeongJae Park int last_si_pkey = -1; 321*baa489faSSeongJae Park void signal_handler(int signum, siginfo_t *si, void *vucontext) 322*baa489faSSeongJae Park { 323*baa489faSSeongJae Park ucontext_t *uctxt = vucontext; 324*baa489faSSeongJae Park int trapno; 325*baa489faSSeongJae Park unsigned long ip; 326*baa489faSSeongJae Park char *fpregs; 327*baa489faSSeongJae Park #if defined(__i386__) || defined(__x86_64__) /* arch */ 328*baa489faSSeongJae Park u32 *pkey_reg_ptr; 329*baa489faSSeongJae Park int pkey_reg_offset; 330*baa489faSSeongJae Park #endif /* arch */ 331*baa489faSSeongJae Park u64 siginfo_pkey; 332*baa489faSSeongJae Park u32 *si_pkey_ptr; 333*baa489faSSeongJae Park 334*baa489faSSeongJae Park dprint_in_signal = 1; 335*baa489faSSeongJae Park dprintf1(">>>>===============SIGSEGV============================\n"); 336*baa489faSSeongJae Park dprintf1("%s()::%d, pkey_reg: 0x%016llx shadow: %016llx\n", 337*baa489faSSeongJae Park __func__, __LINE__, 338*baa489faSSeongJae Park __read_pkey_reg(), shadow_pkey_reg); 339*baa489faSSeongJae Park 340*baa489faSSeongJae Park trapno = uctxt->uc_mcontext.gregs[REG_TRAPNO]; 341*baa489faSSeongJae Park ip = uctxt->uc_mcontext.gregs[REG_IP_IDX]; 342*baa489faSSeongJae Park fpregs = (char *) 
uctxt->uc_mcontext.fpregs; 343*baa489faSSeongJae Park 344*baa489faSSeongJae Park dprintf2("%s() trapno: %d ip: 0x%016lx info->si_code: %s/%d\n", 345*baa489faSSeongJae Park __func__, trapno, ip, si_code_str(si->si_code), 346*baa489faSSeongJae Park si->si_code); 347*baa489faSSeongJae Park 348*baa489faSSeongJae Park #if defined(__i386__) || defined(__x86_64__) /* arch */ 349*baa489faSSeongJae Park #ifdef __i386__ 350*baa489faSSeongJae Park /* 351*baa489faSSeongJae Park * 32-bit has some extra padding so that userspace can tell whether 352*baa489faSSeongJae Park * the XSTATE header is present in addition to the "legacy" FPU 353*baa489faSSeongJae Park * state. We just assume that it is here. 354*baa489faSSeongJae Park */ 355*baa489faSSeongJae Park fpregs += 0x70; 356*baa489faSSeongJae Park #endif /* i386 */ 357*baa489faSSeongJae Park pkey_reg_offset = pkey_reg_xstate_offset(); 358*baa489faSSeongJae Park pkey_reg_ptr = (void *)(&fpregs[pkey_reg_offset]); 359*baa489faSSeongJae Park 360*baa489faSSeongJae Park /* 361*baa489faSSeongJae Park * If we got a PKEY fault, we *HAVE* to have at least one bit set in 362*baa489faSSeongJae Park * here. 
363*baa489faSSeongJae Park */ 364*baa489faSSeongJae Park dprintf1("pkey_reg_xstate_offset: %d\n", pkey_reg_xstate_offset()); 365*baa489faSSeongJae Park if (DEBUG_LEVEL > 4) 366*baa489faSSeongJae Park dump_mem(pkey_reg_ptr - 128, 256); 367*baa489faSSeongJae Park pkey_assert(*pkey_reg_ptr); 368*baa489faSSeongJae Park #endif /* arch */ 369*baa489faSSeongJae Park 370*baa489faSSeongJae Park dprintf1("siginfo: %p\n", si); 371*baa489faSSeongJae Park dprintf1(" fpregs: %p\n", fpregs); 372*baa489faSSeongJae Park 373*baa489faSSeongJae Park if ((si->si_code == SEGV_MAPERR) || 374*baa489faSSeongJae Park (si->si_code == SEGV_ACCERR) || 375*baa489faSSeongJae Park (si->si_code == SEGV_BNDERR)) { 376*baa489faSSeongJae Park printf("non-PK si_code, exiting...\n"); 377*baa489faSSeongJae Park exit(4); 378*baa489faSSeongJae Park } 379*baa489faSSeongJae Park 380*baa489faSSeongJae Park si_pkey_ptr = siginfo_get_pkey_ptr(si); 381*baa489faSSeongJae Park dprintf1("si_pkey_ptr: %p\n", si_pkey_ptr); 382*baa489faSSeongJae Park dump_mem((u8 *)si_pkey_ptr - 8, 24); 383*baa489faSSeongJae Park siginfo_pkey = *si_pkey_ptr; 384*baa489faSSeongJae Park pkey_assert(siginfo_pkey < NR_PKEYS); 385*baa489faSSeongJae Park last_si_pkey = siginfo_pkey; 386*baa489faSSeongJae Park 387*baa489faSSeongJae Park /* 388*baa489faSSeongJae Park * need __read_pkey_reg() version so we do not do shadow_pkey_reg 389*baa489faSSeongJae Park * checking 390*baa489faSSeongJae Park */ 391*baa489faSSeongJae Park dprintf1("signal pkey_reg from pkey_reg: %016llx\n", 392*baa489faSSeongJae Park __read_pkey_reg()); 393*baa489faSSeongJae Park dprintf1("pkey from siginfo: %016llx\n", siginfo_pkey); 394*baa489faSSeongJae Park #if defined(__i386__) || defined(__x86_64__) /* arch */ 395*baa489faSSeongJae Park dprintf1("signal pkey_reg from xsave: %08x\n", *pkey_reg_ptr); 396*baa489faSSeongJae Park *(u64 *)pkey_reg_ptr = 0x00000000; 397*baa489faSSeongJae Park dprintf1("WARNING: set PKEY_REG=0 to allow faulting instruction to continue\n"); 
398*baa489faSSeongJae Park #elif defined(__powerpc64__) /* arch */ 399*baa489faSSeongJae Park /* restore access and let the faulting instruction continue */ 400*baa489faSSeongJae Park pkey_access_allow(siginfo_pkey); 401*baa489faSSeongJae Park #endif /* arch */ 402*baa489faSSeongJae Park pkey_faults++; 403*baa489faSSeongJae Park dprintf1("<<<<==================================================\n"); 404*baa489faSSeongJae Park dprint_in_signal = 0; 405*baa489faSSeongJae Park } 406*baa489faSSeongJae Park 407*baa489faSSeongJae Park int wait_all_children(void) 408*baa489faSSeongJae Park { 409*baa489faSSeongJae Park int status; 410*baa489faSSeongJae Park return waitpid(-1, &status, 0); 411*baa489faSSeongJae Park } 412*baa489faSSeongJae Park 413*baa489faSSeongJae Park void sig_chld(int x) 414*baa489faSSeongJae Park { 415*baa489faSSeongJae Park dprint_in_signal = 1; 416*baa489faSSeongJae Park dprintf2("[%d] SIGCHLD: %d\n", getpid(), x); 417*baa489faSSeongJae Park dprint_in_signal = 0; 418*baa489faSSeongJae Park } 419*baa489faSSeongJae Park 420*baa489faSSeongJae Park void setup_sigsegv_handler(void) 421*baa489faSSeongJae Park { 422*baa489faSSeongJae Park int r, rs; 423*baa489faSSeongJae Park struct sigaction newact; 424*baa489faSSeongJae Park struct sigaction oldact; 425*baa489faSSeongJae Park 426*baa489faSSeongJae Park /* #PF is mapped to sigsegv */ 427*baa489faSSeongJae Park int signum = SIGSEGV; 428*baa489faSSeongJae Park 429*baa489faSSeongJae Park newact.sa_handler = 0; 430*baa489faSSeongJae Park newact.sa_sigaction = signal_handler; 431*baa489faSSeongJae Park 432*baa489faSSeongJae Park /*sigset_t - signals to block while in the handler */ 433*baa489faSSeongJae Park /* get the old signal mask. 
*/ 434*baa489faSSeongJae Park rs = sigprocmask(SIG_SETMASK, 0, &newact.sa_mask); 435*baa489faSSeongJae Park pkey_assert(rs == 0); 436*baa489faSSeongJae Park 437*baa489faSSeongJae Park /* call sa_sigaction, not sa_handler*/ 438*baa489faSSeongJae Park newact.sa_flags = SA_SIGINFO; 439*baa489faSSeongJae Park 440*baa489faSSeongJae Park newact.sa_restorer = 0; /* void(*)(), obsolete */ 441*baa489faSSeongJae Park r = sigaction(signum, &newact, &oldact); 442*baa489faSSeongJae Park r = sigaction(SIGALRM, &newact, &oldact); 443*baa489faSSeongJae Park pkey_assert(r == 0); 444*baa489faSSeongJae Park } 445*baa489faSSeongJae Park 446*baa489faSSeongJae Park void setup_handlers(void) 447*baa489faSSeongJae Park { 448*baa489faSSeongJae Park signal(SIGCHLD, &sig_chld); 449*baa489faSSeongJae Park setup_sigsegv_handler(); 450*baa489faSSeongJae Park } 451*baa489faSSeongJae Park 452*baa489faSSeongJae Park pid_t fork_lazy_child(void) 453*baa489faSSeongJae Park { 454*baa489faSSeongJae Park pid_t forkret; 455*baa489faSSeongJae Park 456*baa489faSSeongJae Park forkret = fork(); 457*baa489faSSeongJae Park pkey_assert(forkret >= 0); 458*baa489faSSeongJae Park dprintf3("[%d] fork() ret: %d\n", getpid(), forkret); 459*baa489faSSeongJae Park 460*baa489faSSeongJae Park if (!forkret) { 461*baa489faSSeongJae Park /* in the child */ 462*baa489faSSeongJae Park while (1) { 463*baa489faSSeongJae Park dprintf1("child sleeping...\n"); 464*baa489faSSeongJae Park sleep(30); 465*baa489faSSeongJae Park } 466*baa489faSSeongJae Park } 467*baa489faSSeongJae Park return forkret; 468*baa489faSSeongJae Park } 469*baa489faSSeongJae Park 470*baa489faSSeongJae Park int sys_mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot, 471*baa489faSSeongJae Park unsigned long pkey) 472*baa489faSSeongJae Park { 473*baa489faSSeongJae Park int sret; 474*baa489faSSeongJae Park 475*baa489faSSeongJae Park dprintf2("%s(0x%p, %zx, prot=%lx, pkey=%lx)\n", __func__, 476*baa489faSSeongJae Park ptr, size, orig_prot, pkey); 
477*baa489faSSeongJae Park 478*baa489faSSeongJae Park errno = 0; 479*baa489faSSeongJae Park sret = syscall(SYS_mprotect_key, ptr, size, orig_prot, pkey); 480*baa489faSSeongJae Park if (errno) { 481*baa489faSSeongJae Park dprintf2("SYS_mprotect_key sret: %d\n", sret); 482*baa489faSSeongJae Park dprintf2("SYS_mprotect_key prot: 0x%lx\n", orig_prot); 483*baa489faSSeongJae Park dprintf2("SYS_mprotect_key failed, errno: %d\n", errno); 484*baa489faSSeongJae Park if (DEBUG_LEVEL >= 2) 485*baa489faSSeongJae Park perror("SYS_mprotect_pkey"); 486*baa489faSSeongJae Park } 487*baa489faSSeongJae Park return sret; 488*baa489faSSeongJae Park } 489*baa489faSSeongJae Park 490*baa489faSSeongJae Park int sys_pkey_alloc(unsigned long flags, unsigned long init_val) 491*baa489faSSeongJae Park { 492*baa489faSSeongJae Park int ret = syscall(SYS_pkey_alloc, flags, init_val); 493*baa489faSSeongJae Park dprintf1("%s(flags=%lx, init_val=%lx) syscall ret: %d errno: %d\n", 494*baa489faSSeongJae Park __func__, flags, init_val, ret, errno); 495*baa489faSSeongJae Park return ret; 496*baa489faSSeongJae Park } 497*baa489faSSeongJae Park 498*baa489faSSeongJae Park int alloc_pkey(void) 499*baa489faSSeongJae Park { 500*baa489faSSeongJae Park int ret; 501*baa489faSSeongJae Park unsigned long init_val = 0x0; 502*baa489faSSeongJae Park 503*baa489faSSeongJae Park dprintf1("%s()::%d, pkey_reg: 0x%016llx shadow: %016llx\n", 504*baa489faSSeongJae Park __func__, __LINE__, __read_pkey_reg(), shadow_pkey_reg); 505*baa489faSSeongJae Park ret = sys_pkey_alloc(0, init_val); 506*baa489faSSeongJae Park /* 507*baa489faSSeongJae Park * pkey_alloc() sets PKEY register, so we need to reflect it in 508*baa489faSSeongJae Park * shadow_pkey_reg: 509*baa489faSSeongJae Park */ 510*baa489faSSeongJae Park dprintf4("%s()::%d, ret: %d pkey_reg: 0x%016llx" 511*baa489faSSeongJae Park " shadow: 0x%016llx\n", 512*baa489faSSeongJae Park __func__, __LINE__, ret, __read_pkey_reg(), 513*baa489faSSeongJae Park shadow_pkey_reg); 
514*baa489faSSeongJae Park if (ret > 0) { 515*baa489faSSeongJae Park /* clear both the bits: */ 516*baa489faSSeongJae Park shadow_pkey_reg = set_pkey_bits(shadow_pkey_reg, ret, 517*baa489faSSeongJae Park ~PKEY_MASK); 518*baa489faSSeongJae Park dprintf4("%s()::%d, ret: %d pkey_reg: 0x%016llx" 519*baa489faSSeongJae Park " shadow: 0x%016llx\n", 520*baa489faSSeongJae Park __func__, 521*baa489faSSeongJae Park __LINE__, ret, __read_pkey_reg(), 522*baa489faSSeongJae Park shadow_pkey_reg); 523*baa489faSSeongJae Park /* 524*baa489faSSeongJae Park * move the new state in from init_val 525*baa489faSSeongJae Park * (remember, we cheated and init_val == pkey_reg format) 526*baa489faSSeongJae Park */ 527*baa489faSSeongJae Park shadow_pkey_reg = set_pkey_bits(shadow_pkey_reg, ret, 528*baa489faSSeongJae Park init_val); 529*baa489faSSeongJae Park } 530*baa489faSSeongJae Park dprintf4("%s()::%d, ret: %d pkey_reg: 0x%016llx" 531*baa489faSSeongJae Park " shadow: 0x%016llx\n", 532*baa489faSSeongJae Park __func__, __LINE__, ret, __read_pkey_reg(), 533*baa489faSSeongJae Park shadow_pkey_reg); 534*baa489faSSeongJae Park dprintf1("%s()::%d errno: %d\n", __func__, __LINE__, errno); 535*baa489faSSeongJae Park /* for shadow checking: */ 536*baa489faSSeongJae Park read_pkey_reg(); 537*baa489faSSeongJae Park dprintf4("%s()::%d, ret: %d pkey_reg: 0x%016llx" 538*baa489faSSeongJae Park " shadow: 0x%016llx\n", 539*baa489faSSeongJae Park __func__, __LINE__, ret, __read_pkey_reg(), 540*baa489faSSeongJae Park shadow_pkey_reg); 541*baa489faSSeongJae Park return ret; 542*baa489faSSeongJae Park } 543*baa489faSSeongJae Park 544*baa489faSSeongJae Park int sys_pkey_free(unsigned long pkey) 545*baa489faSSeongJae Park { 546*baa489faSSeongJae Park int ret = syscall(SYS_pkey_free, pkey); 547*baa489faSSeongJae Park dprintf1("%s(pkey=%ld) syscall ret: %d\n", __func__, pkey, ret); 548*baa489faSSeongJae Park return ret; 549*baa489faSSeongJae Park } 550*baa489faSSeongJae Park 551*baa489faSSeongJae Park /* 
552*baa489faSSeongJae Park * I had a bug where pkey bits could be set by mprotect() but 553*baa489faSSeongJae Park * not cleared. This ensures we get lots of random bit sets 554*baa489faSSeongJae Park * and clears on the vma and pte pkey bits. 555*baa489faSSeongJae Park */ 556*baa489faSSeongJae Park int alloc_random_pkey(void) 557*baa489faSSeongJae Park { 558*baa489faSSeongJae Park int max_nr_pkey_allocs; 559*baa489faSSeongJae Park int ret; 560*baa489faSSeongJae Park int i; 561*baa489faSSeongJae Park int alloced_pkeys[NR_PKEYS]; 562*baa489faSSeongJae Park int nr_alloced = 0; 563*baa489faSSeongJae Park int random_index; 564*baa489faSSeongJae Park memset(alloced_pkeys, 0, sizeof(alloced_pkeys)); 565*baa489faSSeongJae Park 566*baa489faSSeongJae Park /* allocate every possible key and make a note of which ones we got */ 567*baa489faSSeongJae Park max_nr_pkey_allocs = NR_PKEYS; 568*baa489faSSeongJae Park for (i = 0; i < max_nr_pkey_allocs; i++) { 569*baa489faSSeongJae Park int new_pkey = alloc_pkey(); 570*baa489faSSeongJae Park if (new_pkey < 0) 571*baa489faSSeongJae Park break; 572*baa489faSSeongJae Park alloced_pkeys[nr_alloced++] = new_pkey; 573*baa489faSSeongJae Park } 574*baa489faSSeongJae Park 575*baa489faSSeongJae Park pkey_assert(nr_alloced > 0); 576*baa489faSSeongJae Park /* select a random one out of the allocated ones */ 577*baa489faSSeongJae Park random_index = rand() % nr_alloced; 578*baa489faSSeongJae Park ret = alloced_pkeys[random_index]; 579*baa489faSSeongJae Park /* now zero it out so we don't free it next */ 580*baa489faSSeongJae Park alloced_pkeys[random_index] = 0; 581*baa489faSSeongJae Park 582*baa489faSSeongJae Park /* go through the allocated ones that we did not want and free them */ 583*baa489faSSeongJae Park for (i = 0; i < nr_alloced; i++) { 584*baa489faSSeongJae Park int free_ret; 585*baa489faSSeongJae Park if (!alloced_pkeys[i]) 586*baa489faSSeongJae Park continue; 587*baa489faSSeongJae Park free_ret = sys_pkey_free(alloced_pkeys[i]); 
588*baa489faSSeongJae Park pkey_assert(!free_ret); 589*baa489faSSeongJae Park } 590*baa489faSSeongJae Park dprintf1("%s()::%d, ret: %d pkey_reg: 0x%016llx" 591*baa489faSSeongJae Park " shadow: 0x%016llx\n", __func__, 592*baa489faSSeongJae Park __LINE__, ret, __read_pkey_reg(), shadow_pkey_reg); 593*baa489faSSeongJae Park return ret; 594*baa489faSSeongJae Park } 595*baa489faSSeongJae Park 596*baa489faSSeongJae Park int mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot, 597*baa489faSSeongJae Park unsigned long pkey) 598*baa489faSSeongJae Park { 599*baa489faSSeongJae Park int nr_iterations = random() % 100; 600*baa489faSSeongJae Park int ret; 601*baa489faSSeongJae Park 602*baa489faSSeongJae Park while (0) { 603*baa489faSSeongJae Park int rpkey = alloc_random_pkey(); 604*baa489faSSeongJae Park ret = sys_mprotect_pkey(ptr, size, orig_prot, pkey); 605*baa489faSSeongJae Park dprintf1("sys_mprotect_pkey(%p, %zx, prot=0x%lx, pkey=%ld) ret: %d\n", 606*baa489faSSeongJae Park ptr, size, orig_prot, pkey, ret); 607*baa489faSSeongJae Park if (nr_iterations-- < 0) 608*baa489faSSeongJae Park break; 609*baa489faSSeongJae Park 610*baa489faSSeongJae Park dprintf1("%s()::%d, ret: %d pkey_reg: 0x%016llx" 611*baa489faSSeongJae Park " shadow: 0x%016llx\n", 612*baa489faSSeongJae Park __func__, __LINE__, ret, __read_pkey_reg(), 613*baa489faSSeongJae Park shadow_pkey_reg); 614*baa489faSSeongJae Park sys_pkey_free(rpkey); 615*baa489faSSeongJae Park dprintf1("%s()::%d, ret: %d pkey_reg: 0x%016llx" 616*baa489faSSeongJae Park " shadow: 0x%016llx\n", 617*baa489faSSeongJae Park __func__, __LINE__, ret, __read_pkey_reg(), 618*baa489faSSeongJae Park shadow_pkey_reg); 619*baa489faSSeongJae Park } 620*baa489faSSeongJae Park pkey_assert(pkey < NR_PKEYS); 621*baa489faSSeongJae Park 622*baa489faSSeongJae Park ret = sys_mprotect_pkey(ptr, size, orig_prot, pkey); 623*baa489faSSeongJae Park dprintf1("mprotect_pkey(%p, %zx, prot=0x%lx, pkey=%ld) ret: %d\n", 624*baa489faSSeongJae Park ptr, size, 
orig_prot, pkey, ret); 625*baa489faSSeongJae Park pkey_assert(!ret); 626*baa489faSSeongJae Park dprintf1("%s()::%d, ret: %d pkey_reg: 0x%016llx" 627*baa489faSSeongJae Park " shadow: 0x%016llx\n", __func__, 628*baa489faSSeongJae Park __LINE__, ret, __read_pkey_reg(), shadow_pkey_reg); 629*baa489faSSeongJae Park return ret; 630*baa489faSSeongJae Park } 631*baa489faSSeongJae Park 632*baa489faSSeongJae Park struct pkey_malloc_record { 633*baa489faSSeongJae Park void *ptr; 634*baa489faSSeongJae Park long size; 635*baa489faSSeongJae Park int prot; 636*baa489faSSeongJae Park }; 637*baa489faSSeongJae Park struct pkey_malloc_record *pkey_malloc_records; 638*baa489faSSeongJae Park struct pkey_malloc_record *pkey_last_malloc_record; 639*baa489faSSeongJae Park long nr_pkey_malloc_records; 640*baa489faSSeongJae Park void record_pkey_malloc(void *ptr, long size, int prot) 641*baa489faSSeongJae Park { 642*baa489faSSeongJae Park long i; 643*baa489faSSeongJae Park struct pkey_malloc_record *rec = NULL; 644*baa489faSSeongJae Park 645*baa489faSSeongJae Park for (i = 0; i < nr_pkey_malloc_records; i++) { 646*baa489faSSeongJae Park rec = &pkey_malloc_records[i]; 647*baa489faSSeongJae Park /* find a free record */ 648*baa489faSSeongJae Park if (rec) 649*baa489faSSeongJae Park break; 650*baa489faSSeongJae Park } 651*baa489faSSeongJae Park if (!rec) { 652*baa489faSSeongJae Park /* every record is full */ 653*baa489faSSeongJae Park size_t old_nr_records = nr_pkey_malloc_records; 654*baa489faSSeongJae Park size_t new_nr_records = (nr_pkey_malloc_records * 2 + 1); 655*baa489faSSeongJae Park size_t new_size = new_nr_records * sizeof(struct pkey_malloc_record); 656*baa489faSSeongJae Park dprintf2("new_nr_records: %zd\n", new_nr_records); 657*baa489faSSeongJae Park dprintf2("new_size: %zd\n", new_size); 658*baa489faSSeongJae Park pkey_malloc_records = realloc(pkey_malloc_records, new_size); 659*baa489faSSeongJae Park pkey_assert(pkey_malloc_records != NULL); 660*baa489faSSeongJae Park rec = 
&pkey_malloc_records[nr_pkey_malloc_records]; 661*baa489faSSeongJae Park /* 662*baa489faSSeongJae Park * realloc() does not initialize memory, so zero it from 663*baa489faSSeongJae Park * the first new record all the way to the end. 664*baa489faSSeongJae Park */ 665*baa489faSSeongJae Park for (i = 0; i < new_nr_records - old_nr_records; i++) 666*baa489faSSeongJae Park memset(rec + i, 0, sizeof(*rec)); 667*baa489faSSeongJae Park } 668*baa489faSSeongJae Park dprintf3("filling malloc record[%d/%p]: {%p, %ld}\n", 669*baa489faSSeongJae Park (int)(rec - pkey_malloc_records), rec, ptr, size); 670*baa489faSSeongJae Park rec->ptr = ptr; 671*baa489faSSeongJae Park rec->size = size; 672*baa489faSSeongJae Park rec->prot = prot; 673*baa489faSSeongJae Park pkey_last_malloc_record = rec; 674*baa489faSSeongJae Park nr_pkey_malloc_records++; 675*baa489faSSeongJae Park } 676*baa489faSSeongJae Park 677*baa489faSSeongJae Park void free_pkey_malloc(void *ptr) 678*baa489faSSeongJae Park { 679*baa489faSSeongJae Park long i; 680*baa489faSSeongJae Park int ret; 681*baa489faSSeongJae Park dprintf3("%s(%p)\n", __func__, ptr); 682*baa489faSSeongJae Park for (i = 0; i < nr_pkey_malloc_records; i++) { 683*baa489faSSeongJae Park struct pkey_malloc_record *rec = &pkey_malloc_records[i]; 684*baa489faSSeongJae Park dprintf4("looking for ptr %p at record[%ld/%p]: {%p, %ld}\n", 685*baa489faSSeongJae Park ptr, i, rec, rec->ptr, rec->size); 686*baa489faSSeongJae Park if ((ptr < rec->ptr) || 687*baa489faSSeongJae Park (ptr >= rec->ptr + rec->size)) 688*baa489faSSeongJae Park continue; 689*baa489faSSeongJae Park 690*baa489faSSeongJae Park dprintf3("found ptr %p at record[%ld/%p]: {%p, %ld}\n", 691*baa489faSSeongJae Park ptr, i, rec, rec->ptr, rec->size); 692*baa489faSSeongJae Park nr_pkey_malloc_records--; 693*baa489faSSeongJae Park ret = munmap(rec->ptr, rec->size); 694*baa489faSSeongJae Park dprintf3("munmap ret: %d\n", ret); 695*baa489faSSeongJae Park pkey_assert(!ret); 696*baa489faSSeongJae Park 
dprintf3("clearing rec->ptr, rec: %p\n", rec); 697*baa489faSSeongJae Park rec->ptr = NULL; 698*baa489faSSeongJae Park dprintf3("done clearing rec->ptr, rec: %p\n", rec); 699*baa489faSSeongJae Park return; 700*baa489faSSeongJae Park } 701*baa489faSSeongJae Park pkey_assert(false); 702*baa489faSSeongJae Park } 703*baa489faSSeongJae Park 704*baa489faSSeongJae Park 705*baa489faSSeongJae Park void *malloc_pkey_with_mprotect(long size, int prot, u16 pkey) 706*baa489faSSeongJae Park { 707*baa489faSSeongJae Park void *ptr; 708*baa489faSSeongJae Park int ret; 709*baa489faSSeongJae Park 710*baa489faSSeongJae Park read_pkey_reg(); 711*baa489faSSeongJae Park dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__, 712*baa489faSSeongJae Park size, prot, pkey); 713*baa489faSSeongJae Park pkey_assert(pkey < NR_PKEYS); 714*baa489faSSeongJae Park ptr = mmap(NULL, size, prot, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); 715*baa489faSSeongJae Park pkey_assert(ptr != (void *)-1); 716*baa489faSSeongJae Park ret = mprotect_pkey((void *)ptr, PAGE_SIZE, prot, pkey); 717*baa489faSSeongJae Park pkey_assert(!ret); 718*baa489faSSeongJae Park record_pkey_malloc(ptr, size, prot); 719*baa489faSSeongJae Park read_pkey_reg(); 720*baa489faSSeongJae Park 721*baa489faSSeongJae Park dprintf1("%s() for pkey %d @ %p\n", __func__, pkey, ptr); 722*baa489faSSeongJae Park return ptr; 723*baa489faSSeongJae Park } 724*baa489faSSeongJae Park 725*baa489faSSeongJae Park void *malloc_pkey_anon_huge(long size, int prot, u16 pkey) 726*baa489faSSeongJae Park { 727*baa489faSSeongJae Park int ret; 728*baa489faSSeongJae Park void *ptr; 729*baa489faSSeongJae Park 730*baa489faSSeongJae Park dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__, 731*baa489faSSeongJae Park size, prot, pkey); 732*baa489faSSeongJae Park /* 733*baa489faSSeongJae Park * Guarantee we can fit at least one huge page in the resulting 734*baa489faSSeongJae Park * allocation by allocating space for 2: 735*baa489faSSeongJae Park */ 
736*baa489faSSeongJae Park size = ALIGN_UP(size, HPAGE_SIZE * 2); 737*baa489faSSeongJae Park ptr = mmap(NULL, size, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); 738*baa489faSSeongJae Park pkey_assert(ptr != (void *)-1); 739*baa489faSSeongJae Park record_pkey_malloc(ptr, size, prot); 740*baa489faSSeongJae Park mprotect_pkey(ptr, size, prot, pkey); 741*baa489faSSeongJae Park 742*baa489faSSeongJae Park dprintf1("unaligned ptr: %p\n", ptr); 743*baa489faSSeongJae Park ptr = ALIGN_PTR_UP(ptr, HPAGE_SIZE); 744*baa489faSSeongJae Park dprintf1(" aligned ptr: %p\n", ptr); 745*baa489faSSeongJae Park ret = madvise(ptr, HPAGE_SIZE, MADV_HUGEPAGE); 746*baa489faSSeongJae Park dprintf1("MADV_HUGEPAGE ret: %d\n", ret); 747*baa489faSSeongJae Park ret = madvise(ptr, HPAGE_SIZE, MADV_WILLNEED); 748*baa489faSSeongJae Park dprintf1("MADV_WILLNEED ret: %d\n", ret); 749*baa489faSSeongJae Park memset(ptr, 0, HPAGE_SIZE); 750*baa489faSSeongJae Park 751*baa489faSSeongJae Park dprintf1("mmap()'d thp for pkey %d @ %p\n", pkey, ptr); 752*baa489faSSeongJae Park return ptr; 753*baa489faSSeongJae Park } 754*baa489faSSeongJae Park 755*baa489faSSeongJae Park int hugetlb_setup_ok; 756*baa489faSSeongJae Park #define SYSFS_FMT_NR_HUGE_PAGES "/sys/kernel/mm/hugepages/hugepages-%ldkB/nr_hugepages" 757*baa489faSSeongJae Park #define GET_NR_HUGE_PAGES 10 758*baa489faSSeongJae Park void setup_hugetlbfs(void) 759*baa489faSSeongJae Park { 760*baa489faSSeongJae Park int err; 761*baa489faSSeongJae Park int fd; 762*baa489faSSeongJae Park char buf[256]; 763*baa489faSSeongJae Park long hpagesz_kb; 764*baa489faSSeongJae Park long hpagesz_mb; 765*baa489faSSeongJae Park 766*baa489faSSeongJae Park if (geteuid() != 0) { 767*baa489faSSeongJae Park fprintf(stderr, "WARNING: not run as root, can not do hugetlb test\n"); 768*baa489faSSeongJae Park return; 769*baa489faSSeongJae Park } 770*baa489faSSeongJae Park 771*baa489faSSeongJae Park cat_into_file(__stringify(GET_NR_HUGE_PAGES), "/proc/sys/vm/nr_hugepages"); 
772*baa489faSSeongJae Park 773*baa489faSSeongJae Park /* 774*baa489faSSeongJae Park * Now go make sure that we got the pages and that they 775*baa489faSSeongJae Park * are PMD-level pages. Someone might have made PUD-level 776*baa489faSSeongJae Park * pages the default. 777*baa489faSSeongJae Park */ 778*baa489faSSeongJae Park hpagesz_kb = HPAGE_SIZE / 1024; 779*baa489faSSeongJae Park hpagesz_mb = hpagesz_kb / 1024; 780*baa489faSSeongJae Park sprintf(buf, SYSFS_FMT_NR_HUGE_PAGES, hpagesz_kb); 781*baa489faSSeongJae Park fd = open(buf, O_RDONLY); 782*baa489faSSeongJae Park if (fd < 0) { 783*baa489faSSeongJae Park fprintf(stderr, "opening sysfs %ldM hugetlb config: %s\n", 784*baa489faSSeongJae Park hpagesz_mb, strerror(errno)); 785*baa489faSSeongJae Park return; 786*baa489faSSeongJae Park } 787*baa489faSSeongJae Park 788*baa489faSSeongJae Park /* -1 to guarantee leaving the trailing \0 */ 789*baa489faSSeongJae Park err = read(fd, buf, sizeof(buf)-1); 790*baa489faSSeongJae Park close(fd); 791*baa489faSSeongJae Park if (err <= 0) { 792*baa489faSSeongJae Park fprintf(stderr, "reading sysfs %ldM hugetlb config: %s\n", 793*baa489faSSeongJae Park hpagesz_mb, strerror(errno)); 794*baa489faSSeongJae Park return; 795*baa489faSSeongJae Park } 796*baa489faSSeongJae Park 797*baa489faSSeongJae Park if (atoi(buf) != GET_NR_HUGE_PAGES) { 798*baa489faSSeongJae Park fprintf(stderr, "could not confirm %ldM pages, got: '%s' expected %d\n", 799*baa489faSSeongJae Park hpagesz_mb, buf, GET_NR_HUGE_PAGES); 800*baa489faSSeongJae Park return; 801*baa489faSSeongJae Park } 802*baa489faSSeongJae Park 803*baa489faSSeongJae Park hugetlb_setup_ok = 1; 804*baa489faSSeongJae Park } 805*baa489faSSeongJae Park 806*baa489faSSeongJae Park void *malloc_pkey_hugetlb(long size, int prot, u16 pkey) 807*baa489faSSeongJae Park { 808*baa489faSSeongJae Park void *ptr; 809*baa489faSSeongJae Park int flags = MAP_ANONYMOUS|MAP_PRIVATE|MAP_HUGETLB; 810*baa489faSSeongJae Park 811*baa489faSSeongJae Park if 
(!hugetlb_setup_ok) 812*baa489faSSeongJae Park return PTR_ERR_ENOTSUP; 813*baa489faSSeongJae Park 814*baa489faSSeongJae Park dprintf1("doing %s(%ld, %x, %x)\n", __func__, size, prot, pkey); 815*baa489faSSeongJae Park size = ALIGN_UP(size, HPAGE_SIZE * 2); 816*baa489faSSeongJae Park pkey_assert(pkey < NR_PKEYS); 817*baa489faSSeongJae Park ptr = mmap(NULL, size, PROT_NONE, flags, -1, 0); 818*baa489faSSeongJae Park pkey_assert(ptr != (void *)-1); 819*baa489faSSeongJae Park mprotect_pkey(ptr, size, prot, pkey); 820*baa489faSSeongJae Park 821*baa489faSSeongJae Park record_pkey_malloc(ptr, size, prot); 822*baa489faSSeongJae Park 823*baa489faSSeongJae Park dprintf1("mmap()'d hugetlbfs for pkey %d @ %p\n", pkey, ptr); 824*baa489faSSeongJae Park return ptr; 825*baa489faSSeongJae Park } 826*baa489faSSeongJae Park 827*baa489faSSeongJae Park void *malloc_pkey_mmap_dax(long size, int prot, u16 pkey) 828*baa489faSSeongJae Park { 829*baa489faSSeongJae Park void *ptr; 830*baa489faSSeongJae Park int fd; 831*baa489faSSeongJae Park 832*baa489faSSeongJae Park dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__, 833*baa489faSSeongJae Park size, prot, pkey); 834*baa489faSSeongJae Park pkey_assert(pkey < NR_PKEYS); 835*baa489faSSeongJae Park fd = open("/dax/foo", O_RDWR); 836*baa489faSSeongJae Park pkey_assert(fd >= 0); 837*baa489faSSeongJae Park 838*baa489faSSeongJae Park ptr = mmap(0, size, prot, MAP_SHARED, fd, 0); 839*baa489faSSeongJae Park pkey_assert(ptr != (void *)-1); 840*baa489faSSeongJae Park 841*baa489faSSeongJae Park mprotect_pkey(ptr, size, prot, pkey); 842*baa489faSSeongJae Park 843*baa489faSSeongJae Park record_pkey_malloc(ptr, size, prot); 844*baa489faSSeongJae Park 845*baa489faSSeongJae Park dprintf1("mmap()'d for pkey %d @ %p\n", pkey, ptr); 846*baa489faSSeongJae Park close(fd); 847*baa489faSSeongJae Park return ptr; 848*baa489faSSeongJae Park } 849*baa489faSSeongJae Park 850*baa489faSSeongJae Park void *(*pkey_malloc[])(long size, int prot, u16 pkey) = { 
851*baa489faSSeongJae Park 852*baa489faSSeongJae Park malloc_pkey_with_mprotect, 853*baa489faSSeongJae Park malloc_pkey_with_mprotect_subpage, 854*baa489faSSeongJae Park malloc_pkey_anon_huge, 855*baa489faSSeongJae Park malloc_pkey_hugetlb 856*baa489faSSeongJae Park /* can not do direct with the pkey_mprotect() API: 857*baa489faSSeongJae Park malloc_pkey_mmap_direct, 858*baa489faSSeongJae Park malloc_pkey_mmap_dax, 859*baa489faSSeongJae Park */ 860*baa489faSSeongJae Park }; 861*baa489faSSeongJae Park 862*baa489faSSeongJae Park void *malloc_pkey(long size, int prot, u16 pkey) 863*baa489faSSeongJae Park { 864*baa489faSSeongJae Park void *ret; 865*baa489faSSeongJae Park static int malloc_type; 866*baa489faSSeongJae Park int nr_malloc_types = ARRAY_SIZE(pkey_malloc); 867*baa489faSSeongJae Park 868*baa489faSSeongJae Park pkey_assert(pkey < NR_PKEYS); 869*baa489faSSeongJae Park 870*baa489faSSeongJae Park while (1) { 871*baa489faSSeongJae Park pkey_assert(malloc_type < nr_malloc_types); 872*baa489faSSeongJae Park 873*baa489faSSeongJae Park ret = pkey_malloc[malloc_type](size, prot, pkey); 874*baa489faSSeongJae Park pkey_assert(ret != (void *)-1); 875*baa489faSSeongJae Park 876*baa489faSSeongJae Park malloc_type++; 877*baa489faSSeongJae Park if (malloc_type >= nr_malloc_types) 878*baa489faSSeongJae Park malloc_type = (random()%nr_malloc_types); 879*baa489faSSeongJae Park 880*baa489faSSeongJae Park /* try again if the malloc_type we tried is unsupported */ 881*baa489faSSeongJae Park if (ret == PTR_ERR_ENOTSUP) 882*baa489faSSeongJae Park continue; 883*baa489faSSeongJae Park 884*baa489faSSeongJae Park break; 885*baa489faSSeongJae Park } 886*baa489faSSeongJae Park 887*baa489faSSeongJae Park dprintf3("%s(%ld, prot=%x, pkey=%x) returning: %p\n", __func__, 888*baa489faSSeongJae Park size, prot, pkey, ret); 889*baa489faSSeongJae Park return ret; 890*baa489faSSeongJae Park } 891*baa489faSSeongJae Park 892*baa489faSSeongJae Park int last_pkey_faults; 893*baa489faSSeongJae Park 
#define UNKNOWN_PKEY -2 894*baa489faSSeongJae Park void expected_pkey_fault(int pkey) 895*baa489faSSeongJae Park { 896*baa489faSSeongJae Park dprintf2("%s(): last_pkey_faults: %d pkey_faults: %d\n", 897*baa489faSSeongJae Park __func__, last_pkey_faults, pkey_faults); 898*baa489faSSeongJae Park dprintf2("%s(%d): last_si_pkey: %d\n", __func__, pkey, last_si_pkey); 899*baa489faSSeongJae Park pkey_assert(last_pkey_faults + 1 == pkey_faults); 900*baa489faSSeongJae Park 901*baa489faSSeongJae Park /* 902*baa489faSSeongJae Park * For exec-only memory, we do not know the pkey in 903*baa489faSSeongJae Park * advance, so skip this check. 904*baa489faSSeongJae Park */ 905*baa489faSSeongJae Park if (pkey != UNKNOWN_PKEY) 906*baa489faSSeongJae Park pkey_assert(last_si_pkey == pkey); 907*baa489faSSeongJae Park 908*baa489faSSeongJae Park #if defined(__i386__) || defined(__x86_64__) /* arch */ 909*baa489faSSeongJae Park /* 910*baa489faSSeongJae Park * The signal handler shold have cleared out PKEY register to let the 911*baa489faSSeongJae Park * test program continue. We now have to restore it. 
912*baa489faSSeongJae Park */ 913*baa489faSSeongJae Park if (__read_pkey_reg() != 0) 914*baa489faSSeongJae Park #else /* arch */ 915*baa489faSSeongJae Park if (__read_pkey_reg() != shadow_pkey_reg) 916*baa489faSSeongJae Park #endif /* arch */ 917*baa489faSSeongJae Park pkey_assert(0); 918*baa489faSSeongJae Park 919*baa489faSSeongJae Park __write_pkey_reg(shadow_pkey_reg); 920*baa489faSSeongJae Park dprintf1("%s() set pkey_reg=%016llx to restore state after signal " 921*baa489faSSeongJae Park "nuked it\n", __func__, shadow_pkey_reg); 922*baa489faSSeongJae Park last_pkey_faults = pkey_faults; 923*baa489faSSeongJae Park last_si_pkey = -1; 924*baa489faSSeongJae Park } 925*baa489faSSeongJae Park 926*baa489faSSeongJae Park #define do_not_expect_pkey_fault(msg) do { \ 927*baa489faSSeongJae Park if (last_pkey_faults != pkey_faults) \ 928*baa489faSSeongJae Park dprintf0("unexpected PKey fault: %s\n", msg); \ 929*baa489faSSeongJae Park pkey_assert(last_pkey_faults == pkey_faults); \ 930*baa489faSSeongJae Park } while (0) 931*baa489faSSeongJae Park 932*baa489faSSeongJae Park int test_fds[10] = { -1 }; 933*baa489faSSeongJae Park int nr_test_fds; 934*baa489faSSeongJae Park void __save_test_fd(int fd) 935*baa489faSSeongJae Park { 936*baa489faSSeongJae Park pkey_assert(fd >= 0); 937*baa489faSSeongJae Park pkey_assert(nr_test_fds < ARRAY_SIZE(test_fds)); 938*baa489faSSeongJae Park test_fds[nr_test_fds] = fd; 939*baa489faSSeongJae Park nr_test_fds++; 940*baa489faSSeongJae Park } 941*baa489faSSeongJae Park 942*baa489faSSeongJae Park int get_test_read_fd(void) 943*baa489faSSeongJae Park { 944*baa489faSSeongJae Park int test_fd = open("/etc/passwd", O_RDONLY); 945*baa489faSSeongJae Park __save_test_fd(test_fd); 946*baa489faSSeongJae Park return test_fd; 947*baa489faSSeongJae Park } 948*baa489faSSeongJae Park 949*baa489faSSeongJae Park void close_test_fds(void) 950*baa489faSSeongJae Park { 951*baa489faSSeongJae Park int i; 952*baa489faSSeongJae Park 953*baa489faSSeongJae Park for (i = 
0; i < nr_test_fds; i++) { 954*baa489faSSeongJae Park if (test_fds[i] < 0) 955*baa489faSSeongJae Park continue; 956*baa489faSSeongJae Park close(test_fds[i]); 957*baa489faSSeongJae Park test_fds[i] = -1; 958*baa489faSSeongJae Park } 959*baa489faSSeongJae Park nr_test_fds = 0; 960*baa489faSSeongJae Park } 961*baa489faSSeongJae Park 962*baa489faSSeongJae Park #define barrier() __asm__ __volatile__("": : :"memory") 963*baa489faSSeongJae Park __attribute__((noinline)) int read_ptr(int *ptr) 964*baa489faSSeongJae Park { 965*baa489faSSeongJae Park /* 966*baa489faSSeongJae Park * Keep GCC from optimizing this away somehow 967*baa489faSSeongJae Park */ 968*baa489faSSeongJae Park barrier(); 969*baa489faSSeongJae Park return *ptr; 970*baa489faSSeongJae Park } 971*baa489faSSeongJae Park 972*baa489faSSeongJae Park void test_pkey_alloc_free_attach_pkey0(int *ptr, u16 pkey) 973*baa489faSSeongJae Park { 974*baa489faSSeongJae Park int i, err; 975*baa489faSSeongJae Park int max_nr_pkey_allocs; 976*baa489faSSeongJae Park int alloced_pkeys[NR_PKEYS]; 977*baa489faSSeongJae Park int nr_alloced = 0; 978*baa489faSSeongJae Park long size; 979*baa489faSSeongJae Park 980*baa489faSSeongJae Park pkey_assert(pkey_last_malloc_record); 981*baa489faSSeongJae Park size = pkey_last_malloc_record->size; 982*baa489faSSeongJae Park /* 983*baa489faSSeongJae Park * This is a bit of a hack. But mprotect() requires 984*baa489faSSeongJae Park * huge-page-aligned sizes when operating on hugetlbfs. 985*baa489faSSeongJae Park * So, make sure that we use something that's a multiple 986*baa489faSSeongJae Park * of a huge page when we can. 
987*baa489faSSeongJae Park */ 988*baa489faSSeongJae Park if (size >= HPAGE_SIZE) 989*baa489faSSeongJae Park size = HPAGE_SIZE; 990*baa489faSSeongJae Park 991*baa489faSSeongJae Park /* allocate every possible key and make sure key-0 never got allocated */ 992*baa489faSSeongJae Park max_nr_pkey_allocs = NR_PKEYS; 993*baa489faSSeongJae Park for (i = 0; i < max_nr_pkey_allocs; i++) { 994*baa489faSSeongJae Park int new_pkey = alloc_pkey(); 995*baa489faSSeongJae Park pkey_assert(new_pkey != 0); 996*baa489faSSeongJae Park 997*baa489faSSeongJae Park if (new_pkey < 0) 998*baa489faSSeongJae Park break; 999*baa489faSSeongJae Park alloced_pkeys[nr_alloced++] = new_pkey; 1000*baa489faSSeongJae Park } 1001*baa489faSSeongJae Park /* free all the allocated keys */ 1002*baa489faSSeongJae Park for (i = 0; i < nr_alloced; i++) { 1003*baa489faSSeongJae Park int free_ret; 1004*baa489faSSeongJae Park 1005*baa489faSSeongJae Park if (!alloced_pkeys[i]) 1006*baa489faSSeongJae Park continue; 1007*baa489faSSeongJae Park free_ret = sys_pkey_free(alloced_pkeys[i]); 1008*baa489faSSeongJae Park pkey_assert(!free_ret); 1009*baa489faSSeongJae Park } 1010*baa489faSSeongJae Park 1011*baa489faSSeongJae Park /* attach key-0 in various modes */ 1012*baa489faSSeongJae Park err = sys_mprotect_pkey(ptr, size, PROT_READ, 0); 1013*baa489faSSeongJae Park pkey_assert(!err); 1014*baa489faSSeongJae Park err = sys_mprotect_pkey(ptr, size, PROT_WRITE, 0); 1015*baa489faSSeongJae Park pkey_assert(!err); 1016*baa489faSSeongJae Park err = sys_mprotect_pkey(ptr, size, PROT_EXEC, 0); 1017*baa489faSSeongJae Park pkey_assert(!err); 1018*baa489faSSeongJae Park err = sys_mprotect_pkey(ptr, size, PROT_READ|PROT_WRITE, 0); 1019*baa489faSSeongJae Park pkey_assert(!err); 1020*baa489faSSeongJae Park err = sys_mprotect_pkey(ptr, size, PROT_READ|PROT_WRITE|PROT_EXEC, 0); 1021*baa489faSSeongJae Park pkey_assert(!err); 1022*baa489faSSeongJae Park } 1023*baa489faSSeongJae Park 1024*baa489faSSeongJae Park void 
test_read_of_write_disabled_region(int *ptr, u16 pkey) 1025*baa489faSSeongJae Park { 1026*baa489faSSeongJae Park int ptr_contents; 1027*baa489faSSeongJae Park 1028*baa489faSSeongJae Park dprintf1("disabling write access to PKEY[1], doing read\n"); 1029*baa489faSSeongJae Park pkey_write_deny(pkey); 1030*baa489faSSeongJae Park ptr_contents = read_ptr(ptr); 1031*baa489faSSeongJae Park dprintf1("*ptr: %d\n", ptr_contents); 1032*baa489faSSeongJae Park dprintf1("\n"); 1033*baa489faSSeongJae Park } 1034*baa489faSSeongJae Park void test_read_of_access_disabled_region(int *ptr, u16 pkey) 1035*baa489faSSeongJae Park { 1036*baa489faSSeongJae Park int ptr_contents; 1037*baa489faSSeongJae Park 1038*baa489faSSeongJae Park dprintf1("disabling access to PKEY[%02d], doing read @ %p\n", pkey, ptr); 1039*baa489faSSeongJae Park read_pkey_reg(); 1040*baa489faSSeongJae Park pkey_access_deny(pkey); 1041*baa489faSSeongJae Park ptr_contents = read_ptr(ptr); 1042*baa489faSSeongJae Park dprintf1("*ptr: %d\n", ptr_contents); 1043*baa489faSSeongJae Park expected_pkey_fault(pkey); 1044*baa489faSSeongJae Park } 1045*baa489faSSeongJae Park 1046*baa489faSSeongJae Park void test_read_of_access_disabled_region_with_page_already_mapped(int *ptr, 1047*baa489faSSeongJae Park u16 pkey) 1048*baa489faSSeongJae Park { 1049*baa489faSSeongJae Park int ptr_contents; 1050*baa489faSSeongJae Park 1051*baa489faSSeongJae Park dprintf1("disabling access to PKEY[%02d], doing read @ %p\n", 1052*baa489faSSeongJae Park pkey, ptr); 1053*baa489faSSeongJae Park ptr_contents = read_ptr(ptr); 1054*baa489faSSeongJae Park dprintf1("reading ptr before disabling the read : %d\n", 1055*baa489faSSeongJae Park ptr_contents); 1056*baa489faSSeongJae Park read_pkey_reg(); 1057*baa489faSSeongJae Park pkey_access_deny(pkey); 1058*baa489faSSeongJae Park ptr_contents = read_ptr(ptr); 1059*baa489faSSeongJae Park dprintf1("*ptr: %d\n", ptr_contents); 1060*baa489faSSeongJae Park expected_pkey_fault(pkey); 1061*baa489faSSeongJae Park } 
1062*baa489faSSeongJae Park 1063*baa489faSSeongJae Park void test_write_of_write_disabled_region_with_page_already_mapped(int *ptr, 1064*baa489faSSeongJae Park u16 pkey) 1065*baa489faSSeongJae Park { 1066*baa489faSSeongJae Park *ptr = __LINE__; 1067*baa489faSSeongJae Park dprintf1("disabling write access; after accessing the page, " 1068*baa489faSSeongJae Park "to PKEY[%02d], doing write\n", pkey); 1069*baa489faSSeongJae Park pkey_write_deny(pkey); 1070*baa489faSSeongJae Park *ptr = __LINE__; 1071*baa489faSSeongJae Park expected_pkey_fault(pkey); 1072*baa489faSSeongJae Park } 1073*baa489faSSeongJae Park 1074*baa489faSSeongJae Park void test_write_of_write_disabled_region(int *ptr, u16 pkey) 1075*baa489faSSeongJae Park { 1076*baa489faSSeongJae Park dprintf1("disabling write access to PKEY[%02d], doing write\n", pkey); 1077*baa489faSSeongJae Park pkey_write_deny(pkey); 1078*baa489faSSeongJae Park *ptr = __LINE__; 1079*baa489faSSeongJae Park expected_pkey_fault(pkey); 1080*baa489faSSeongJae Park } 1081*baa489faSSeongJae Park void test_write_of_access_disabled_region(int *ptr, u16 pkey) 1082*baa489faSSeongJae Park { 1083*baa489faSSeongJae Park dprintf1("disabling access to PKEY[%02d], doing write\n", pkey); 1084*baa489faSSeongJae Park pkey_access_deny(pkey); 1085*baa489faSSeongJae Park *ptr = __LINE__; 1086*baa489faSSeongJae Park expected_pkey_fault(pkey); 1087*baa489faSSeongJae Park } 1088*baa489faSSeongJae Park 1089*baa489faSSeongJae Park void test_write_of_access_disabled_region_with_page_already_mapped(int *ptr, 1090*baa489faSSeongJae Park u16 pkey) 1091*baa489faSSeongJae Park { 1092*baa489faSSeongJae Park *ptr = __LINE__; 1093*baa489faSSeongJae Park dprintf1("disabling access; after accessing the page, " 1094*baa489faSSeongJae Park " to PKEY[%02d], doing write\n", pkey); 1095*baa489faSSeongJae Park pkey_access_deny(pkey); 1096*baa489faSSeongJae Park *ptr = __LINE__; 1097*baa489faSSeongJae Park expected_pkey_fault(pkey); 1098*baa489faSSeongJae Park } 
1099*baa489faSSeongJae Park 1100*baa489faSSeongJae Park void test_kernel_write_of_access_disabled_region(int *ptr, u16 pkey) 1101*baa489faSSeongJae Park { 1102*baa489faSSeongJae Park int ret; 1103*baa489faSSeongJae Park int test_fd = get_test_read_fd(); 1104*baa489faSSeongJae Park 1105*baa489faSSeongJae Park dprintf1("disabling access to PKEY[%02d], " 1106*baa489faSSeongJae Park "having kernel read() to buffer\n", pkey); 1107*baa489faSSeongJae Park pkey_access_deny(pkey); 1108*baa489faSSeongJae Park ret = read(test_fd, ptr, 1); 1109*baa489faSSeongJae Park dprintf1("read ret: %d\n", ret); 1110*baa489faSSeongJae Park pkey_assert(ret); 1111*baa489faSSeongJae Park } 1112*baa489faSSeongJae Park void test_kernel_write_of_write_disabled_region(int *ptr, u16 pkey) 1113*baa489faSSeongJae Park { 1114*baa489faSSeongJae Park int ret; 1115*baa489faSSeongJae Park int test_fd = get_test_read_fd(); 1116*baa489faSSeongJae Park 1117*baa489faSSeongJae Park pkey_write_deny(pkey); 1118*baa489faSSeongJae Park ret = read(test_fd, ptr, 100); 1119*baa489faSSeongJae Park dprintf1("read ret: %d\n", ret); 1120*baa489faSSeongJae Park if (ret < 0 && (DEBUG_LEVEL > 0)) 1121*baa489faSSeongJae Park perror("verbose read result (OK for this to be bad)"); 1122*baa489faSSeongJae Park pkey_assert(ret); 1123*baa489faSSeongJae Park } 1124*baa489faSSeongJae Park 1125*baa489faSSeongJae Park void test_kernel_gup_of_access_disabled_region(int *ptr, u16 pkey) 1126*baa489faSSeongJae Park { 1127*baa489faSSeongJae Park int pipe_ret, vmsplice_ret; 1128*baa489faSSeongJae Park struct iovec iov; 1129*baa489faSSeongJae Park int pipe_fds[2]; 1130*baa489faSSeongJae Park 1131*baa489faSSeongJae Park pipe_ret = pipe(pipe_fds); 1132*baa489faSSeongJae Park 1133*baa489faSSeongJae Park pkey_assert(pipe_ret == 0); 1134*baa489faSSeongJae Park dprintf1("disabling access to PKEY[%02d], " 1135*baa489faSSeongJae Park "having kernel vmsplice from buffer\n", pkey); 1136*baa489faSSeongJae Park pkey_access_deny(pkey); 
1137*baa489faSSeongJae Park iov.iov_base = ptr; 1138*baa489faSSeongJae Park iov.iov_len = PAGE_SIZE; 1139*baa489faSSeongJae Park vmsplice_ret = vmsplice(pipe_fds[1], &iov, 1, SPLICE_F_GIFT); 1140*baa489faSSeongJae Park dprintf1("vmsplice() ret: %d\n", vmsplice_ret); 1141*baa489faSSeongJae Park pkey_assert(vmsplice_ret == -1); 1142*baa489faSSeongJae Park 1143*baa489faSSeongJae Park close(pipe_fds[0]); 1144*baa489faSSeongJae Park close(pipe_fds[1]); 1145*baa489faSSeongJae Park } 1146*baa489faSSeongJae Park 1147*baa489faSSeongJae Park void test_kernel_gup_write_to_write_disabled_region(int *ptr, u16 pkey) 1148*baa489faSSeongJae Park { 1149*baa489faSSeongJae Park int ignored = 0xdada; 1150*baa489faSSeongJae Park int futex_ret; 1151*baa489faSSeongJae Park int some_int = __LINE__; 1152*baa489faSSeongJae Park 1153*baa489faSSeongJae Park dprintf1("disabling write to PKEY[%02d], " 1154*baa489faSSeongJae Park "doing futex gunk in buffer\n", pkey); 1155*baa489faSSeongJae Park *ptr = some_int; 1156*baa489faSSeongJae Park pkey_write_deny(pkey); 1157*baa489faSSeongJae Park futex_ret = syscall(SYS_futex, ptr, FUTEX_WAIT, some_int-1, NULL, 1158*baa489faSSeongJae Park &ignored, ignored); 1159*baa489faSSeongJae Park if (DEBUG_LEVEL > 0) 1160*baa489faSSeongJae Park perror("futex"); 1161*baa489faSSeongJae Park dprintf1("futex() ret: %d\n", futex_ret); 1162*baa489faSSeongJae Park } 1163*baa489faSSeongJae Park 1164*baa489faSSeongJae Park /* Assumes that all pkeys other than 'pkey' are unallocated */ 1165*baa489faSSeongJae Park void test_pkey_syscalls_on_non_allocated_pkey(int *ptr, u16 pkey) 1166*baa489faSSeongJae Park { 1167*baa489faSSeongJae Park int err; 1168*baa489faSSeongJae Park int i; 1169*baa489faSSeongJae Park 1170*baa489faSSeongJae Park /* Note: 0 is the default pkey, so don't mess with it */ 1171*baa489faSSeongJae Park for (i = 1; i < NR_PKEYS; i++) { 1172*baa489faSSeongJae Park if (pkey == i) 1173*baa489faSSeongJae Park continue; 1174*baa489faSSeongJae Park 
1175*baa489faSSeongJae Park dprintf1("trying get/set/free to non-allocated pkey: %2d\n", i); 1176*baa489faSSeongJae Park err = sys_pkey_free(i); 1177*baa489faSSeongJae Park pkey_assert(err); 1178*baa489faSSeongJae Park 1179*baa489faSSeongJae Park err = sys_pkey_free(i); 1180*baa489faSSeongJae Park pkey_assert(err); 1181*baa489faSSeongJae Park 1182*baa489faSSeongJae Park err = sys_mprotect_pkey(ptr, PAGE_SIZE, PROT_READ, i); 1183*baa489faSSeongJae Park pkey_assert(err); 1184*baa489faSSeongJae Park } 1185*baa489faSSeongJae Park } 1186*baa489faSSeongJae Park 1187*baa489faSSeongJae Park /* Assumes that all pkeys other than 'pkey' are unallocated */ 1188*baa489faSSeongJae Park void test_pkey_syscalls_bad_args(int *ptr, u16 pkey) 1189*baa489faSSeongJae Park { 1190*baa489faSSeongJae Park int err; 1191*baa489faSSeongJae Park int bad_pkey = NR_PKEYS+99; 1192*baa489faSSeongJae Park 1193*baa489faSSeongJae Park /* pass a known-invalid pkey in: */ 1194*baa489faSSeongJae Park err = sys_mprotect_pkey(ptr, PAGE_SIZE, PROT_READ, bad_pkey); 1195*baa489faSSeongJae Park pkey_assert(err); 1196*baa489faSSeongJae Park } 1197*baa489faSSeongJae Park 1198*baa489faSSeongJae Park void become_child(void) 1199*baa489faSSeongJae Park { 1200*baa489faSSeongJae Park pid_t forkret; 1201*baa489faSSeongJae Park 1202*baa489faSSeongJae Park forkret = fork(); 1203*baa489faSSeongJae Park pkey_assert(forkret >= 0); 1204*baa489faSSeongJae Park dprintf3("[%d] fork() ret: %d\n", getpid(), forkret); 1205*baa489faSSeongJae Park 1206*baa489faSSeongJae Park if (!forkret) { 1207*baa489faSSeongJae Park /* in the child */ 1208*baa489faSSeongJae Park return; 1209*baa489faSSeongJae Park } 1210*baa489faSSeongJae Park exit(0); 1211*baa489faSSeongJae Park } 1212*baa489faSSeongJae Park 1213*baa489faSSeongJae Park /* Assumes that all pkeys other than 'pkey' are unallocated */ 1214*baa489faSSeongJae Park void test_pkey_alloc_exhaust(int *ptr, u16 pkey) 1215*baa489faSSeongJae Park { 1216*baa489faSSeongJae Park int err; 
1217*baa489faSSeongJae Park int allocated_pkeys[NR_PKEYS] = {0}; 1218*baa489faSSeongJae Park int nr_allocated_pkeys = 0; 1219*baa489faSSeongJae Park int i; 1220*baa489faSSeongJae Park 1221*baa489faSSeongJae Park for (i = 0; i < NR_PKEYS*3; i++) { 1222*baa489faSSeongJae Park int new_pkey; 1223*baa489faSSeongJae Park dprintf1("%s() alloc loop: %d\n", __func__, i); 1224*baa489faSSeongJae Park new_pkey = alloc_pkey(); 1225*baa489faSSeongJae Park dprintf4("%s()::%d, err: %d pkey_reg: 0x%016llx" 1226*baa489faSSeongJae Park " shadow: 0x%016llx\n", 1227*baa489faSSeongJae Park __func__, __LINE__, err, __read_pkey_reg(), 1228*baa489faSSeongJae Park shadow_pkey_reg); 1229*baa489faSSeongJae Park read_pkey_reg(); /* for shadow checking */ 1230*baa489faSSeongJae Park dprintf2("%s() errno: %d ENOSPC: %d\n", __func__, errno, ENOSPC); 1231*baa489faSSeongJae Park if ((new_pkey == -1) && (errno == ENOSPC)) { 1232*baa489faSSeongJae Park dprintf2("%s() failed to allocate pkey after %d tries\n", 1233*baa489faSSeongJae Park __func__, nr_allocated_pkeys); 1234*baa489faSSeongJae Park } else { 1235*baa489faSSeongJae Park /* 1236*baa489faSSeongJae Park * Ensure the number of successes never 1237*baa489faSSeongJae Park * exceeds the number of keys supported 1238*baa489faSSeongJae Park * in the hardware. 1239*baa489faSSeongJae Park */ 1240*baa489faSSeongJae Park pkey_assert(nr_allocated_pkeys < NR_PKEYS); 1241*baa489faSSeongJae Park allocated_pkeys[nr_allocated_pkeys++] = new_pkey; 1242*baa489faSSeongJae Park } 1243*baa489faSSeongJae Park 1244*baa489faSSeongJae Park /* 1245*baa489faSSeongJae Park * Make sure that allocation state is properly 1246*baa489faSSeongJae Park * preserved across fork(). 
1247*baa489faSSeongJae Park */ 1248*baa489faSSeongJae Park if (i == NR_PKEYS*2) 1249*baa489faSSeongJae Park become_child(); 1250*baa489faSSeongJae Park } 1251*baa489faSSeongJae Park 1252*baa489faSSeongJae Park dprintf3("%s()::%d\n", __func__, __LINE__); 1253*baa489faSSeongJae Park 1254*baa489faSSeongJae Park /* 1255*baa489faSSeongJae Park * On x86: 1256*baa489faSSeongJae Park * There are 16 pkeys supported in hardware. Three are 1257*baa489faSSeongJae Park * allocated by the time we get here: 1258*baa489faSSeongJae Park * 1. The default key (0) 1259*baa489faSSeongJae Park * 2. One possibly consumed by an execute-only mapping. 1260*baa489faSSeongJae Park * 3. One allocated by the test code and passed in via 1261*baa489faSSeongJae Park * 'pkey' to this function. 1262*baa489faSSeongJae Park * Ensure that we can allocate at least another 13 (16-3). 1263*baa489faSSeongJae Park * 1264*baa489faSSeongJae Park * On powerpc: 1265*baa489faSSeongJae Park * There are either 5, 28, 29 or 32 pkeys supported in 1266*baa489faSSeongJae Park * hardware depending on the page size (4K or 64K) and 1267*baa489faSSeongJae Park * platform (powernv or powervm). Four are allocated by 1268*baa489faSSeongJae Park * the time we get here. These include pkey-0, pkey-1, 1269*baa489faSSeongJae Park * exec-only pkey and the one allocated by the test code. 1270*baa489faSSeongJae Park * Ensure that we can allocate the remaining. 
1271*baa489faSSeongJae Park */ 1272*baa489faSSeongJae Park pkey_assert(i >= (NR_PKEYS - get_arch_reserved_keys() - 1)); 1273*baa489faSSeongJae Park 1274*baa489faSSeongJae Park for (i = 0; i < nr_allocated_pkeys; i++) { 1275*baa489faSSeongJae Park err = sys_pkey_free(allocated_pkeys[i]); 1276*baa489faSSeongJae Park pkey_assert(!err); 1277*baa489faSSeongJae Park read_pkey_reg(); /* for shadow checking */ 1278*baa489faSSeongJae Park } 1279*baa489faSSeongJae Park } 1280*baa489faSSeongJae Park 1281*baa489faSSeongJae Park void arch_force_pkey_reg_init(void) 1282*baa489faSSeongJae Park { 1283*baa489faSSeongJae Park #if defined(__i386__) || defined(__x86_64__) /* arch */ 1284*baa489faSSeongJae Park u64 *buf; 1285*baa489faSSeongJae Park 1286*baa489faSSeongJae Park /* 1287*baa489faSSeongJae Park * All keys should be allocated and set to allow reads and 1288*baa489faSSeongJae Park * writes, so the register should be all 0. If not, just 1289*baa489faSSeongJae Park * skip the test. 1290*baa489faSSeongJae Park */ 1291*baa489faSSeongJae Park if (read_pkey_reg()) 1292*baa489faSSeongJae Park return; 1293*baa489faSSeongJae Park 1294*baa489faSSeongJae Park /* 1295*baa489faSSeongJae Park * Just allocate an absurd about of memory rather than 1296*baa489faSSeongJae Park * doing the XSAVE size enumeration dance. 
1297*baa489faSSeongJae Park */ 1298*baa489faSSeongJae Park buf = mmap(NULL, 1*MB, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); 1299*baa489faSSeongJae Park 1300*baa489faSSeongJae Park /* These __builtins require compiling with -mxsave */ 1301*baa489faSSeongJae Park 1302*baa489faSSeongJae Park /* XSAVE to build a valid buffer: */ 1303*baa489faSSeongJae Park __builtin_ia32_xsave(buf, XSTATE_PKEY); 1304*baa489faSSeongJae Park /* Clear XSTATE_BV[PKRU]: */ 1305*baa489faSSeongJae Park buf[XSTATE_BV_OFFSET/sizeof(u64)] &= ~XSTATE_PKEY; 1306*baa489faSSeongJae Park /* XRSTOR will likely get PKRU back to the init state: */ 1307*baa489faSSeongJae Park __builtin_ia32_xrstor(buf, XSTATE_PKEY); 1308*baa489faSSeongJae Park 1309*baa489faSSeongJae Park munmap(buf, 1*MB); 1310*baa489faSSeongJae Park #endif 1311*baa489faSSeongJae Park } 1312*baa489faSSeongJae Park 1313*baa489faSSeongJae Park 1314*baa489faSSeongJae Park /* 1315*baa489faSSeongJae Park * This is mostly useless on ppc for now. But it will not 1316*baa489faSSeongJae Park * hurt anything and should give some better coverage as 1317*baa489faSSeongJae Park * a long-running test that continually checks the pkey 1318*baa489faSSeongJae Park * register. 
1319*baa489faSSeongJae Park */ 1320*baa489faSSeongJae Park void test_pkey_init_state(int *ptr, u16 pkey) 1321*baa489faSSeongJae Park { 1322*baa489faSSeongJae Park int err; 1323*baa489faSSeongJae Park int allocated_pkeys[NR_PKEYS] = {0}; 1324*baa489faSSeongJae Park int nr_allocated_pkeys = 0; 1325*baa489faSSeongJae Park int i; 1326*baa489faSSeongJae Park 1327*baa489faSSeongJae Park for (i = 0; i < NR_PKEYS; i++) { 1328*baa489faSSeongJae Park int new_pkey = alloc_pkey(); 1329*baa489faSSeongJae Park 1330*baa489faSSeongJae Park if (new_pkey < 0) 1331*baa489faSSeongJae Park continue; 1332*baa489faSSeongJae Park allocated_pkeys[nr_allocated_pkeys++] = new_pkey; 1333*baa489faSSeongJae Park } 1334*baa489faSSeongJae Park 1335*baa489faSSeongJae Park dprintf3("%s()::%d\n", __func__, __LINE__); 1336*baa489faSSeongJae Park 1337*baa489faSSeongJae Park arch_force_pkey_reg_init(); 1338*baa489faSSeongJae Park 1339*baa489faSSeongJae Park /* 1340*baa489faSSeongJae Park * Loop for a bit, hoping to get exercise the kernel 1341*baa489faSSeongJae Park * context switch code. 1342*baa489faSSeongJae Park */ 1343*baa489faSSeongJae Park for (i = 0; i < 1000000; i++) 1344*baa489faSSeongJae Park read_pkey_reg(); 1345*baa489faSSeongJae Park 1346*baa489faSSeongJae Park for (i = 0; i < nr_allocated_pkeys; i++) { 1347*baa489faSSeongJae Park err = sys_pkey_free(allocated_pkeys[i]); 1348*baa489faSSeongJae Park pkey_assert(!err); 1349*baa489faSSeongJae Park read_pkey_reg(); /* for shadow checking */ 1350*baa489faSSeongJae Park } 1351*baa489faSSeongJae Park } 1352*baa489faSSeongJae Park 1353*baa489faSSeongJae Park /* 1354*baa489faSSeongJae Park * pkey 0 is special. It is allocated by default, so you do not 1355*baa489faSSeongJae Park * have to call pkey_alloc() to use it first. Make sure that it 1356*baa489faSSeongJae Park * is usable. 
1357*baa489faSSeongJae Park */ 1358*baa489faSSeongJae Park void test_mprotect_with_pkey_0(int *ptr, u16 pkey) 1359*baa489faSSeongJae Park { 1360*baa489faSSeongJae Park long size; 1361*baa489faSSeongJae Park int prot; 1362*baa489faSSeongJae Park 1363*baa489faSSeongJae Park assert(pkey_last_malloc_record); 1364*baa489faSSeongJae Park size = pkey_last_malloc_record->size; 1365*baa489faSSeongJae Park /* 1366*baa489faSSeongJae Park * This is a bit of a hack. But mprotect() requires 1367*baa489faSSeongJae Park * huge-page-aligned sizes when operating on hugetlbfs. 1368*baa489faSSeongJae Park * So, make sure that we use something that's a multiple 1369*baa489faSSeongJae Park * of a huge page when we can. 1370*baa489faSSeongJae Park */ 1371*baa489faSSeongJae Park if (size >= HPAGE_SIZE) 1372*baa489faSSeongJae Park size = HPAGE_SIZE; 1373*baa489faSSeongJae Park prot = pkey_last_malloc_record->prot; 1374*baa489faSSeongJae Park 1375*baa489faSSeongJae Park /* Use pkey 0 */ 1376*baa489faSSeongJae Park mprotect_pkey(ptr, size, prot, 0); 1377*baa489faSSeongJae Park 1378*baa489faSSeongJae Park /* Make sure that we can set it back to the original pkey. */ 1379*baa489faSSeongJae Park mprotect_pkey(ptr, size, prot, pkey); 1380*baa489faSSeongJae Park } 1381*baa489faSSeongJae Park 1382*baa489faSSeongJae Park void test_ptrace_of_child(int *ptr, u16 pkey) 1383*baa489faSSeongJae Park { 1384*baa489faSSeongJae Park __attribute__((__unused__)) int peek_result; 1385*baa489faSSeongJae Park pid_t child_pid; 1386*baa489faSSeongJae Park void *ignored = 0; 1387*baa489faSSeongJae Park long ret; 1388*baa489faSSeongJae Park int status; 1389*baa489faSSeongJae Park /* 1390*baa489faSSeongJae Park * This is the "control" for our little expermient. Make sure 1391*baa489faSSeongJae Park * we can always access it when ptracing. 
1392*baa489faSSeongJae Park */ 1393*baa489faSSeongJae Park int *plain_ptr_unaligned = malloc(HPAGE_SIZE); 1394*baa489faSSeongJae Park int *plain_ptr = ALIGN_PTR_UP(plain_ptr_unaligned, PAGE_SIZE); 1395*baa489faSSeongJae Park 1396*baa489faSSeongJae Park /* 1397*baa489faSSeongJae Park * Fork a child which is an exact copy of this process, of course. 1398*baa489faSSeongJae Park * That means we can do all of our tests via ptrace() and then plain 1399*baa489faSSeongJae Park * memory access and ensure they work differently. 1400*baa489faSSeongJae Park */ 1401*baa489faSSeongJae Park child_pid = fork_lazy_child(); 1402*baa489faSSeongJae Park dprintf1("[%d] child pid: %d\n", getpid(), child_pid); 1403*baa489faSSeongJae Park 1404*baa489faSSeongJae Park ret = ptrace(PTRACE_ATTACH, child_pid, ignored, ignored); 1405*baa489faSSeongJae Park if (ret) 1406*baa489faSSeongJae Park perror("attach"); 1407*baa489faSSeongJae Park dprintf1("[%d] attach ret: %ld %d\n", getpid(), ret, __LINE__); 1408*baa489faSSeongJae Park pkey_assert(ret != -1); 1409*baa489faSSeongJae Park ret = waitpid(child_pid, &status, WUNTRACED); 1410*baa489faSSeongJae Park if ((ret != child_pid) || !(WIFSTOPPED(status))) { 1411*baa489faSSeongJae Park fprintf(stderr, "weird waitpid result %ld stat %x\n", 1412*baa489faSSeongJae Park ret, status); 1413*baa489faSSeongJae Park pkey_assert(0); 1414*baa489faSSeongJae Park } 1415*baa489faSSeongJae Park dprintf2("waitpid ret: %ld\n", ret); 1416*baa489faSSeongJae Park dprintf2("waitpid status: %d\n", status); 1417*baa489faSSeongJae Park 1418*baa489faSSeongJae Park pkey_access_deny(pkey); 1419*baa489faSSeongJae Park pkey_write_deny(pkey); 1420*baa489faSSeongJae Park 1421*baa489faSSeongJae Park /* Write access, untested for now: 1422*baa489faSSeongJae Park ret = ptrace(PTRACE_POKEDATA, child_pid, peek_at, data); 1423*baa489faSSeongJae Park pkey_assert(ret != -1); 1424*baa489faSSeongJae Park dprintf1("poke at %p: %ld\n", peek_at, ret); 1425*baa489faSSeongJae Park */ 
1426*baa489faSSeongJae Park 1427*baa489faSSeongJae Park /* 1428*baa489faSSeongJae Park * Try to access the pkey-protected "ptr" via ptrace: 1429*baa489faSSeongJae Park */ 1430*baa489faSSeongJae Park ret = ptrace(PTRACE_PEEKDATA, child_pid, ptr, ignored); 1431*baa489faSSeongJae Park /* expect it to work, without an error: */ 1432*baa489faSSeongJae Park pkey_assert(ret != -1); 1433*baa489faSSeongJae Park /* Now access from the current task, and expect an exception: */ 1434*baa489faSSeongJae Park peek_result = read_ptr(ptr); 1435*baa489faSSeongJae Park expected_pkey_fault(pkey); 1436*baa489faSSeongJae Park 1437*baa489faSSeongJae Park /* 1438*baa489faSSeongJae Park * Try to access the NON-pkey-protected "plain_ptr" via ptrace: 1439*baa489faSSeongJae Park */ 1440*baa489faSSeongJae Park ret = ptrace(PTRACE_PEEKDATA, child_pid, plain_ptr, ignored); 1441*baa489faSSeongJae Park /* expect it to work, without an error: */ 1442*baa489faSSeongJae Park pkey_assert(ret != -1); 1443*baa489faSSeongJae Park /* Now access from the current task, and expect NO exception: */ 1444*baa489faSSeongJae Park peek_result = read_ptr(plain_ptr); 1445*baa489faSSeongJae Park do_not_expect_pkey_fault("read plain pointer after ptrace"); 1446*baa489faSSeongJae Park 1447*baa489faSSeongJae Park ret = ptrace(PTRACE_DETACH, child_pid, ignored, 0); 1448*baa489faSSeongJae Park pkey_assert(ret != -1); 1449*baa489faSSeongJae Park 1450*baa489faSSeongJae Park ret = kill(child_pid, SIGKILL); 1451*baa489faSSeongJae Park pkey_assert(ret != -1); 1452*baa489faSSeongJae Park 1453*baa489faSSeongJae Park wait(&status); 1454*baa489faSSeongJae Park 1455*baa489faSSeongJae Park free(plain_ptr_unaligned); 1456*baa489faSSeongJae Park } 1457*baa489faSSeongJae Park 1458*baa489faSSeongJae Park void *get_pointer_to_instructions(void) 1459*baa489faSSeongJae Park { 1460*baa489faSSeongJae Park void *p1; 1461*baa489faSSeongJae Park 1462*baa489faSSeongJae Park p1 = ALIGN_PTR_UP(&lots_o_noops_around_write, PAGE_SIZE); 
1463*baa489faSSeongJae Park dprintf3("&lots_o_noops: %p\n", &lots_o_noops_around_write); 1464*baa489faSSeongJae Park /* lots_o_noops_around_write should be page-aligned already */ 1465*baa489faSSeongJae Park assert(p1 == &lots_o_noops_around_write); 1466*baa489faSSeongJae Park 1467*baa489faSSeongJae Park /* Point 'p1' at the *second* page of the function: */ 1468*baa489faSSeongJae Park p1 += PAGE_SIZE; 1469*baa489faSSeongJae Park 1470*baa489faSSeongJae Park /* 1471*baa489faSSeongJae Park * Try to ensure we fault this in on next touch to ensure 1472*baa489faSSeongJae Park * we get an instruction fault as opposed to a data one 1473*baa489faSSeongJae Park */ 1474*baa489faSSeongJae Park madvise(p1, PAGE_SIZE, MADV_DONTNEED); 1475*baa489faSSeongJae Park 1476*baa489faSSeongJae Park return p1; 1477*baa489faSSeongJae Park } 1478*baa489faSSeongJae Park 1479*baa489faSSeongJae Park void test_executing_on_unreadable_memory(int *ptr, u16 pkey) 1480*baa489faSSeongJae Park { 1481*baa489faSSeongJae Park void *p1; 1482*baa489faSSeongJae Park int scratch; 1483*baa489faSSeongJae Park int ptr_contents; 1484*baa489faSSeongJae Park int ret; 1485*baa489faSSeongJae Park 1486*baa489faSSeongJae Park p1 = get_pointer_to_instructions(); 1487*baa489faSSeongJae Park lots_o_noops_around_write(&scratch); 1488*baa489faSSeongJae Park ptr_contents = read_ptr(p1); 1489*baa489faSSeongJae Park dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents); 1490*baa489faSSeongJae Park 1491*baa489faSSeongJae Park ret = mprotect_pkey(p1, PAGE_SIZE, PROT_EXEC, (u64)pkey); 1492*baa489faSSeongJae Park pkey_assert(!ret); 1493*baa489faSSeongJae Park pkey_access_deny(pkey); 1494*baa489faSSeongJae Park 1495*baa489faSSeongJae Park dprintf2("pkey_reg: %016llx\n", read_pkey_reg()); 1496*baa489faSSeongJae Park 1497*baa489faSSeongJae Park /* 1498*baa489faSSeongJae Park * Make sure this is an *instruction* fault 1499*baa489faSSeongJae Park */ 1500*baa489faSSeongJae Park madvise(p1, PAGE_SIZE, MADV_DONTNEED); 
1501*baa489faSSeongJae Park lots_o_noops_around_write(&scratch); 1502*baa489faSSeongJae Park do_not_expect_pkey_fault("executing on PROT_EXEC memory"); 1503*baa489faSSeongJae Park expect_fault_on_read_execonly_key(p1, pkey); 1504*baa489faSSeongJae Park } 1505*baa489faSSeongJae Park 1506*baa489faSSeongJae Park void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey) 1507*baa489faSSeongJae Park { 1508*baa489faSSeongJae Park void *p1; 1509*baa489faSSeongJae Park int scratch; 1510*baa489faSSeongJae Park int ptr_contents; 1511*baa489faSSeongJae Park int ret; 1512*baa489faSSeongJae Park 1513*baa489faSSeongJae Park dprintf1("%s() start\n", __func__); 1514*baa489faSSeongJae Park 1515*baa489faSSeongJae Park p1 = get_pointer_to_instructions(); 1516*baa489faSSeongJae Park lots_o_noops_around_write(&scratch); 1517*baa489faSSeongJae Park ptr_contents = read_ptr(p1); 1518*baa489faSSeongJae Park dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents); 1519*baa489faSSeongJae Park 1520*baa489faSSeongJae Park /* Use a *normal* mprotect(), not mprotect_pkey(): */ 1521*baa489faSSeongJae Park ret = mprotect(p1, PAGE_SIZE, PROT_EXEC); 1522*baa489faSSeongJae Park pkey_assert(!ret); 1523*baa489faSSeongJae Park 1524*baa489faSSeongJae Park /* 1525*baa489faSSeongJae Park * Reset the shadow, assuming that the above mprotect() 1526*baa489faSSeongJae Park * correctly changed PKRU, but to an unknown value since 1527*baa489faSSeongJae Park * the actual allocated pkey is unknown. 
1528*baa489faSSeongJae Park */ 1529*baa489faSSeongJae Park shadow_pkey_reg = __read_pkey_reg(); 1530*baa489faSSeongJae Park 1531*baa489faSSeongJae Park dprintf2("pkey_reg: %016llx\n", read_pkey_reg()); 1532*baa489faSSeongJae Park 1533*baa489faSSeongJae Park /* Make sure this is an *instruction* fault */ 1534*baa489faSSeongJae Park madvise(p1, PAGE_SIZE, MADV_DONTNEED); 1535*baa489faSSeongJae Park lots_o_noops_around_write(&scratch); 1536*baa489faSSeongJae Park do_not_expect_pkey_fault("executing on PROT_EXEC memory"); 1537*baa489faSSeongJae Park expect_fault_on_read_execonly_key(p1, UNKNOWN_PKEY); 1538*baa489faSSeongJae Park 1539*baa489faSSeongJae Park /* 1540*baa489faSSeongJae Park * Put the memory back to non-PROT_EXEC. Should clear the 1541*baa489faSSeongJae Park * exec-only pkey off the VMA and allow it to be readable 1542*baa489faSSeongJae Park * again. Go to PROT_NONE first to check for a kernel bug 1543*baa489faSSeongJae Park * that did not clear the pkey when doing PROT_NONE. 1544*baa489faSSeongJae Park */ 1545*baa489faSSeongJae Park ret = mprotect(p1, PAGE_SIZE, PROT_NONE); 1546*baa489faSSeongJae Park pkey_assert(!ret); 1547*baa489faSSeongJae Park 1548*baa489faSSeongJae Park ret = mprotect(p1, PAGE_SIZE, PROT_READ|PROT_EXEC); 1549*baa489faSSeongJae Park pkey_assert(!ret); 1550*baa489faSSeongJae Park ptr_contents = read_ptr(p1); 1551*baa489faSSeongJae Park do_not_expect_pkey_fault("plain read on recently PROT_EXEC area"); 1552*baa489faSSeongJae Park } 1553*baa489faSSeongJae Park 1554*baa489faSSeongJae Park #if defined(__i386__) || defined(__x86_64__) 1555*baa489faSSeongJae Park void test_ptrace_modifies_pkru(int *ptr, u16 pkey) 1556*baa489faSSeongJae Park { 1557*baa489faSSeongJae Park u32 new_pkru; 1558*baa489faSSeongJae Park pid_t child; 1559*baa489faSSeongJae Park int status, ret; 1560*baa489faSSeongJae Park int pkey_offset = pkey_reg_xstate_offset(); 1561*baa489faSSeongJae Park size_t xsave_size = cpu_max_xsave_size(); 1562*baa489faSSeongJae Park void 
*xsave; 1563*baa489faSSeongJae Park u32 *pkey_register; 1564*baa489faSSeongJae Park u64 *xstate_bv; 1565*baa489faSSeongJae Park struct iovec iov; 1566*baa489faSSeongJae Park 1567*baa489faSSeongJae Park new_pkru = ~read_pkey_reg(); 1568*baa489faSSeongJae Park /* Don't make PROT_EXEC mappings inaccessible */ 1569*baa489faSSeongJae Park new_pkru &= ~3; 1570*baa489faSSeongJae Park 1571*baa489faSSeongJae Park child = fork(); 1572*baa489faSSeongJae Park pkey_assert(child >= 0); 1573*baa489faSSeongJae Park dprintf3("[%d] fork() ret: %d\n", getpid(), child); 1574*baa489faSSeongJae Park if (!child) { 1575*baa489faSSeongJae Park ptrace(PTRACE_TRACEME, 0, 0, 0); 1576*baa489faSSeongJae Park /* Stop and allow the tracer to modify PKRU directly */ 1577*baa489faSSeongJae Park raise(SIGSTOP); 1578*baa489faSSeongJae Park 1579*baa489faSSeongJae Park /* 1580*baa489faSSeongJae Park * need __read_pkey_reg() version so we do not do shadow_pkey_reg 1581*baa489faSSeongJae Park * checking 1582*baa489faSSeongJae Park */ 1583*baa489faSSeongJae Park if (__read_pkey_reg() != new_pkru) 1584*baa489faSSeongJae Park exit(1); 1585*baa489faSSeongJae Park 1586*baa489faSSeongJae Park /* Stop and allow the tracer to clear XSTATE_BV for PKRU */ 1587*baa489faSSeongJae Park raise(SIGSTOP); 1588*baa489faSSeongJae Park 1589*baa489faSSeongJae Park if (__read_pkey_reg() != 0) 1590*baa489faSSeongJae Park exit(1); 1591*baa489faSSeongJae Park 1592*baa489faSSeongJae Park /* Stop and allow the tracer to examine PKRU */ 1593*baa489faSSeongJae Park raise(SIGSTOP); 1594*baa489faSSeongJae Park 1595*baa489faSSeongJae Park exit(0); 1596*baa489faSSeongJae Park } 1597*baa489faSSeongJae Park 1598*baa489faSSeongJae Park pkey_assert(child == waitpid(child, &status, 0)); 1599*baa489faSSeongJae Park dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status); 1600*baa489faSSeongJae Park pkey_assert(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP); 1601*baa489faSSeongJae Park 1602*baa489faSSeongJae Park xsave = (void 
*)malloc(xsave_size); 1603*baa489faSSeongJae Park pkey_assert(xsave > 0); 1604*baa489faSSeongJae Park 1605*baa489faSSeongJae Park /* Modify the PKRU register directly */ 1606*baa489faSSeongJae Park iov.iov_base = xsave; 1607*baa489faSSeongJae Park iov.iov_len = xsave_size; 1608*baa489faSSeongJae Park ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_X86_XSTATE, &iov); 1609*baa489faSSeongJae Park pkey_assert(ret == 0); 1610*baa489faSSeongJae Park 1611*baa489faSSeongJae Park pkey_register = (u32 *)(xsave + pkey_offset); 1612*baa489faSSeongJae Park pkey_assert(*pkey_register == read_pkey_reg()); 1613*baa489faSSeongJae Park 1614*baa489faSSeongJae Park *pkey_register = new_pkru; 1615*baa489faSSeongJae Park 1616*baa489faSSeongJae Park ret = ptrace(PTRACE_SETREGSET, child, (void *)NT_X86_XSTATE, &iov); 1617*baa489faSSeongJae Park pkey_assert(ret == 0); 1618*baa489faSSeongJae Park 1619*baa489faSSeongJae Park /* Test that the modification is visible in ptrace before any execution */ 1620*baa489faSSeongJae Park memset(xsave, 0xCC, xsave_size); 1621*baa489faSSeongJae Park ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_X86_XSTATE, &iov); 1622*baa489faSSeongJae Park pkey_assert(ret == 0); 1623*baa489faSSeongJae Park pkey_assert(*pkey_register == new_pkru); 1624*baa489faSSeongJae Park 1625*baa489faSSeongJae Park /* Execute the tracee */ 1626*baa489faSSeongJae Park ret = ptrace(PTRACE_CONT, child, 0, 0); 1627*baa489faSSeongJae Park pkey_assert(ret == 0); 1628*baa489faSSeongJae Park 1629*baa489faSSeongJae Park /* Test that the tracee saw the PKRU value change */ 1630*baa489faSSeongJae Park pkey_assert(child == waitpid(child, &status, 0)); 1631*baa489faSSeongJae Park dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status); 1632*baa489faSSeongJae Park pkey_assert(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP); 1633*baa489faSSeongJae Park 1634*baa489faSSeongJae Park /* Test that the modification is visible in ptrace after execution */ 1635*baa489faSSeongJae Park 
memset(xsave, 0xCC, xsave_size); 1636*baa489faSSeongJae Park ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_X86_XSTATE, &iov); 1637*baa489faSSeongJae Park pkey_assert(ret == 0); 1638*baa489faSSeongJae Park pkey_assert(*pkey_register == new_pkru); 1639*baa489faSSeongJae Park 1640*baa489faSSeongJae Park /* Clear the PKRU bit from XSTATE_BV */ 1641*baa489faSSeongJae Park xstate_bv = (u64 *)(xsave + 512); 1642*baa489faSSeongJae Park *xstate_bv &= ~(1 << 9); 1643*baa489faSSeongJae Park 1644*baa489faSSeongJae Park ret = ptrace(PTRACE_SETREGSET, child, (void *)NT_X86_XSTATE, &iov); 1645*baa489faSSeongJae Park pkey_assert(ret == 0); 1646*baa489faSSeongJae Park 1647*baa489faSSeongJae Park /* Test that the modification is visible in ptrace before any execution */ 1648*baa489faSSeongJae Park memset(xsave, 0xCC, xsave_size); 1649*baa489faSSeongJae Park ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_X86_XSTATE, &iov); 1650*baa489faSSeongJae Park pkey_assert(ret == 0); 1651*baa489faSSeongJae Park pkey_assert(*pkey_register == 0); 1652*baa489faSSeongJae Park 1653*baa489faSSeongJae Park ret = ptrace(PTRACE_CONT, child, 0, 0); 1654*baa489faSSeongJae Park pkey_assert(ret == 0); 1655*baa489faSSeongJae Park 1656*baa489faSSeongJae Park /* Test that the tracee saw the PKRU value go to 0 */ 1657*baa489faSSeongJae Park pkey_assert(child == waitpid(child, &status, 0)); 1658*baa489faSSeongJae Park dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status); 1659*baa489faSSeongJae Park pkey_assert(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP); 1660*baa489faSSeongJae Park 1661*baa489faSSeongJae Park /* Test that the modification is visible in ptrace after execution */ 1662*baa489faSSeongJae Park memset(xsave, 0xCC, xsave_size); 1663*baa489faSSeongJae Park ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_X86_XSTATE, &iov); 1664*baa489faSSeongJae Park pkey_assert(ret == 0); 1665*baa489faSSeongJae Park pkey_assert(*pkey_register == 0); 1666*baa489faSSeongJae Park 
1667*baa489faSSeongJae Park ret = ptrace(PTRACE_CONT, child, 0, 0); 1668*baa489faSSeongJae Park pkey_assert(ret == 0); 1669*baa489faSSeongJae Park pkey_assert(child == waitpid(child, &status, 0)); 1670*baa489faSSeongJae Park dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status); 1671*baa489faSSeongJae Park pkey_assert(WIFEXITED(status)); 1672*baa489faSSeongJae Park pkey_assert(WEXITSTATUS(status) == 0); 1673*baa489faSSeongJae Park free(xsave); 1674*baa489faSSeongJae Park } 1675*baa489faSSeongJae Park #endif 1676*baa489faSSeongJae Park 1677*baa489faSSeongJae Park void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey) 1678*baa489faSSeongJae Park { 1679*baa489faSSeongJae Park int size = PAGE_SIZE; 1680*baa489faSSeongJae Park int sret; 1681*baa489faSSeongJae Park 1682*baa489faSSeongJae Park if (cpu_has_pkeys()) { 1683*baa489faSSeongJae Park dprintf1("SKIP: %s: no CPU support\n", __func__); 1684*baa489faSSeongJae Park return; 1685*baa489faSSeongJae Park } 1686*baa489faSSeongJae Park 1687*baa489faSSeongJae Park sret = syscall(SYS_mprotect_key, ptr, size, PROT_READ, pkey); 1688*baa489faSSeongJae Park pkey_assert(sret < 0); 1689*baa489faSSeongJae Park } 1690*baa489faSSeongJae Park 1691*baa489faSSeongJae Park void (*pkey_tests[])(int *ptr, u16 pkey) = { 1692*baa489faSSeongJae Park test_read_of_write_disabled_region, 1693*baa489faSSeongJae Park test_read_of_access_disabled_region, 1694*baa489faSSeongJae Park test_read_of_access_disabled_region_with_page_already_mapped, 1695*baa489faSSeongJae Park test_write_of_write_disabled_region, 1696*baa489faSSeongJae Park test_write_of_write_disabled_region_with_page_already_mapped, 1697*baa489faSSeongJae Park test_write_of_access_disabled_region, 1698*baa489faSSeongJae Park test_write_of_access_disabled_region_with_page_already_mapped, 1699*baa489faSSeongJae Park test_kernel_write_of_access_disabled_region, 1700*baa489faSSeongJae Park test_kernel_write_of_write_disabled_region, 1701*baa489faSSeongJae Park 
test_kernel_gup_of_access_disabled_region, 1702*baa489faSSeongJae Park test_kernel_gup_write_to_write_disabled_region, 1703*baa489faSSeongJae Park test_executing_on_unreadable_memory, 1704*baa489faSSeongJae Park test_implicit_mprotect_exec_only_memory, 1705*baa489faSSeongJae Park test_mprotect_with_pkey_0, 1706*baa489faSSeongJae Park test_ptrace_of_child, 1707*baa489faSSeongJae Park test_pkey_init_state, 1708*baa489faSSeongJae Park test_pkey_syscalls_on_non_allocated_pkey, 1709*baa489faSSeongJae Park test_pkey_syscalls_bad_args, 1710*baa489faSSeongJae Park test_pkey_alloc_exhaust, 1711*baa489faSSeongJae Park test_pkey_alloc_free_attach_pkey0, 1712*baa489faSSeongJae Park #if defined(__i386__) || defined(__x86_64__) 1713*baa489faSSeongJae Park test_ptrace_modifies_pkru, 1714*baa489faSSeongJae Park #endif 1715*baa489faSSeongJae Park }; 1716*baa489faSSeongJae Park 1717*baa489faSSeongJae Park void run_tests_once(void) 1718*baa489faSSeongJae Park { 1719*baa489faSSeongJae Park int *ptr; 1720*baa489faSSeongJae Park int prot = PROT_READ|PROT_WRITE; 1721*baa489faSSeongJae Park 1722*baa489faSSeongJae Park for (test_nr = 0; test_nr < ARRAY_SIZE(pkey_tests); test_nr++) { 1723*baa489faSSeongJae Park int pkey; 1724*baa489faSSeongJae Park int orig_pkey_faults = pkey_faults; 1725*baa489faSSeongJae Park 1726*baa489faSSeongJae Park dprintf1("======================\n"); 1727*baa489faSSeongJae Park dprintf1("test %d preparing...\n", test_nr); 1728*baa489faSSeongJae Park 1729*baa489faSSeongJae Park tracing_on(); 1730*baa489faSSeongJae Park pkey = alloc_random_pkey(); 1731*baa489faSSeongJae Park dprintf1("test %d starting with pkey: %d\n", test_nr, pkey); 1732*baa489faSSeongJae Park ptr = malloc_pkey(PAGE_SIZE, prot, pkey); 1733*baa489faSSeongJae Park dprintf1("test %d starting...\n", test_nr); 1734*baa489faSSeongJae Park pkey_tests[test_nr](ptr, pkey); 1735*baa489faSSeongJae Park dprintf1("freeing test memory: %p\n", ptr); 1736*baa489faSSeongJae Park free_pkey_malloc(ptr); 
1737*baa489faSSeongJae Park sys_pkey_free(pkey); 1738*baa489faSSeongJae Park 1739*baa489faSSeongJae Park dprintf1("pkey_faults: %d\n", pkey_faults); 1740*baa489faSSeongJae Park dprintf1("orig_pkey_faults: %d\n", orig_pkey_faults); 1741*baa489faSSeongJae Park 1742*baa489faSSeongJae Park tracing_off(); 1743*baa489faSSeongJae Park close_test_fds(); 1744*baa489faSSeongJae Park 1745*baa489faSSeongJae Park printf("test %2d PASSED (iteration %d)\n", test_nr, iteration_nr); 1746*baa489faSSeongJae Park dprintf1("======================\n\n"); 1747*baa489faSSeongJae Park } 1748*baa489faSSeongJae Park iteration_nr++; 1749*baa489faSSeongJae Park } 1750*baa489faSSeongJae Park 1751*baa489faSSeongJae Park void pkey_setup_shadow(void) 1752*baa489faSSeongJae Park { 1753*baa489faSSeongJae Park shadow_pkey_reg = __read_pkey_reg(); 1754*baa489faSSeongJae Park } 1755*baa489faSSeongJae Park 1756*baa489faSSeongJae Park int main(void) 1757*baa489faSSeongJae Park { 1758*baa489faSSeongJae Park int nr_iterations = 22; 1759*baa489faSSeongJae Park int pkeys_supported = is_pkeys_supported(); 1760*baa489faSSeongJae Park 1761*baa489faSSeongJae Park srand((unsigned int)time(NULL)); 1762*baa489faSSeongJae Park 1763*baa489faSSeongJae Park setup_handlers(); 1764*baa489faSSeongJae Park 1765*baa489faSSeongJae Park printf("has pkeys: %d\n", pkeys_supported); 1766*baa489faSSeongJae Park 1767*baa489faSSeongJae Park if (!pkeys_supported) { 1768*baa489faSSeongJae Park int size = PAGE_SIZE; 1769*baa489faSSeongJae Park int *ptr; 1770*baa489faSSeongJae Park 1771*baa489faSSeongJae Park printf("running PKEY tests for unsupported CPU/OS\n"); 1772*baa489faSSeongJae Park 1773*baa489faSSeongJae Park ptr = mmap(NULL, size, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); 1774*baa489faSSeongJae Park assert(ptr != (void *)-1); 1775*baa489faSSeongJae Park test_mprotect_pkey_on_unsupported_cpu(ptr, 1); 1776*baa489faSSeongJae Park exit(0); 1777*baa489faSSeongJae Park } 1778*baa489faSSeongJae Park 1779*baa489faSSeongJae Park 
pkey_setup_shadow(); 1780*baa489faSSeongJae Park printf("startup pkey_reg: %016llx\n", read_pkey_reg()); 1781*baa489faSSeongJae Park setup_hugetlbfs(); 1782*baa489faSSeongJae Park 1783*baa489faSSeongJae Park while (nr_iterations-- > 0) 1784*baa489faSSeongJae Park run_tests_once(); 1785*baa489faSSeongJae Park 1786*baa489faSSeongJae Park printf("done (all tests OK)\n"); 1787*baa489faSSeongJae Park return 0; 1788*baa489faSSeongJae Park } 1789