1baa489faSSeongJae Park // SPDX-License-Identifier: GPL-2.0 2baa489faSSeongJae Park /* 3baa489faSSeongJae Park * Tests Memory Protection Keys (see Documentation/core-api/protection-keys.rst) 4baa489faSSeongJae Park * 5baa489faSSeongJae Park * There are examples in here of: 6baa489faSSeongJae Park * * how to set protection keys on memory 7baa489faSSeongJae Park * * how to set/clear bits in pkey registers (the rights register) 8baa489faSSeongJae Park * * how to handle SEGV_PKUERR signals and extract pkey-relevant 9baa489faSSeongJae Park * information from the siginfo 10baa489faSSeongJae Park * 11baa489faSSeongJae Park * Things to add: 12baa489faSSeongJae Park * make sure KSM and KSM COW breaking works 13baa489faSSeongJae Park * prefault pages in at malloc, or not 14baa489faSSeongJae Park * protect MPX bounds tables with protection keys? 15baa489faSSeongJae Park * make sure VMA splitting/merging is working correctly 16baa489faSSeongJae Park * OOMs can destroy mm->mmap (see exit_mmap()), so make sure it is immune to pkeys 17baa489faSSeongJae Park * look for pkey "leaks" where it is still set on a VMA but "freed" back to the kernel 18baa489faSSeongJae Park * do a plain mprotect() to a mprotect_pkey() area and make sure the pkey sticks 19baa489faSSeongJae Park * 20baa489faSSeongJae Park * Compile like this: 21baa489faSSeongJae Park * gcc -mxsave -o protection_keys -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm 22baa489faSSeongJae Park * gcc -mxsave -m32 -o protection_keys_32 -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm 23baa489faSSeongJae Park */ 24baa489faSSeongJae Park #define _GNU_SOURCE 25baa489faSSeongJae Park #define __SANE_USERSPACE_TYPES__ 26baa489faSSeongJae Park #include <errno.h> 27baa489faSSeongJae Park #include <linux/elf.h> 28baa489faSSeongJae Park #include <linux/futex.h> 29baa489faSSeongJae Park #include <time.h> 30baa489faSSeongJae Park #include <sys/time.h> 31baa489faSSeongJae Park #include <sys/syscall.h> 32baa489faSSeongJae Park #include <string.h> 33baa489faSSeongJae Park #include <stdio.h> 34baa489faSSeongJae Park #include <stdint.h> 35baa489faSSeongJae Park #include <stdbool.h> 36baa489faSSeongJae Park #include <signal.h> 37baa489faSSeongJae Park #include <assert.h> 38baa489faSSeongJae Park #include <stdlib.h> 39baa489faSSeongJae Park #include <ucontext.h> 40baa489faSSeongJae Park #include <sys/mman.h> 41baa489faSSeongJae Park #include <sys/types.h> 42baa489faSSeongJae Park #include <sys/wait.h> 43baa489faSSeongJae Park #include <sys/stat.h> 44baa489faSSeongJae Park #include <fcntl.h> 45baa489faSSeongJae Park #include <unistd.h> 46baa489faSSeongJae Park #include <sys/ptrace.h> 47baa489faSSeongJae Park #include <setjmp.h> 48baa489faSSeongJae Park 49baa489faSSeongJae Park #include "pkey-helpers.h" 50baa489faSSeongJae Park 51baa489faSSeongJae Park int iteration_nr = 1; 52baa489faSSeongJae Park int test_nr; 53baa489faSSeongJae Park 54baa489faSSeongJae Park u64 shadow_pkey_reg; 55baa489faSSeongJae Park int dprint_in_signal; 56baa489faSSeongJae Park char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE]; 57baa489faSSeongJae Park 58baa489faSSeongJae Park void cat_into_file(char *str, char *file) 59baa489faSSeongJae Park { 60baa489faSSeongJae Park int fd = open(file, O_RDWR); 61baa489faSSeongJae Park int ret; 62baa489faSSeongJae Park 63baa489faSSeongJae Park dprintf2("%s(): writing '%s' to '%s'\n", __func__, str, file); 64baa489faSSeongJae Park /* 65baa489faSSeongJae Park * these need to be raw because they are called under 66baa489faSSeongJae Park * pkey_assert() 67baa489faSSeongJae Park */ 68baa489faSSeongJae Park if (fd < 0) { 69baa489faSSeongJae Park fprintf(stderr, "error opening '%s'\n", str); 70baa489faSSeongJae Park perror("error: "); 71baa489faSSeongJae Park exit(__LINE__); 72baa489faSSeongJae Park } 73baa489faSSeongJae Park 74baa489faSSeongJae Park ret = write(fd, str, strlen(str)); 75baa489faSSeongJae Park if (ret != strlen(str)) { 76baa489faSSeongJae Park perror("write to file failed"); 77baa489faSSeongJae Park fprintf(stderr, "filename: '%s' str: '%s'\n", file, str); 78baa489faSSeongJae Park exit(__LINE__); 79baa489faSSeongJae Park } 80baa489faSSeongJae Park close(fd); 81baa489faSSeongJae Park } 82baa489faSSeongJae Park 83baa489faSSeongJae Park #if CONTROL_TRACING > 0 84baa489faSSeongJae Park static int warned_tracing; 85baa489faSSeongJae Park int tracing_root_ok(void) 86baa489faSSeongJae Park { 87baa489faSSeongJae Park if (geteuid() != 0) { 88baa489faSSeongJae Park if (!warned_tracing) 89baa489faSSeongJae Park fprintf(stderr, "WARNING: not run as root, " 90baa489faSSeongJae Park "can not do tracing control\n"); 91baa489faSSeongJae Park warned_tracing = 1; 92baa489faSSeongJae Park return 0; 93baa489faSSeongJae Park } 94baa489faSSeongJae Park return 1; 95baa489faSSeongJae Park } 96baa489faSSeongJae Park #endif 97baa489faSSeongJae Park 98baa489faSSeongJae Park void tracing_on(void) 99baa489faSSeongJae Park { 100baa489faSSeongJae Park #if CONTROL_TRACING > 0 101*4336cc15SRoss Zwisler #define TRACEDIR "/sys/kernel/tracing" 102baa489faSSeongJae Park char pidstr[32]; 103baa489faSSeongJae Park 104baa489faSSeongJae Park if (!tracing_root_ok()) 105baa489faSSeongJae Park return; 106baa489faSSeongJae Park 107baa489faSSeongJae Park sprintf(pidstr, "%d", getpid()); 108baa489faSSeongJae Park cat_into_file("0", TRACEDIR "/tracing_on"); 109baa489faSSeongJae Park cat_into_file("\n", TRACEDIR "/trace"); 110baa489faSSeongJae Park if (1) { 111baa489faSSeongJae Park cat_into_file("function_graph", TRACEDIR "/current_tracer"); 112baa489faSSeongJae Park cat_into_file("1", TRACEDIR "/options/funcgraph-proc"); 113baa489faSSeongJae Park } else { 114baa489faSSeongJae Park cat_into_file("nop", TRACEDIR "/current_tracer"); 115baa489faSSeongJae Park } 116baa489faSSeongJae Park cat_into_file(pidstr, TRACEDIR "/set_ftrace_pid"); 117baa489faSSeongJae Park cat_into_file("1", TRACEDIR "/tracing_on"); 118baa489faSSeongJae Park dprintf1("enabled tracing\n"); 119baa489faSSeongJae Park #endif 120baa489faSSeongJae Park } 121baa489faSSeongJae Park 122baa489faSSeongJae Park void tracing_off(void) 123baa489faSSeongJae Park { 124baa489faSSeongJae Park #if CONTROL_TRACING > 0 125baa489faSSeongJae Park if (!tracing_root_ok()) 126baa489faSSeongJae Park return; 127*4336cc15SRoss Zwisler cat_into_file("0", "/sys/kernel/tracing/tracing_on"); 128baa489faSSeongJae Park #endif 129baa489faSSeongJae Park } 130baa489faSSeongJae Park 131baa489faSSeongJae Park void abort_hooks(void) 132baa489faSSeongJae Park { 133baa489faSSeongJae Park fprintf(stderr, "running %s()...\n", __func__); 134baa489faSSeongJae Park tracing_off(); 135baa489faSSeongJae Park #ifdef SLEEP_ON_ABORT 136baa489faSSeongJae Park sleep(SLEEP_ON_ABORT); 137baa489faSSeongJae Park #endif 138baa489faSSeongJae Park } 139baa489faSSeongJae Park 140baa489faSSeongJae Park /* 141baa489faSSeongJae Park * This attempts to have roughly a page of instructions followed by a few 142baa489faSSeongJae Park * instructions that do a write, and another page of instructions. That 143baa489faSSeongJae Park * way, we are pretty sure that the write is in the second page of 144baa489faSSeongJae Park * instructions and has at least a page of padding behind it. 145baa489faSSeongJae Park * 146baa489faSSeongJae Park * *That* lets us be sure to madvise() away the write instruction, which 147baa489faSSeongJae Park * will then fault, which makes sure that the fault code handles 148baa489faSSeongJae Park * execute-only memory properly. 149baa489faSSeongJae Park */ 150baa489faSSeongJae Park #ifdef __powerpc64__ 151baa489faSSeongJae Park /* This way, both 4K and 64K alignment are maintained */ 152baa489faSSeongJae Park __attribute__((__aligned__(65536))) 153baa489faSSeongJae Park #else 154baa489faSSeongJae Park __attribute__((__aligned__(PAGE_SIZE))) 155baa489faSSeongJae Park #endif 156baa489faSSeongJae Park void lots_o_noops_around_write(int *write_to_me) 157baa489faSSeongJae Park { 158baa489faSSeongJae Park dprintf3("running %s()\n", __func__); 159baa489faSSeongJae Park __page_o_noops(); 160baa489faSSeongJae Park /* Assume this happens in the second page of instructions: */ 161baa489faSSeongJae Park *write_to_me = __LINE__; 162baa489faSSeongJae Park /* pad out by another page: */ 163baa489faSSeongJae Park __page_o_noops(); 164baa489faSSeongJae Park dprintf3("%s() done\n", __func__); 165baa489faSSeongJae Park } 166baa489faSSeongJae Park 167baa489faSSeongJae Park void dump_mem(void *dumpme, int len_bytes) 168baa489faSSeongJae Park { 169baa489faSSeongJae Park char *c = (void *)dumpme; 170baa489faSSeongJae Park int i; 171baa489faSSeongJae Park 172baa489faSSeongJae Park for (i = 0; i < len_bytes; i += sizeof(u64)) { 173baa489faSSeongJae Park u64 *ptr = (u64 *)(c + i); 174baa489faSSeongJae Park dprintf1("dump[%03d][@%p]: %016llx\n", i, ptr, *ptr); 175baa489faSSeongJae Park } 176baa489faSSeongJae Park } 177baa489faSSeongJae Park 178baa489faSSeongJae Park static u32 hw_pkey_get(int pkey, unsigned long flags) 179baa489faSSeongJae Park { 180baa489faSSeongJae Park u64 pkey_reg = __read_pkey_reg(); 181baa489faSSeongJae Park 182baa489faSSeongJae Park dprintf1("%s(pkey=%d, flags=%lx) = %x / %d\n", 183baa489faSSeongJae Park __func__, pkey, flags, 0, 0); 184baa489faSSeongJae Park dprintf2("%s() raw pkey_reg: %016llx\n", __func__, pkey_reg); 185baa489faSSeongJae Park 186baa489faSSeongJae Park return (u32) get_pkey_bits(pkey_reg, pkey); 187baa489faSSeongJae Park } 188baa489faSSeongJae Park 189baa489faSSeongJae Park static int hw_pkey_set(int pkey, unsigned long rights, unsigned long flags) 190baa489faSSeongJae Park { 191baa489faSSeongJae Park u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE); 192baa489faSSeongJae Park u64 old_pkey_reg = __read_pkey_reg(); 193baa489faSSeongJae Park u64 new_pkey_reg; 194baa489faSSeongJae Park 195baa489faSSeongJae Park /* make sure that 'rights' only contains the bits we expect: */ 196baa489faSSeongJae Park assert(!(rights & ~mask)); 197baa489faSSeongJae Park 198baa489faSSeongJae Park /* modify bits accordingly in old pkey_reg and assign it */ 199baa489faSSeongJae Park new_pkey_reg = set_pkey_bits(old_pkey_reg, pkey, rights); 200baa489faSSeongJae Park 201baa489faSSeongJae Park __write_pkey_reg(new_pkey_reg); 202baa489faSSeongJae Park 203baa489faSSeongJae Park dprintf3("%s(pkey=%d, rights=%lx, flags=%lx) = %x" 204baa489faSSeongJae Park " pkey_reg now: %016llx old_pkey_reg: %016llx\n", 205baa489faSSeongJae Park __func__, pkey, rights, flags, 0, __read_pkey_reg(), 206baa489faSSeongJae Park old_pkey_reg); 207baa489faSSeongJae Park return 0; 208baa489faSSeongJae Park } 209baa489faSSeongJae Park 210baa489faSSeongJae Park void pkey_disable_set(int pkey, int flags) 211baa489faSSeongJae Park { 212baa489faSSeongJae Park unsigned long syscall_flags = 0; 213baa489faSSeongJae Park int ret; 214baa489faSSeongJae Park int pkey_rights; 215baa489faSSeongJae Park u64 orig_pkey_reg = read_pkey_reg(); 216baa489faSSeongJae Park 217baa489faSSeongJae Park dprintf1("START->%s(%d, 0x%x)\n", __func__, 218baa489faSSeongJae Park pkey, flags); 219baa489faSSeongJae Park pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE)); 220baa489faSSeongJae Park 221baa489faSSeongJae Park pkey_rights = hw_pkey_get(pkey, syscall_flags); 222baa489faSSeongJae Park 223baa489faSSeongJae Park dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__, 224baa489faSSeongJae Park pkey, pkey, pkey_rights); 225baa489faSSeongJae Park 226baa489faSSeongJae Park pkey_assert(pkey_rights >= 0); 227baa489faSSeongJae Park 228baa489faSSeongJae Park pkey_rights |= flags; 229baa489faSSeongJae Park 230baa489faSSeongJae Park ret = hw_pkey_set(pkey, pkey_rights, syscall_flags); 231baa489faSSeongJae Park assert(!ret); 232baa489faSSeongJae Park /* pkey_reg and flags have the same format */ 233baa489faSSeongJae Park shadow_pkey_reg = set_pkey_bits(shadow_pkey_reg, pkey, pkey_rights); 234baa489faSSeongJae Park dprintf1("%s(%d) shadow: 0x%016llx\n", 235baa489faSSeongJae Park __func__, pkey, shadow_pkey_reg); 236baa489faSSeongJae Park 237baa489faSSeongJae Park pkey_assert(ret >= 0); 238baa489faSSeongJae Park 239baa489faSSeongJae Park pkey_rights = hw_pkey_get(pkey, syscall_flags); 240baa489faSSeongJae Park dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__, 241baa489faSSeongJae Park pkey, pkey, pkey_rights); 242baa489faSSeongJae Park 243baa489faSSeongJae Park dprintf1("%s(%d) pkey_reg: 0x%016llx\n", 244baa489faSSeongJae Park __func__, pkey, read_pkey_reg()); 245baa489faSSeongJae Park if (flags) 246baa489faSSeongJae Park pkey_assert(read_pkey_reg() >= orig_pkey_reg); 247baa489faSSeongJae Park dprintf1("END<---%s(%d, 0x%x)\n", __func__, 248baa489faSSeongJae Park pkey, flags); 249baa489faSSeongJae Park } 250baa489faSSeongJae Park 251baa489faSSeongJae Park void pkey_disable_clear(int pkey, int flags) 252baa489faSSeongJae Park { 253baa489faSSeongJae Park unsigned long syscall_flags = 0; 254baa489faSSeongJae Park int ret; 255baa489faSSeongJae Park int pkey_rights = hw_pkey_get(pkey, syscall_flags); 256baa489faSSeongJae Park u64 orig_pkey_reg = read_pkey_reg(); 257baa489faSSeongJae Park 258baa489faSSeongJae Park pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE)); 259baa489faSSeongJae Park 260baa489faSSeongJae Park dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__, 261baa489faSSeongJae Park pkey, pkey, pkey_rights); 262baa489faSSeongJae Park pkey_assert(pkey_rights >= 0); 263baa489faSSeongJae Park 264baa489faSSeongJae Park pkey_rights &= ~flags; 265baa489faSSeongJae Park 266baa489faSSeongJae Park ret = hw_pkey_set(pkey, pkey_rights, 0); 267baa489faSSeongJae Park shadow_pkey_reg = set_pkey_bits(shadow_pkey_reg, pkey, pkey_rights); 268baa489faSSeongJae Park pkey_assert(ret >= 0); 269baa489faSSeongJae Park 270baa489faSSeongJae Park pkey_rights = hw_pkey_get(pkey, syscall_flags); 271baa489faSSeongJae Park dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__, 272baa489faSSeongJae Park pkey, pkey, pkey_rights); 273baa489faSSeongJae Park 274baa489faSSeongJae Park dprintf1("%s(%d) pkey_reg: 0x%016llx\n", __func__, 275baa489faSSeongJae Park pkey, read_pkey_reg()); 276baa489faSSeongJae Park if (flags) 277baa489faSSeongJae Park assert(read_pkey_reg() <= orig_pkey_reg); 278baa489faSSeongJae Park } 279baa489faSSeongJae Park 280baa489faSSeongJae Park void pkey_write_allow(int pkey) 281baa489faSSeongJae Park { 282baa489faSSeongJae Park pkey_disable_clear(pkey, PKEY_DISABLE_WRITE); 283baa489faSSeongJae Park } 284baa489faSSeongJae Park void pkey_write_deny(int pkey) 285baa489faSSeongJae Park { 286baa489faSSeongJae Park pkey_disable_set(pkey, PKEY_DISABLE_WRITE); 287baa489faSSeongJae Park } 288baa489faSSeongJae Park void pkey_access_allow(int pkey) 289baa489faSSeongJae Park { 290baa489faSSeongJae Park pkey_disable_clear(pkey, PKEY_DISABLE_ACCESS); 291baa489faSSeongJae Park } 292baa489faSSeongJae Park void pkey_access_deny(int pkey) 293baa489faSSeongJae Park { 294baa489faSSeongJae Park pkey_disable_set(pkey, PKEY_DISABLE_ACCESS); 295baa489faSSeongJae Park } 296baa489faSSeongJae Park 297baa489faSSeongJae Park /* Failed address bound checks: */ 298baa489faSSeongJae Park #ifndef SEGV_BNDERR 299baa489faSSeongJae Park # define SEGV_BNDERR 3 300baa489faSSeongJae Park #endif 301baa489faSSeongJae Park 302baa489faSSeongJae Park #ifndef SEGV_PKUERR 303baa489faSSeongJae Park # define SEGV_PKUERR 4 304baa489faSSeongJae Park #endif 305baa489faSSeongJae Park 306baa489faSSeongJae Park static char *si_code_str(int si_code) 307baa489faSSeongJae Park { 308baa489faSSeongJae Park if (si_code == SEGV_MAPERR) 309baa489faSSeongJae Park return "SEGV_MAPERR"; 310baa489faSSeongJae Park if (si_code == SEGV_ACCERR) 311baa489faSSeongJae Park return "SEGV_ACCERR"; 312baa489faSSeongJae Park if (si_code == SEGV_BNDERR) 313baa489faSSeongJae Park return "SEGV_BNDERR"; 314baa489faSSeongJae Park if (si_code == SEGV_PKUERR) 315baa489faSSeongJae Park return "SEGV_PKUERR"; 316baa489faSSeongJae Park return "UNKNOWN"; 317baa489faSSeongJae Park } 318baa489faSSeongJae Park 319baa489faSSeongJae Park int pkey_faults; 320baa489faSSeongJae Park int last_si_pkey = -1; 321baa489faSSeongJae Park void signal_handler(int signum, siginfo_t *si, void *vucontext) 322baa489faSSeongJae Park { 323baa489faSSeongJae Park ucontext_t *uctxt = vucontext; 324baa489faSSeongJae Park int trapno; 325baa489faSSeongJae Park unsigned long ip; 326baa489faSSeongJae Park char *fpregs; 327baa489faSSeongJae Park #if defined(__i386__) || defined(__x86_64__) /* arch */ 328baa489faSSeongJae Park u32 *pkey_reg_ptr; 329baa489faSSeongJae Park int pkey_reg_offset; 330baa489faSSeongJae Park #endif /* arch */ 331baa489faSSeongJae Park u64 siginfo_pkey; 332baa489faSSeongJae Park u32 *si_pkey_ptr; 333baa489faSSeongJae Park 334baa489faSSeongJae Park dprint_in_signal = 1; 335baa489faSSeongJae Park dprintf1(">>>>===============SIGSEGV============================\n"); 336baa489faSSeongJae Park dprintf1("%s()::%d, pkey_reg: 0x%016llx shadow: %016llx\n", 337baa489faSSeongJae Park __func__, __LINE__, 338baa489faSSeongJae Park __read_pkey_reg(), shadow_pkey_reg); 339baa489faSSeongJae Park 340baa489faSSeongJae Park trapno = uctxt->uc_mcontext.gregs[REG_TRAPNO]; 341baa489faSSeongJae Park ip = uctxt->uc_mcontext.gregs[REG_IP_IDX]; 342baa489faSSeongJae Park fpregs = (char *) uctxt->uc_mcontext.fpregs; 343baa489faSSeongJae Park 344baa489faSSeongJae Park dprintf2("%s() trapno: %d ip: 0x%016lx info->si_code: %s/%d\n", 345baa489faSSeongJae Park __func__, trapno, ip, si_code_str(si->si_code), 346baa489faSSeongJae Park si->si_code); 347baa489faSSeongJae Park 348baa489faSSeongJae Park #if defined(__i386__) || defined(__x86_64__) /* arch */ 349baa489faSSeongJae Park #ifdef __i386__ 350baa489faSSeongJae Park /* 351baa489faSSeongJae Park * 32-bit has some extra padding so that userspace can tell whether 352baa489faSSeongJae Park * the XSTATE header is present in addition to the "legacy" FPU 353baa489faSSeongJae Park * state. We just assume that it is here. 354baa489faSSeongJae Park */ 355baa489faSSeongJae Park fpregs += 0x70; 356baa489faSSeongJae Park #endif /* i386 */ 357baa489faSSeongJae Park pkey_reg_offset = pkey_reg_xstate_offset(); 358baa489faSSeongJae Park pkey_reg_ptr = (void *)(&fpregs[pkey_reg_offset]); 359baa489faSSeongJae Park 360baa489faSSeongJae Park /* 361baa489faSSeongJae Park * If we got a PKEY fault, we *HAVE* to have at least one bit set in 362baa489faSSeongJae Park * here. 363baa489faSSeongJae Park */ 364baa489faSSeongJae Park dprintf1("pkey_reg_xstate_offset: %d\n", pkey_reg_xstate_offset()); 365baa489faSSeongJae Park if (DEBUG_LEVEL > 4) 366baa489faSSeongJae Park dump_mem(pkey_reg_ptr - 128, 256); 367baa489faSSeongJae Park pkey_assert(*pkey_reg_ptr); 368baa489faSSeongJae Park #endif /* arch */ 369baa489faSSeongJae Park 370baa489faSSeongJae Park dprintf1("siginfo: %p\n", si); 371baa489faSSeongJae Park dprintf1(" fpregs: %p\n", fpregs); 372baa489faSSeongJae Park 373baa489faSSeongJae Park if ((si->si_code == SEGV_MAPERR) || 374baa489faSSeongJae Park (si->si_code == SEGV_ACCERR) || 375baa489faSSeongJae Park (si->si_code == SEGV_BNDERR)) { 376baa489faSSeongJae Park printf("non-PK si_code, exiting...\n"); 377baa489faSSeongJae Park exit(4); 378baa489faSSeongJae Park } 379baa489faSSeongJae Park 380baa489faSSeongJae Park si_pkey_ptr = siginfo_get_pkey_ptr(si); 381baa489faSSeongJae Park dprintf1("si_pkey_ptr: %p\n", si_pkey_ptr); 382baa489faSSeongJae Park dump_mem((u8 *)si_pkey_ptr - 8, 24); 383baa489faSSeongJae Park siginfo_pkey = *si_pkey_ptr; 384baa489faSSeongJae Park pkey_assert(siginfo_pkey < NR_PKEYS); 385baa489faSSeongJae Park last_si_pkey = siginfo_pkey; 386baa489faSSeongJae Park 387baa489faSSeongJae Park /* 388baa489faSSeongJae Park * need __read_pkey_reg() version so we do not do shadow_pkey_reg 389baa489faSSeongJae Park * checking 390baa489faSSeongJae Park */ 391baa489faSSeongJae Park dprintf1("signal pkey_reg from pkey_reg: %016llx\n", 392baa489faSSeongJae Park __read_pkey_reg()); 393baa489faSSeongJae Park dprintf1("pkey from siginfo: %016llx\n", siginfo_pkey); 394baa489faSSeongJae Park #if defined(__i386__) || defined(__x86_64__) /* arch */ 395baa489faSSeongJae Park dprintf1("signal pkey_reg from xsave: %08x\n", *pkey_reg_ptr); 396baa489faSSeongJae Park *(u64 *)pkey_reg_ptr = 0x00000000; 397baa489faSSeongJae Park dprintf1("WARNING: set PKEY_REG=0 to allow faulting instruction to continue\n"); 398baa489faSSeongJae Park #elif defined(__powerpc64__) /* arch */ 399baa489faSSeongJae Park /* restore access and let the faulting instruction continue */ 400baa489faSSeongJae Park pkey_access_allow(siginfo_pkey); 401baa489faSSeongJae Park #endif /* arch */ 402baa489faSSeongJae Park pkey_faults++; 403baa489faSSeongJae Park dprintf1("<<<<==================================================\n"); 404baa489faSSeongJae Park dprint_in_signal = 0; 405baa489faSSeongJae Park } 406baa489faSSeongJae Park 407baa489faSSeongJae Park int wait_all_children(void) 408baa489faSSeongJae Park { 409baa489faSSeongJae Park int status; 410baa489faSSeongJae Park return waitpid(-1, &status, 0); 411baa489faSSeongJae Park } 412baa489faSSeongJae Park 413baa489faSSeongJae Park void sig_chld(int x) 414baa489faSSeongJae Park { 415baa489faSSeongJae Park dprint_in_signal = 1; 416baa489faSSeongJae Park dprintf2("[%d] SIGCHLD: %d\n", getpid(), x); 417baa489faSSeongJae Park dprint_in_signal = 0; 418baa489faSSeongJae Park } 419baa489faSSeongJae Park 420baa489faSSeongJae Park void setup_sigsegv_handler(void) 421baa489faSSeongJae Park { 422baa489faSSeongJae Park int r, rs; 423baa489faSSeongJae Park struct sigaction newact; 424baa489faSSeongJae Park struct sigaction oldact; 425baa489faSSeongJae Park 426baa489faSSeongJae Park /* #PF is mapped to sigsegv */ 427baa489faSSeongJae Park int signum = SIGSEGV; 428baa489faSSeongJae Park 429baa489faSSeongJae Park newact.sa_handler = 0; 430baa489faSSeongJae Park newact.sa_sigaction = signal_handler; 431baa489faSSeongJae Park 432baa489faSSeongJae Park /*sigset_t - signals to block while in the handler */ 433baa489faSSeongJae Park /* get the old signal mask. */ 434baa489faSSeongJae Park rs = sigprocmask(SIG_SETMASK, 0, &newact.sa_mask); 435baa489faSSeongJae Park pkey_assert(rs == 0); 436baa489faSSeongJae Park 437baa489faSSeongJae Park /* call sa_sigaction, not sa_handler*/ 438baa489faSSeongJae Park newact.sa_flags = SA_SIGINFO; 439baa489faSSeongJae Park 440baa489faSSeongJae Park newact.sa_restorer = 0; /* void(*)(), obsolete */ 441baa489faSSeongJae Park r = sigaction(signum, &newact, &oldact); 442baa489faSSeongJae Park r = sigaction(SIGALRM, &newact, &oldact); 443baa489faSSeongJae Park pkey_assert(r == 0); 444baa489faSSeongJae Park } 445baa489faSSeongJae Park 446baa489faSSeongJae Park void setup_handlers(void) 447baa489faSSeongJae Park { 448baa489faSSeongJae Park signal(SIGCHLD, &sig_chld); 449baa489faSSeongJae Park setup_sigsegv_handler(); 450baa489faSSeongJae Park } 451baa489faSSeongJae Park 452baa489faSSeongJae Park pid_t fork_lazy_child(void) 453baa489faSSeongJae Park { 454baa489faSSeongJae Park pid_t forkret; 455baa489faSSeongJae Park 456baa489faSSeongJae Park forkret = fork(); 457baa489faSSeongJae Park pkey_assert(forkret >= 0); 458baa489faSSeongJae Park dprintf3("[%d] fork() ret: %d\n", getpid(), forkret); 459baa489faSSeongJae Park 460baa489faSSeongJae Park if (!forkret) { 461baa489faSSeongJae Park /* in the child */ 462baa489faSSeongJae Park while (1) { 463baa489faSSeongJae Park dprintf1("child sleeping...\n"); 464baa489faSSeongJae Park sleep(30); 465baa489faSSeongJae Park } 466baa489faSSeongJae Park } 467baa489faSSeongJae Park return forkret; 468baa489faSSeongJae Park } 469baa489faSSeongJae Park 470baa489faSSeongJae Park int sys_mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot, 471baa489faSSeongJae Park unsigned long pkey) 472baa489faSSeongJae Park { 473baa489faSSeongJae Park int sret; 474baa489faSSeongJae Park 475baa489faSSeongJae Park dprintf2("%s(0x%p, %zx, prot=%lx, pkey=%lx)\n", __func__, 476baa489faSSeongJae Park ptr, size, orig_prot, pkey); 477baa489faSSeongJae Park 478baa489faSSeongJae Park errno = 0; 479baa489faSSeongJae Park sret = syscall(SYS_mprotect_key, ptr, size, orig_prot, pkey); 480baa489faSSeongJae Park if (errno) { 481baa489faSSeongJae Park dprintf2("SYS_mprotect_key sret: %d\n", sret); 482baa489faSSeongJae Park dprintf2("SYS_mprotect_key prot: 0x%lx\n", orig_prot); 483baa489faSSeongJae Park dprintf2("SYS_mprotect_key failed, errno: %d\n", errno); 484baa489faSSeongJae Park if (DEBUG_LEVEL >= 2) 485baa489faSSeongJae Park perror("SYS_mprotect_pkey"); 486baa489faSSeongJae Park } 487baa489faSSeongJae Park return sret; 488baa489faSSeongJae Park } 489baa489faSSeongJae Park 490baa489faSSeongJae Park int sys_pkey_alloc(unsigned long flags, unsigned long init_val) 491baa489faSSeongJae Park { 492baa489faSSeongJae Park int ret = syscall(SYS_pkey_alloc, flags, init_val); 493baa489faSSeongJae Park dprintf1("%s(flags=%lx, init_val=%lx) syscall ret: %d errno: %d\n", 494baa489faSSeongJae Park __func__, flags, init_val, ret, errno); 495baa489faSSeongJae Park return ret; 496baa489faSSeongJae Park } 497baa489faSSeongJae Park 498baa489faSSeongJae Park int alloc_pkey(void) 499baa489faSSeongJae Park { 500baa489faSSeongJae Park int ret; 501baa489faSSeongJae Park unsigned long init_val = 0x0; 502baa489faSSeongJae Park 503baa489faSSeongJae Park dprintf1("%s()::%d, pkey_reg: 0x%016llx shadow: %016llx\n", 504baa489faSSeongJae Park __func__, __LINE__, __read_pkey_reg(), shadow_pkey_reg); 505baa489faSSeongJae Park ret = sys_pkey_alloc(0, init_val); 506baa489faSSeongJae Park /* 507baa489faSSeongJae Park * pkey_alloc() sets PKEY register, so we need to reflect it in 508baa489faSSeongJae Park * shadow_pkey_reg: 509baa489faSSeongJae Park */ 510baa489faSSeongJae Park dprintf4("%s()::%d, ret: %d pkey_reg: 0x%016llx" 511baa489faSSeongJae Park " shadow: 0x%016llx\n", 512baa489faSSeongJae Park __func__, __LINE__, ret, __read_pkey_reg(), 513baa489faSSeongJae Park shadow_pkey_reg); 514baa489faSSeongJae Park if (ret > 0) { 515baa489faSSeongJae Park /* clear both the bits: */ 516baa489faSSeongJae Park shadow_pkey_reg = set_pkey_bits(shadow_pkey_reg, ret, 517baa489faSSeongJae Park ~PKEY_MASK); 518baa489faSSeongJae Park dprintf4("%s()::%d, ret: %d pkey_reg: 0x%016llx" 519baa489faSSeongJae Park " shadow: 0x%016llx\n", 520baa489faSSeongJae Park __func__, 521baa489faSSeongJae Park __LINE__, ret, __read_pkey_reg(), 522baa489faSSeongJae Park shadow_pkey_reg); 523baa489faSSeongJae Park /* 524baa489faSSeongJae Park * move the new state in from init_val 525baa489faSSeongJae Park * (remember, we cheated and init_val == pkey_reg format) 526baa489faSSeongJae Park */ 527baa489faSSeongJae Park shadow_pkey_reg = set_pkey_bits(shadow_pkey_reg, ret, 528baa489faSSeongJae Park init_val); 529baa489faSSeongJae Park } 530baa489faSSeongJae Park dprintf4("%s()::%d, ret: %d pkey_reg: 0x%016llx" 531baa489faSSeongJae Park " shadow: 0x%016llx\n", 532baa489faSSeongJae Park __func__, __LINE__, ret, __read_pkey_reg(), 533baa489faSSeongJae Park shadow_pkey_reg); 534baa489faSSeongJae Park dprintf1("%s()::%d errno: %d\n", __func__, __LINE__, errno); 535baa489faSSeongJae Park /* for shadow checking: */ 536baa489faSSeongJae Park read_pkey_reg(); 537baa489faSSeongJae Park dprintf4("%s()::%d, ret: %d pkey_reg: 0x%016llx" 538baa489faSSeongJae Park " shadow: 0x%016llx\n", 539baa489faSSeongJae Park __func__, __LINE__, ret, __read_pkey_reg(), 540baa489faSSeongJae Park shadow_pkey_reg); 541baa489faSSeongJae Park return ret; 542baa489faSSeongJae Park } 543baa489faSSeongJae Park 544baa489faSSeongJae Park int sys_pkey_free(unsigned long pkey) 545baa489faSSeongJae Park { 546baa489faSSeongJae Park int ret = syscall(SYS_pkey_free, pkey); 547baa489faSSeongJae Park dprintf1("%s(pkey=%ld) syscall ret: %d\n", __func__, pkey, ret); 548baa489faSSeongJae Park return ret; 549baa489faSSeongJae Park } 550baa489faSSeongJae Park 551baa489faSSeongJae Park /* 552baa489faSSeongJae Park * I had a bug where pkey bits could be set by mprotect() but 553baa489faSSeongJae Park * not cleared. This ensures we get lots of random bit sets 554baa489faSSeongJae Park * and clears on the vma and pte pkey bits. 555baa489faSSeongJae Park */ 556baa489faSSeongJae Park int alloc_random_pkey(void) 557baa489faSSeongJae Park { 558baa489faSSeongJae Park int max_nr_pkey_allocs; 559baa489faSSeongJae Park int ret; 560baa489faSSeongJae Park int i; 561baa489faSSeongJae Park int alloced_pkeys[NR_PKEYS]; 562baa489faSSeongJae Park int nr_alloced = 0; 563baa489faSSeongJae Park int random_index; 564baa489faSSeongJae Park memset(alloced_pkeys, 0, sizeof(alloced_pkeys)); 565baa489faSSeongJae Park 566baa489faSSeongJae Park /* allocate every possible key and make a note of which ones we got */ 567baa489faSSeongJae Park max_nr_pkey_allocs = NR_PKEYS; 568baa489faSSeongJae Park for (i = 0; i < max_nr_pkey_allocs; i++) { 569baa489faSSeongJae Park int new_pkey = alloc_pkey(); 570baa489faSSeongJae Park if (new_pkey < 0) 571baa489faSSeongJae Park break; 572baa489faSSeongJae Park alloced_pkeys[nr_alloced++] = new_pkey; 573baa489faSSeongJae Park } 574baa489faSSeongJae Park 575baa489faSSeongJae Park pkey_assert(nr_alloced > 0); 576baa489faSSeongJae Park /* select a random one out of the allocated ones */ 577baa489faSSeongJae Park random_index = rand() % nr_alloced; 578baa489faSSeongJae Park ret = alloced_pkeys[random_index]; 579baa489faSSeongJae Park /* now zero it out so we don't free it next */ 580baa489faSSeongJae Park alloced_pkeys[random_index] = 0; 581baa489faSSeongJae Park 582baa489faSSeongJae Park /* go through the allocated ones that we did not want and free them */ 583baa489faSSeongJae Park for (i = 0; i < nr_alloced; i++) { 584baa489faSSeongJae Park int free_ret; 585baa489faSSeongJae Park if (!alloced_pkeys[i]) 586baa489faSSeongJae Park continue; 587baa489faSSeongJae Park free_ret = sys_pkey_free(alloced_pkeys[i]); 588baa489faSSeongJae Park pkey_assert(!free_ret); 589baa489faSSeongJae Park } 590baa489faSSeongJae Park dprintf1("%s()::%d, ret: %d pkey_reg: 0x%016llx" 591baa489faSSeongJae Park " shadow: 0x%016llx\n", __func__, 592baa489faSSeongJae Park __LINE__, ret, __read_pkey_reg(), shadow_pkey_reg); 593baa489faSSeongJae Park return ret; 594baa489faSSeongJae Park } 595baa489faSSeongJae Park 596baa489faSSeongJae Park int mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot, 597baa489faSSeongJae Park unsigned long pkey) 598baa489faSSeongJae Park { 599baa489faSSeongJae Park int nr_iterations = random() % 100; 600baa489faSSeongJae Park int ret; 601baa489faSSeongJae Park 602baa489faSSeongJae Park while (0) { 603baa489faSSeongJae Park int rpkey = alloc_random_pkey(); 604baa489faSSeongJae Park ret = sys_mprotect_pkey(ptr, size, orig_prot, pkey); 605baa489faSSeongJae Park dprintf1("sys_mprotect_pkey(%p, %zx, prot=0x%lx, pkey=%ld) ret: %d\n", 606baa489faSSeongJae Park ptr, size, orig_prot, pkey, ret); 607baa489faSSeongJae Park if (nr_iterations-- < 0) 608baa489faSSeongJae Park break; 609baa489faSSeongJae Park 610baa489faSSeongJae Park dprintf1("%s()::%d, ret: %d pkey_reg: 0x%016llx" 611baa489faSSeongJae Park " shadow: 0x%016llx\n", 612baa489faSSeongJae Park __func__, __LINE__, ret, __read_pkey_reg(), 613baa489faSSeongJae Park shadow_pkey_reg); 614baa489faSSeongJae Park sys_pkey_free(rpkey); 615baa489faSSeongJae Park dprintf1("%s()::%d, ret: %d pkey_reg: 0x%016llx" 616baa489faSSeongJae Park " shadow: 0x%016llx\n", 617baa489faSSeongJae Park __func__, __LINE__, ret, __read_pkey_reg(), 618baa489faSSeongJae Park shadow_pkey_reg); 619baa489faSSeongJae Park } 620baa489faSSeongJae Park pkey_assert(pkey < NR_PKEYS); 621baa489faSSeongJae Park 622baa489faSSeongJae Park ret = sys_mprotect_pkey(ptr, size, orig_prot, pkey); 623baa489faSSeongJae Park dprintf1("mprotect_pkey(%p, %zx, prot=0x%lx, pkey=%ld) ret: %d\n", 624baa489faSSeongJae Park ptr, size, orig_prot, pkey, ret); 625baa489faSSeongJae Park pkey_assert(!ret); 626baa489faSSeongJae Park dprintf1("%s()::%d, ret: %d pkey_reg: 0x%016llx" 627baa489faSSeongJae Park " shadow: 0x%016llx\n", __func__, 628baa489faSSeongJae Park __LINE__, ret, __read_pkey_reg(), shadow_pkey_reg); 629baa489faSSeongJae Park return ret; 630baa489faSSeongJae Park } 631baa489faSSeongJae Park 632baa489faSSeongJae Park struct pkey_malloc_record { 633baa489faSSeongJae Park void *ptr; 634baa489faSSeongJae Park long size; 635baa489faSSeongJae Park int prot; 636baa489faSSeongJae Park }; 637baa489faSSeongJae Park struct pkey_malloc_record *pkey_malloc_records; 638baa489faSSeongJae Park struct pkey_malloc_record *pkey_last_malloc_record; 639baa489faSSeongJae Park long nr_pkey_malloc_records; 640baa489faSSeongJae Park void record_pkey_malloc(void *ptr, long size, int prot) 641baa489faSSeongJae Park { 642baa489faSSeongJae Park long i; 643baa489faSSeongJae Park struct pkey_malloc_record *rec = NULL; 644baa489faSSeongJae Park 645baa489faSSeongJae Park for (i = 0; i < nr_pkey_malloc_records; i++) { 646baa489faSSeongJae Park rec = &pkey_malloc_records[i]; 647baa489faSSeongJae Park /* find a free record */ 648baa489faSSeongJae Park if (rec) 649baa489faSSeongJae Park break; 650baa489faSSeongJae Park } 651baa489faSSeongJae Park if (!rec) { 652baa489faSSeongJae Park /* every record is full */ 653baa489faSSeongJae Park size_t old_nr_records = nr_pkey_malloc_records; 654baa489faSSeongJae Park size_t new_nr_records = (nr_pkey_malloc_records * 2 + 1); 655baa489faSSeongJae Park size_t new_size = new_nr_records * sizeof(struct pkey_malloc_record); 656baa489faSSeongJae Park dprintf2("new_nr_records: %zd\n", new_nr_records); 657baa489faSSeongJae Park dprintf2("new_size: %zd\n", new_size); 658baa489faSSeongJae Park pkey_malloc_records = realloc(pkey_malloc_records, new_size); 659baa489faSSeongJae Park pkey_assert(pkey_malloc_records != NULL); 660baa489faSSeongJae Park rec = &pkey_malloc_records[nr_pkey_malloc_records]; 661baa489faSSeongJae Park /* 662baa489faSSeongJae Park * realloc() does not initialize memory, so zero it from 663baa489faSSeongJae Park * the first new record all the way to the end. 664baa489faSSeongJae Park */ 665baa489faSSeongJae Park for (i = 0; i < new_nr_records - old_nr_records; i++) 666baa489faSSeongJae Park memset(rec + i, 0, sizeof(*rec)); 667baa489faSSeongJae Park } 668baa489faSSeongJae Park dprintf3("filling malloc record[%d/%p]: {%p, %ld}\n", 669baa489faSSeongJae Park (int)(rec - pkey_malloc_records), rec, ptr, size); 670baa489faSSeongJae Park rec->ptr = ptr; 671baa489faSSeongJae Park rec->size = size; 672baa489faSSeongJae Park rec->prot = prot; 673baa489faSSeongJae Park pkey_last_malloc_record = rec; 674baa489faSSeongJae Park nr_pkey_malloc_records++; 675baa489faSSeongJae Park } 676baa489faSSeongJae Park 677baa489faSSeongJae Park void free_pkey_malloc(void *ptr) 678baa489faSSeongJae Park { 679baa489faSSeongJae Park long i; 680baa489faSSeongJae Park int ret; 681baa489faSSeongJae Park dprintf3("%s(%p)\n", __func__, ptr); 682baa489faSSeongJae Park for (i = 0; i < nr_pkey_malloc_records; i++) { 683baa489faSSeongJae Park struct pkey_malloc_record *rec = &pkey_malloc_records[i]; 684baa489faSSeongJae Park dprintf4("looking for ptr %p at record[%ld/%p]: {%p, %ld}\n", 685baa489faSSeongJae Park ptr, i, rec, rec->ptr, rec->size); 686baa489faSSeongJae Park if ((ptr < rec->ptr) || 687baa489faSSeongJae Park (ptr >= rec->ptr + rec->size)) 688baa489faSSeongJae Park continue; 689baa489faSSeongJae Park 690baa489faSSeongJae Park dprintf3("found ptr %p at record[%ld/%p]: {%p, %ld}\n", 691baa489faSSeongJae Park ptr, i, rec, rec->ptr, rec->size); 692baa489faSSeongJae Park nr_pkey_malloc_records--; 693baa489faSSeongJae Park ret = munmap(rec->ptr, rec->size); 694baa489faSSeongJae Park dprintf3("munmap ret: %d\n", ret); 695baa489faSSeongJae Park pkey_assert(!ret); 696baa489faSSeongJae Park dprintf3("clearing rec->ptr, rec: %p\n", rec); 697baa489faSSeongJae Park rec->ptr = NULL; 698baa489faSSeongJae Park dprintf3("done clearing rec->ptr, rec: %p\n", rec); 699baa489faSSeongJae Park return; 700baa489faSSeongJae Park } 701baa489faSSeongJae Park pkey_assert(false); 702baa489faSSeongJae Park } 703baa489faSSeongJae Park 704baa489faSSeongJae Park 705baa489faSSeongJae Park void *malloc_pkey_with_mprotect(long size, int prot, u16 pkey) 706baa489faSSeongJae Park { 707baa489faSSeongJae Park void *ptr; 708baa489faSSeongJae Park int ret; 709baa489faSSeongJae Park 710baa489faSSeongJae Park read_pkey_reg(); 711baa489faSSeongJae Park dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__, 712baa489faSSeongJae Park size, prot, pkey); 713baa489faSSeongJae Park pkey_assert(pkey < NR_PKEYS); 714baa489faSSeongJae Park ptr = mmap(NULL, size, prot, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); 715baa489faSSeongJae Park pkey_assert(ptr != (void *)-1); 716baa489faSSeongJae Park ret = mprotect_pkey((void *)ptr, PAGE_SIZE, prot, pkey); 717baa489faSSeongJae Park pkey_assert(!ret); 718baa489faSSeongJae Park record_pkey_malloc(ptr, size, prot); 719baa489faSSeongJae Park read_pkey_reg(); 720baa489faSSeongJae Park 721baa489faSSeongJae Park dprintf1("%s() for pkey %d @ %p\n", __func__, pkey, ptr); 722baa489faSSeongJae Park return ptr; 723baa489faSSeongJae Park } 724baa489faSSeongJae Park 725baa489faSSeongJae Park void *malloc_pkey_anon_huge(long size, int prot, u16 pkey) 726baa489faSSeongJae Park { 727baa489faSSeongJae Park int ret; 728baa489faSSeongJae Park void *ptr; 729baa489faSSeongJae Park 730baa489faSSeongJae Park dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__, 731baa489faSSeongJae Park size, prot, pkey); 732baa489faSSeongJae Park /* 733baa489faSSeongJae Park * Guarantee we can fit at least one huge page in the resulting 734baa489faSSeongJae Park * allocation by allocating space for 2: 735baa489faSSeongJae Park */ 736baa489faSSeongJae Park size = ALIGN_UP(size, HPAGE_SIZE * 2); 737baa489faSSeongJae Park ptr = mmap(NULL, size, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); 738baa489faSSeongJae Park pkey_assert(ptr != (void *)-1); 739baa489faSSeongJae Park record_pkey_malloc(ptr, size, prot); 740baa489faSSeongJae Park mprotect_pkey(ptr, size, prot, pkey); 741baa489faSSeongJae Park 742baa489faSSeongJae Park dprintf1("unaligned ptr: %p\n", ptr); 743baa489faSSeongJae Park ptr = ALIGN_PTR_UP(ptr, HPAGE_SIZE); 744baa489faSSeongJae Park dprintf1(" aligned ptr: %p\n", ptr); 745baa489faSSeongJae Park ret = madvise(ptr, HPAGE_SIZE, MADV_HUGEPAGE); 746baa489faSSeongJae Park dprintf1("MADV_HUGEPAGE ret: %d\n", ret); 747baa489faSSeongJae Park ret = madvise(ptr, HPAGE_SIZE, MADV_WILLNEED); 748baa489faSSeongJae Park dprintf1("MADV_WILLNEED ret: %d\n", ret); 749baa489faSSeongJae Park memset(ptr, 0, HPAGE_SIZE); 750baa489faSSeongJae Park 751baa489faSSeongJae Park dprintf1("mmap()'d thp for pkey %d @ %p\n", pkey, ptr); 752baa489faSSeongJae Park return ptr; 753baa489faSSeongJae Park } 754baa489faSSeongJae Park 755baa489faSSeongJae Park int hugetlb_setup_ok; 756baa489faSSeongJae Park #define SYSFS_FMT_NR_HUGE_PAGES "/sys/kernel/mm/hugepages/hugepages-%ldkB/nr_hugepages" 757baa489faSSeongJae Park #define GET_NR_HUGE_PAGES 10 758baa489faSSeongJae Park void setup_hugetlbfs(void) 759baa489faSSeongJae Park { 760baa489faSSeongJae Park int err; 761baa489faSSeongJae Park int fd; 762baa489faSSeongJae Park char buf[256]; 763baa489faSSeongJae Park long hpagesz_kb; 764baa489faSSeongJae Park long hpagesz_mb; 765baa489faSSeongJae Park 766baa489faSSeongJae Park if (geteuid() != 0) { 767baa489faSSeongJae Park fprintf(stderr, "WARNING: not run as root, can not do hugetlb test\n"); 768baa489faSSeongJae Park return; 769baa489faSSeongJae Park } 770baa489faSSeongJae Park 771baa489faSSeongJae Park cat_into_file(__stringify(GET_NR_HUGE_PAGES), "/proc/sys/vm/nr_hugepages"); 772baa489faSSeongJae Park 773baa489faSSeongJae Park /* 774baa489faSSeongJae Park * Now go make sure that we got the pages and that they 775baa489faSSeongJae Park * are PMD-level pages. Someone might have made PUD-level 776baa489faSSeongJae Park * pages the default. 777baa489faSSeongJae Park */ 778baa489faSSeongJae Park hpagesz_kb = HPAGE_SIZE / 1024; 779baa489faSSeongJae Park hpagesz_mb = hpagesz_kb / 1024; 780baa489faSSeongJae Park sprintf(buf, SYSFS_FMT_NR_HUGE_PAGES, hpagesz_kb); 781baa489faSSeongJae Park fd = open(buf, O_RDONLY); 782baa489faSSeongJae Park if (fd < 0) { 783baa489faSSeongJae Park fprintf(stderr, "opening sysfs %ldM hugetlb config: %s\n", 784baa489faSSeongJae Park hpagesz_mb, strerror(errno)); 785baa489faSSeongJae Park return; 786baa489faSSeongJae Park } 787baa489faSSeongJae Park 788baa489faSSeongJae Park /* -1 to guarantee leaving the trailing \0 */ 789baa489faSSeongJae Park err = read(fd, buf, sizeof(buf)-1); 790baa489faSSeongJae Park close(fd); 791baa489faSSeongJae Park if (err <= 0) { 792baa489faSSeongJae Park fprintf(stderr, "reading sysfs %ldM hugetlb config: %s\n", 793baa489faSSeongJae Park hpagesz_mb, strerror(errno)); 794baa489faSSeongJae Park return; 795baa489faSSeongJae Park } 796baa489faSSeongJae Park 797baa489faSSeongJae Park if (atoi(buf) != GET_NR_HUGE_PAGES) { 798baa489faSSeongJae Park fprintf(stderr, "could not confirm %ldM pages, got: '%s' expected %d\n", 799baa489faSSeongJae Park hpagesz_mb, buf, GET_NR_HUGE_PAGES); 800baa489faSSeongJae Park return; 801baa489faSSeongJae Park } 802baa489faSSeongJae Park 803baa489faSSeongJae Park hugetlb_setup_ok = 1; 804baa489faSSeongJae Park } 805baa489faSSeongJae Park 806baa489faSSeongJae Park void *malloc_pkey_hugetlb(long size, int prot, u16 pkey) 807baa489faSSeongJae Park { 808baa489faSSeongJae Park void *ptr; 809baa489faSSeongJae Park int flags = MAP_ANONYMOUS|MAP_PRIVATE|MAP_HUGETLB; 810baa489faSSeongJae Park 811baa489faSSeongJae Park if (!hugetlb_setup_ok) 812baa489faSSeongJae Park return PTR_ERR_ENOTSUP; 813baa489faSSeongJae Park 814baa489faSSeongJae Park dprintf1("doing %s(%ld, %x, %x)\n", __func__, size, prot, pkey); 815baa489faSSeongJae Park size = ALIGN_UP(size, HPAGE_SIZE * 2); 816baa489faSSeongJae Park pkey_assert(pkey < NR_PKEYS); 817baa489faSSeongJae Park ptr = mmap(NULL, size, PROT_NONE, flags, -1, 0); 818baa489faSSeongJae Park pkey_assert(ptr != (void *)-1); 819baa489faSSeongJae Park mprotect_pkey(ptr, size, prot, pkey); 820baa489faSSeongJae Park 821baa489faSSeongJae Park record_pkey_malloc(ptr, size, prot); 822baa489faSSeongJae Park 823baa489faSSeongJae Park dprintf1("mmap()'d hugetlbfs for pkey %d @ %p\n", pkey, ptr); 824baa489faSSeongJae Park return ptr; 825baa489faSSeongJae Park } 826baa489faSSeongJae Park 827baa489faSSeongJae Park void *malloc_pkey_mmap_dax(long size, int prot, u16 pkey) 828baa489faSSeongJae Park { 829baa489faSSeongJae Park void *ptr; 830baa489faSSeongJae Park int fd; 831baa489faSSeongJae Park 832baa489faSSeongJae Park dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__, 833baa489faSSeongJae Park size, prot, pkey); 834baa489faSSeongJae Park pkey_assert(pkey < NR_PKEYS); 835baa489faSSeongJae Park fd = open("/dax/foo", O_RDWR); 836baa489faSSeongJae Park pkey_assert(fd >= 0); 837baa489faSSeongJae Park 838baa489faSSeongJae Park ptr = mmap(0, size, prot, MAP_SHARED, fd, 0); 839baa489faSSeongJae Park pkey_assert(ptr != (void *)-1); 840baa489faSSeongJae Park 841baa489faSSeongJae Park mprotect_pkey(ptr, size, prot, pkey); 842baa489faSSeongJae Park 843baa489faSSeongJae Park record_pkey_malloc(ptr, size, prot); 844baa489faSSeongJae Park 845baa489faSSeongJae Park dprintf1("mmap()'d for pkey %d @ %p\n", pkey, ptr); 846baa489faSSeongJae Park close(fd); 847baa489faSSeongJae Park return ptr; 848baa489faSSeongJae Park } 849baa489faSSeongJae Park 850baa489faSSeongJae Park void *(*pkey_malloc[])(long size, int prot, u16 pkey) = { 851baa489faSSeongJae Park 852baa489faSSeongJae Park malloc_pkey_with_mprotect, 853baa489faSSeongJae Park malloc_pkey_with_mprotect_subpage, 854baa489faSSeongJae Park malloc_pkey_anon_huge, 855baa489faSSeongJae Park malloc_pkey_hugetlb 856baa489faSSeongJae Park /* can not do direct with the pkey_mprotect() API: 857baa489faSSeongJae Park malloc_pkey_mmap_direct, 858baa489faSSeongJae Park malloc_pkey_mmap_dax, 859baa489faSSeongJae Park */ 860baa489faSSeongJae Park }; 861baa489faSSeongJae Park 862baa489faSSeongJae Park void *malloc_pkey(long size, int prot, u16 pkey) 863baa489faSSeongJae Park { 864baa489faSSeongJae Park void *ret; 865baa489faSSeongJae Park static int malloc_type; 866baa489faSSeongJae Park int nr_malloc_types = ARRAY_SIZE(pkey_malloc); 867baa489faSSeongJae Park 868baa489faSSeongJae Park pkey_assert(pkey < NR_PKEYS); 869baa489faSSeongJae Park 870baa489faSSeongJae Park while (1) { 871baa489faSSeongJae Park pkey_assert(malloc_type < nr_malloc_types); 872baa489faSSeongJae Park 873baa489faSSeongJae Park ret = pkey_malloc[malloc_type](size, prot, pkey); 874baa489faSSeongJae Park pkey_assert(ret != (void *)-1); 875baa489faSSeongJae Park 876baa489faSSeongJae Park malloc_type++; 877baa489faSSeongJae Park if (malloc_type >= nr_malloc_types) 878baa489faSSeongJae Park malloc_type = (random()%nr_malloc_types); 879baa489faSSeongJae Park 880baa489faSSeongJae Park /* try again if the malloc_type we tried is unsupported */ 881baa489faSSeongJae Park if (ret == PTR_ERR_ENOTSUP) 882baa489faSSeongJae Park continue; 883baa489faSSeongJae Park 884baa489faSSeongJae Park break; 885baa489faSSeongJae Park } 886baa489faSSeongJae Park 887baa489faSSeongJae Park dprintf3("%s(%ld, prot=%x, pkey=%x) returning: %p\n", __func__, 888baa489faSSeongJae Park size, prot, pkey, ret); 889baa489faSSeongJae Park return ret; 890baa489faSSeongJae Park } 891baa489faSSeongJae Park 892baa489faSSeongJae Park int last_pkey_faults; 893baa489faSSeongJae Park #define UNKNOWN_PKEY -2 894baa489faSSeongJae Park void expected_pkey_fault(int pkey) 895baa489faSSeongJae Park { 896baa489faSSeongJae Park dprintf2("%s(): last_pkey_faults: %d pkey_faults: %d\n", 897baa489faSSeongJae Park __func__, last_pkey_faults, pkey_faults); 898baa489faSSeongJae Park dprintf2("%s(%d): last_si_pkey: %d\n", __func__, pkey, last_si_pkey); 899baa489faSSeongJae Park pkey_assert(last_pkey_faults + 1 == pkey_faults); 900baa489faSSeongJae Park 901baa489faSSeongJae Park /* 902baa489faSSeongJae Park * For exec-only memory, we do not know the pkey in 903baa489faSSeongJae Park * advance, so skip this check. 904baa489faSSeongJae Park */ 905baa489faSSeongJae Park if (pkey != UNKNOWN_PKEY) 906baa489faSSeongJae Park pkey_assert(last_si_pkey == pkey); 907baa489faSSeongJae Park 908baa489faSSeongJae Park #if defined(__i386__) || defined(__x86_64__) /* arch */ 909baa489faSSeongJae Park /* 910baa489faSSeongJae Park * The signal handler shold have cleared out PKEY register to let the 911baa489faSSeongJae Park * test program continue. We now have to restore it. 912baa489faSSeongJae Park */ 913baa489faSSeongJae Park if (__read_pkey_reg() != 0) 914baa489faSSeongJae Park #else /* arch */ 915baa489faSSeongJae Park if (__read_pkey_reg() != shadow_pkey_reg) 916baa489faSSeongJae Park #endif /* arch */ 917baa489faSSeongJae Park pkey_assert(0); 918baa489faSSeongJae Park 919baa489faSSeongJae Park __write_pkey_reg(shadow_pkey_reg); 920baa489faSSeongJae Park dprintf1("%s() set pkey_reg=%016llx to restore state after signal " 921baa489faSSeongJae Park "nuked it\n", __func__, shadow_pkey_reg); 922baa489faSSeongJae Park last_pkey_faults = pkey_faults; 923baa489faSSeongJae Park last_si_pkey = -1; 924baa489faSSeongJae Park } 925baa489faSSeongJae Park 926baa489faSSeongJae Park #define do_not_expect_pkey_fault(msg) do { \ 927baa489faSSeongJae Park if (last_pkey_faults != pkey_faults) \ 928baa489faSSeongJae Park dprintf0("unexpected PKey fault: %s\n", msg); \ 929baa489faSSeongJae Park pkey_assert(last_pkey_faults == pkey_faults); \ 930baa489faSSeongJae Park } while (0) 931baa489faSSeongJae Park 932baa489faSSeongJae Park int test_fds[10] = { -1 }; 933baa489faSSeongJae Park int nr_test_fds; 934baa489faSSeongJae Park void __save_test_fd(int fd) 935baa489faSSeongJae Park { 936baa489faSSeongJae Park pkey_assert(fd >= 0); 937baa489faSSeongJae Park pkey_assert(nr_test_fds < ARRAY_SIZE(test_fds)); 938baa489faSSeongJae Park test_fds[nr_test_fds] = fd; 939baa489faSSeongJae Park nr_test_fds++; 940baa489faSSeongJae Park } 941baa489faSSeongJae Park 942baa489faSSeongJae Park int get_test_read_fd(void) 943baa489faSSeongJae Park { 944baa489faSSeongJae Park int test_fd = open("/etc/passwd", O_RDONLY); 945baa489faSSeongJae Park __save_test_fd(test_fd); 946baa489faSSeongJae Park return test_fd; 947baa489faSSeongJae Park } 948baa489faSSeongJae Park 949baa489faSSeongJae Park void close_test_fds(void) 950baa489faSSeongJae Park { 951baa489faSSeongJae Park int i; 952baa489faSSeongJae Park 953baa489faSSeongJae Park for (i = 0; i < nr_test_fds; i++) { 954baa489faSSeongJae Park if (test_fds[i] < 0) 955baa489faSSeongJae Park continue; 956baa489faSSeongJae Park close(test_fds[i]); 957baa489faSSeongJae Park test_fds[i] = -1; 958baa489faSSeongJae Park } 959baa489faSSeongJae Park nr_test_fds = 0; 960baa489faSSeongJae Park } 961baa489faSSeongJae Park 962baa489faSSeongJae Park #define barrier() __asm__ __volatile__("": : :"memory") 963baa489faSSeongJae Park __attribute__((noinline)) int read_ptr(int *ptr) 964baa489faSSeongJae Park { 965baa489faSSeongJae Park /* 966baa489faSSeongJae Park * Keep GCC from optimizing this away somehow 967baa489faSSeongJae Park */ 968baa489faSSeongJae Park barrier(); 969baa489faSSeongJae Park return *ptr; 970baa489faSSeongJae Park } 971baa489faSSeongJae Park 972baa489faSSeongJae Park void test_pkey_alloc_free_attach_pkey0(int *ptr, u16 pkey) 973baa489faSSeongJae Park { 974baa489faSSeongJae Park int i, err; 975baa489faSSeongJae Park int max_nr_pkey_allocs; 976baa489faSSeongJae Park int alloced_pkeys[NR_PKEYS]; 977baa489faSSeongJae Park int nr_alloced = 0; 978baa489faSSeongJae Park long size; 979baa489faSSeongJae Park 980baa489faSSeongJae Park pkey_assert(pkey_last_malloc_record); 981baa489faSSeongJae Park size = pkey_last_malloc_record->size; 982baa489faSSeongJae Park /* 983baa489faSSeongJae Park * This is a bit of a hack. But mprotect() requires 984baa489faSSeongJae Park * huge-page-aligned sizes when operating on hugetlbfs. 985baa489faSSeongJae Park * So, make sure that we use something that's a multiple 986baa489faSSeongJae Park * of a huge page when we can. 987baa489faSSeongJae Park */ 988baa489faSSeongJae Park if (size >= HPAGE_SIZE) 989baa489faSSeongJae Park size = HPAGE_SIZE; 990baa489faSSeongJae Park 991baa489faSSeongJae Park /* allocate every possible key and make sure key-0 never got allocated */ 992baa489faSSeongJae Park max_nr_pkey_allocs = NR_PKEYS; 993baa489faSSeongJae Park for (i = 0; i < max_nr_pkey_allocs; i++) { 994baa489faSSeongJae Park int new_pkey = alloc_pkey(); 995baa489faSSeongJae Park pkey_assert(new_pkey != 0); 996baa489faSSeongJae Park 997baa489faSSeongJae Park if (new_pkey < 0) 998baa489faSSeongJae Park break; 999baa489faSSeongJae Park alloced_pkeys[nr_alloced++] = new_pkey; 1000baa489faSSeongJae Park } 1001baa489faSSeongJae Park /* free all the allocated keys */ 1002baa489faSSeongJae Park for (i = 0; i < nr_alloced; i++) { 1003baa489faSSeongJae Park int free_ret; 1004baa489faSSeongJae Park 1005baa489faSSeongJae Park if (!alloced_pkeys[i]) 1006baa489faSSeongJae Park continue; 1007baa489faSSeongJae Park free_ret = sys_pkey_free(alloced_pkeys[i]); 1008baa489faSSeongJae Park pkey_assert(!free_ret); 1009baa489faSSeongJae Park } 1010baa489faSSeongJae Park 1011baa489faSSeongJae Park /* attach key-0 in various modes */ 1012baa489faSSeongJae Park err = sys_mprotect_pkey(ptr, size, PROT_READ, 0); 1013baa489faSSeongJae Park pkey_assert(!err); 1014baa489faSSeongJae Park err = sys_mprotect_pkey(ptr, size, PROT_WRITE, 0); 1015baa489faSSeongJae Park pkey_assert(!err); 1016baa489faSSeongJae Park err = sys_mprotect_pkey(ptr, size, PROT_EXEC, 0); 1017baa489faSSeongJae Park pkey_assert(!err); 1018baa489faSSeongJae Park err = sys_mprotect_pkey(ptr, size, PROT_READ|PROT_WRITE, 0); 1019baa489faSSeongJae Park pkey_assert(!err); 1020baa489faSSeongJae Park err = sys_mprotect_pkey(ptr, size, PROT_READ|PROT_WRITE|PROT_EXEC, 0); 1021baa489faSSeongJae Park pkey_assert(!err); 1022baa489faSSeongJae Park } 1023baa489faSSeongJae Park 1024baa489faSSeongJae Park void test_read_of_write_disabled_region(int *ptr, u16 pkey) 1025baa489faSSeongJae Park { 1026baa489faSSeongJae Park int ptr_contents; 1027baa489faSSeongJae Park 1028baa489faSSeongJae Park dprintf1("disabling write access to PKEY[1], doing read\n"); 1029baa489faSSeongJae Park pkey_write_deny(pkey); 1030baa489faSSeongJae Park ptr_contents = read_ptr(ptr); 1031baa489faSSeongJae Park dprintf1("*ptr: %d\n", ptr_contents); 1032baa489faSSeongJae Park dprintf1("\n"); 1033baa489faSSeongJae Park } 1034baa489faSSeongJae Park void test_read_of_access_disabled_region(int *ptr, u16 pkey) 1035baa489faSSeongJae Park { 1036baa489faSSeongJae Park int ptr_contents; 1037baa489faSSeongJae Park 1038baa489faSSeongJae Park dprintf1("disabling access to PKEY[%02d], doing read @ %p\n", pkey, ptr); 1039baa489faSSeongJae Park read_pkey_reg(); 1040baa489faSSeongJae Park pkey_access_deny(pkey); 1041baa489faSSeongJae Park ptr_contents = read_ptr(ptr); 1042baa489faSSeongJae Park dprintf1("*ptr: %d\n", ptr_contents); 1043baa489faSSeongJae Park expected_pkey_fault(pkey); 1044baa489faSSeongJae Park } 1045baa489faSSeongJae Park 1046baa489faSSeongJae Park void test_read_of_access_disabled_region_with_page_already_mapped(int *ptr, 1047baa489faSSeongJae Park u16 pkey) 1048baa489faSSeongJae Park { 1049baa489faSSeongJae Park int ptr_contents; 1050baa489faSSeongJae Park 1051baa489faSSeongJae Park dprintf1("disabling access to PKEY[%02d], doing read @ %p\n", 1052baa489faSSeongJae Park pkey, ptr); 1053baa489faSSeongJae Park ptr_contents = read_ptr(ptr); 1054baa489faSSeongJae Park dprintf1("reading ptr before disabling the read : %d\n", 1055baa489faSSeongJae Park ptr_contents); 1056baa489faSSeongJae Park read_pkey_reg(); 1057baa489faSSeongJae Park pkey_access_deny(pkey); 1058baa489faSSeongJae Park ptr_contents = read_ptr(ptr); 1059baa489faSSeongJae Park dprintf1("*ptr: %d\n", ptr_contents); 1060baa489faSSeongJae Park expected_pkey_fault(pkey); 1061baa489faSSeongJae Park } 1062baa489faSSeongJae Park 1063baa489faSSeongJae Park void test_write_of_write_disabled_region_with_page_already_mapped(int *ptr, 1064baa489faSSeongJae Park u16 pkey) 1065baa489faSSeongJae Park { 1066baa489faSSeongJae Park *ptr = __LINE__; 1067baa489faSSeongJae Park dprintf1("disabling write access; after accessing the page, " 1068baa489faSSeongJae Park "to PKEY[%02d], doing write\n", pkey); 1069baa489faSSeongJae Park pkey_write_deny(pkey); 1070baa489faSSeongJae Park *ptr = __LINE__; 1071baa489faSSeongJae Park expected_pkey_fault(pkey); 1072baa489faSSeongJae Park } 1073baa489faSSeongJae Park 1074baa489faSSeongJae Park void test_write_of_write_disabled_region(int *ptr, u16 pkey) 1075baa489faSSeongJae Park { 1076baa489faSSeongJae Park dprintf1("disabling write access to PKEY[%02d], doing write\n", pkey); 1077baa489faSSeongJae Park pkey_write_deny(pkey); 1078baa489faSSeongJae Park *ptr = __LINE__; 1079baa489faSSeongJae Park expected_pkey_fault(pkey); 1080baa489faSSeongJae Park } 1081baa489faSSeongJae Park void test_write_of_access_disabled_region(int *ptr, u16 pkey) 1082baa489faSSeongJae Park { 1083baa489faSSeongJae Park dprintf1("disabling access to PKEY[%02d], doing write\n", pkey); 1084baa489faSSeongJae Park pkey_access_deny(pkey); 1085baa489faSSeongJae Park *ptr = __LINE__; 1086baa489faSSeongJae Park expected_pkey_fault(pkey); 1087baa489faSSeongJae Park } 1088baa489faSSeongJae Park 1089baa489faSSeongJae Park void test_write_of_access_disabled_region_with_page_already_mapped(int *ptr, 1090baa489faSSeongJae Park u16 pkey) 1091baa489faSSeongJae Park { 1092baa489faSSeongJae Park *ptr = __LINE__; 1093baa489faSSeongJae Park dprintf1("disabling access; after accessing the page, " 1094baa489faSSeongJae Park " to PKEY[%02d], doing write\n", pkey); 1095baa489faSSeongJae Park pkey_access_deny(pkey); 1096baa489faSSeongJae Park *ptr = __LINE__; 1097baa489faSSeongJae Park expected_pkey_fault(pkey); 1098baa489faSSeongJae Park } 1099baa489faSSeongJae Park 1100baa489faSSeongJae Park void test_kernel_write_of_access_disabled_region(int *ptr, u16 pkey) 1101baa489faSSeongJae Park { 1102baa489faSSeongJae Park int ret; 1103baa489faSSeongJae Park int test_fd = get_test_read_fd(); 1104baa489faSSeongJae Park 1105baa489faSSeongJae Park dprintf1("disabling access to PKEY[%02d], " 1106baa489faSSeongJae Park "having kernel read() to buffer\n", pkey); 1107baa489faSSeongJae Park pkey_access_deny(pkey); 1108baa489faSSeongJae Park ret = read(test_fd, ptr, 1); 1109baa489faSSeongJae Park dprintf1("read ret: %d\n", ret); 1110baa489faSSeongJae Park pkey_assert(ret); 1111baa489faSSeongJae Park } 1112baa489faSSeongJae Park void test_kernel_write_of_write_disabled_region(int *ptr, u16 pkey) 1113baa489faSSeongJae Park { 1114baa489faSSeongJae Park int ret; 1115baa489faSSeongJae Park int test_fd = get_test_read_fd(); 1116baa489faSSeongJae Park 1117baa489faSSeongJae Park pkey_write_deny(pkey); 1118baa489faSSeongJae Park ret = read(test_fd, ptr, 100); 1119baa489faSSeongJae Park dprintf1("read ret: %d\n", ret); 1120baa489faSSeongJae Park if (ret < 0 && (DEBUG_LEVEL > 0)) 1121baa489faSSeongJae Park perror("verbose read result (OK for this to be bad)"); 1122baa489faSSeongJae Park pkey_assert(ret); 1123baa489faSSeongJae Park } 1124baa489faSSeongJae Park 1125baa489faSSeongJae Park void test_kernel_gup_of_access_disabled_region(int *ptr, u16 pkey) 1126baa489faSSeongJae Park { 1127baa489faSSeongJae Park int pipe_ret, vmsplice_ret; 1128baa489faSSeongJae Park struct iovec iov; 1129baa489faSSeongJae Park int pipe_fds[2]; 1130baa489faSSeongJae Park 1131baa489faSSeongJae Park pipe_ret = pipe(pipe_fds); 1132baa489faSSeongJae Park 1133baa489faSSeongJae Park pkey_assert(pipe_ret == 0); 1134baa489faSSeongJae Park dprintf1("disabling access to PKEY[%02d], " 1135baa489faSSeongJae Park "having kernel vmsplice from buffer\n", pkey); 1136baa489faSSeongJae Park pkey_access_deny(pkey); 1137baa489faSSeongJae Park iov.iov_base = ptr; 1138baa489faSSeongJae Park iov.iov_len = PAGE_SIZE; 1139baa489faSSeongJae Park vmsplice_ret = vmsplice(pipe_fds[1], &iov, 1, SPLICE_F_GIFT); 1140baa489faSSeongJae Park dprintf1("vmsplice() ret: %d\n", vmsplice_ret); 1141baa489faSSeongJae Park pkey_assert(vmsplice_ret == -1); 1142baa489faSSeongJae Park 1143baa489faSSeongJae Park close(pipe_fds[0]); 1144baa489faSSeongJae Park close(pipe_fds[1]); 1145baa489faSSeongJae Park } 1146baa489faSSeongJae Park 1147baa489faSSeongJae Park void test_kernel_gup_write_to_write_disabled_region(int *ptr, u16 pkey) 1148baa489faSSeongJae Park { 1149baa489faSSeongJae Park int ignored = 0xdada; 1150baa489faSSeongJae Park int futex_ret; 1151baa489faSSeongJae Park int some_int = __LINE__; 1152baa489faSSeongJae Park 1153baa489faSSeongJae Park dprintf1("disabling write to PKEY[%02d], " 1154baa489faSSeongJae Park "doing futex gunk in buffer\n", pkey); 1155baa489faSSeongJae Park *ptr = some_int; 1156baa489faSSeongJae Park pkey_write_deny(pkey); 1157baa489faSSeongJae Park futex_ret = syscall(SYS_futex, ptr, FUTEX_WAIT, some_int-1, NULL, 1158baa489faSSeongJae Park &ignored, ignored); 1159baa489faSSeongJae Park if (DEBUG_LEVEL > 0) 1160baa489faSSeongJae Park perror("futex"); 1161baa489faSSeongJae Park dprintf1("futex() ret: %d\n", futex_ret); 1162baa489faSSeongJae Park } 1163baa489faSSeongJae Park 1164baa489faSSeongJae Park /* Assumes that all pkeys other than 'pkey' are unallocated */ 1165baa489faSSeongJae Park void test_pkey_syscalls_on_non_allocated_pkey(int *ptr, u16 pkey) 1166baa489faSSeongJae Park { 1167baa489faSSeongJae Park int err; 1168baa489faSSeongJae Park int i; 1169baa489faSSeongJae Park 1170baa489faSSeongJae Park /* Note: 0 is the default pkey, so don't mess with it */ 1171baa489faSSeongJae Park for (i = 1; i < NR_PKEYS; i++) { 1172baa489faSSeongJae Park if (pkey == i) 1173baa489faSSeongJae Park continue; 1174baa489faSSeongJae Park 1175baa489faSSeongJae Park dprintf1("trying get/set/free to non-allocated pkey: %2d\n", i); 1176baa489faSSeongJae Park err = sys_pkey_free(i); 1177baa489faSSeongJae Park pkey_assert(err); 1178baa489faSSeongJae Park 1179baa489faSSeongJae Park err = sys_pkey_free(i); 1180baa489faSSeongJae Park pkey_assert(err); 1181baa489faSSeongJae Park 1182baa489faSSeongJae Park err = sys_mprotect_pkey(ptr, PAGE_SIZE, PROT_READ, i); 1183baa489faSSeongJae Park pkey_assert(err); 1184baa489faSSeongJae Park } 1185baa489faSSeongJae Park } 1186baa489faSSeongJae Park 1187baa489faSSeongJae Park /* Assumes that all pkeys other than 'pkey' are unallocated */ 1188baa489faSSeongJae Park void test_pkey_syscalls_bad_args(int *ptr, u16 pkey) 1189baa489faSSeongJae Park { 1190baa489faSSeongJae Park int err; 1191baa489faSSeongJae Park int bad_pkey = NR_PKEYS+99; 1192baa489faSSeongJae Park 1193baa489faSSeongJae Park /* pass a known-invalid pkey in: */ 1194baa489faSSeongJae Park err = sys_mprotect_pkey(ptr, PAGE_SIZE, PROT_READ, bad_pkey); 1195baa489faSSeongJae Park pkey_assert(err); 1196baa489faSSeongJae Park } 1197baa489faSSeongJae Park 1198baa489faSSeongJae Park void become_child(void) 1199baa489faSSeongJae Park { 1200baa489faSSeongJae Park pid_t forkret; 1201baa489faSSeongJae Park 1202baa489faSSeongJae Park forkret = fork(); 1203baa489faSSeongJae Park pkey_assert(forkret >= 0); 1204baa489faSSeongJae Park dprintf3("[%d] fork() ret: %d\n", getpid(), forkret); 1205baa489faSSeongJae Park 1206baa489faSSeongJae Park if (!forkret) { 1207baa489faSSeongJae Park /* in the child */ 1208baa489faSSeongJae Park return; 1209baa489faSSeongJae Park } 1210baa489faSSeongJae Park exit(0); 1211baa489faSSeongJae Park } 1212baa489faSSeongJae Park 1213baa489faSSeongJae Park /* Assumes that all pkeys other than 'pkey' are unallocated */ 1214baa489faSSeongJae Park void test_pkey_alloc_exhaust(int *ptr, u16 pkey) 1215baa489faSSeongJae Park { 1216baa489faSSeongJae Park int err; 1217baa489faSSeongJae Park int allocated_pkeys[NR_PKEYS] = {0}; 1218baa489faSSeongJae Park int nr_allocated_pkeys = 0; 1219baa489faSSeongJae Park int i; 1220baa489faSSeongJae Park 1221baa489faSSeongJae Park for (i = 0; i < NR_PKEYS*3; i++) { 1222baa489faSSeongJae Park int new_pkey; 1223baa489faSSeongJae Park dprintf1("%s() alloc loop: %d\n", __func__, i); 1224baa489faSSeongJae Park new_pkey = alloc_pkey(); 1225baa489faSSeongJae Park dprintf4("%s()::%d, err: %d pkey_reg: 0x%016llx" 1226baa489faSSeongJae Park " shadow: 0x%016llx\n", 1227baa489faSSeongJae Park __func__, __LINE__, err, __read_pkey_reg(), 1228baa489faSSeongJae Park shadow_pkey_reg); 1229baa489faSSeongJae Park read_pkey_reg(); /* for shadow checking */ 1230baa489faSSeongJae Park dprintf2("%s() errno: %d ENOSPC: %d\n", __func__, errno, ENOSPC); 1231baa489faSSeongJae Park if ((new_pkey == -1) && (errno == ENOSPC)) { 1232baa489faSSeongJae Park dprintf2("%s() failed to allocate pkey after %d tries\n", 1233baa489faSSeongJae Park __func__, nr_allocated_pkeys); 1234baa489faSSeongJae Park } else { 1235baa489faSSeongJae Park /* 1236baa489faSSeongJae Park * Ensure the number of successes never 1237baa489faSSeongJae Park * exceeds the number of keys supported 1238baa489faSSeongJae Park * in the hardware. 1239baa489faSSeongJae Park */ 1240baa489faSSeongJae Park pkey_assert(nr_allocated_pkeys < NR_PKEYS); 1241baa489faSSeongJae Park allocated_pkeys[nr_allocated_pkeys++] = new_pkey; 1242baa489faSSeongJae Park } 1243baa489faSSeongJae Park 1244baa489faSSeongJae Park /* 1245baa489faSSeongJae Park * Make sure that allocation state is properly 1246baa489faSSeongJae Park * preserved across fork(). 1247baa489faSSeongJae Park */ 1248baa489faSSeongJae Park if (i == NR_PKEYS*2) 1249baa489faSSeongJae Park become_child(); 1250baa489faSSeongJae Park } 1251baa489faSSeongJae Park 1252baa489faSSeongJae Park dprintf3("%s()::%d\n", __func__, __LINE__); 1253baa489faSSeongJae Park 1254baa489faSSeongJae Park /* 1255baa489faSSeongJae Park * On x86: 1256baa489faSSeongJae Park * There are 16 pkeys supported in hardware. Three are 1257baa489faSSeongJae Park * allocated by the time we get here: 1258baa489faSSeongJae Park * 1. The default key (0) 1259baa489faSSeongJae Park * 2. One possibly consumed by an execute-only mapping. 1260baa489faSSeongJae Park * 3. One allocated by the test code and passed in via 1261baa489faSSeongJae Park * 'pkey' to this function. 1262baa489faSSeongJae Park * Ensure that we can allocate at least another 13 (16-3). 1263baa489faSSeongJae Park * 1264baa489faSSeongJae Park * On powerpc: 1265baa489faSSeongJae Park * There are either 5, 28, 29 or 32 pkeys supported in 1266baa489faSSeongJae Park * hardware depending on the page size (4K or 64K) and 1267baa489faSSeongJae Park * platform (powernv or powervm). Four are allocated by 1268baa489faSSeongJae Park * the time we get here. These include pkey-0, pkey-1, 1269baa489faSSeongJae Park * exec-only pkey and the one allocated by the test code. 1270baa489faSSeongJae Park * Ensure that we can allocate the remaining. 1271baa489faSSeongJae Park */ 1272baa489faSSeongJae Park pkey_assert(i >= (NR_PKEYS - get_arch_reserved_keys() - 1)); 1273baa489faSSeongJae Park 1274baa489faSSeongJae Park for (i = 0; i < nr_allocated_pkeys; i++) { 1275baa489faSSeongJae Park err = sys_pkey_free(allocated_pkeys[i]); 1276baa489faSSeongJae Park pkey_assert(!err); 1277baa489faSSeongJae Park read_pkey_reg(); /* for shadow checking */ 1278baa489faSSeongJae Park } 1279baa489faSSeongJae Park } 1280baa489faSSeongJae Park 1281baa489faSSeongJae Park void arch_force_pkey_reg_init(void) 1282baa489faSSeongJae Park { 1283baa489faSSeongJae Park #if defined(__i386__) || defined(__x86_64__) /* arch */ 1284baa489faSSeongJae Park u64 *buf; 1285baa489faSSeongJae Park 1286baa489faSSeongJae Park /* 1287baa489faSSeongJae Park * All keys should be allocated and set to allow reads and 1288baa489faSSeongJae Park * writes, so the register should be all 0. If not, just 1289baa489faSSeongJae Park * skip the test. 1290baa489faSSeongJae Park */ 1291baa489faSSeongJae Park if (read_pkey_reg()) 1292baa489faSSeongJae Park return; 1293baa489faSSeongJae Park 1294baa489faSSeongJae Park /* 1295baa489faSSeongJae Park * Just allocate an absurd about of memory rather than 1296baa489faSSeongJae Park * doing the XSAVE size enumeration dance. 1297baa489faSSeongJae Park */ 1298baa489faSSeongJae Park buf = mmap(NULL, 1*MB, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); 1299baa489faSSeongJae Park 1300baa489faSSeongJae Park /* These __builtins require compiling with -mxsave */ 1301baa489faSSeongJae Park 1302baa489faSSeongJae Park /* XSAVE to build a valid buffer: */ 1303baa489faSSeongJae Park __builtin_ia32_xsave(buf, XSTATE_PKEY); 1304baa489faSSeongJae Park /* Clear XSTATE_BV[PKRU]: */ 1305baa489faSSeongJae Park buf[XSTATE_BV_OFFSET/sizeof(u64)] &= ~XSTATE_PKEY; 1306baa489faSSeongJae Park /* XRSTOR will likely get PKRU back to the init state: */ 1307baa489faSSeongJae Park __builtin_ia32_xrstor(buf, XSTATE_PKEY); 1308baa489faSSeongJae Park 1309baa489faSSeongJae Park munmap(buf, 1*MB); 1310baa489faSSeongJae Park #endif 1311baa489faSSeongJae Park } 1312baa489faSSeongJae Park 1313baa489faSSeongJae Park 1314baa489faSSeongJae Park /* 1315baa489faSSeongJae Park * This is mostly useless on ppc for now. But it will not 1316baa489faSSeongJae Park * hurt anything and should give some better coverage as 1317baa489faSSeongJae Park * a long-running test that continually checks the pkey 1318baa489faSSeongJae Park * register. 1319baa489faSSeongJae Park */ 1320baa489faSSeongJae Park void test_pkey_init_state(int *ptr, u16 pkey) 1321baa489faSSeongJae Park { 1322baa489faSSeongJae Park int err; 1323baa489faSSeongJae Park int allocated_pkeys[NR_PKEYS] = {0}; 1324baa489faSSeongJae Park int nr_allocated_pkeys = 0; 1325baa489faSSeongJae Park int i; 1326baa489faSSeongJae Park 1327baa489faSSeongJae Park for (i = 0; i < NR_PKEYS; i++) { 1328baa489faSSeongJae Park int new_pkey = alloc_pkey(); 1329baa489faSSeongJae Park 1330baa489faSSeongJae Park if (new_pkey < 0) 1331baa489faSSeongJae Park continue; 1332baa489faSSeongJae Park allocated_pkeys[nr_allocated_pkeys++] = new_pkey; 1333baa489faSSeongJae Park } 1334baa489faSSeongJae Park 1335baa489faSSeongJae Park dprintf3("%s()::%d\n", __func__, __LINE__); 1336baa489faSSeongJae Park 1337baa489faSSeongJae Park arch_force_pkey_reg_init(); 1338baa489faSSeongJae Park 1339baa489faSSeongJae Park /* 1340baa489faSSeongJae Park * Loop for a bit, hoping to get exercise the kernel 1341baa489faSSeongJae Park * context switch code. 1342baa489faSSeongJae Park */ 1343baa489faSSeongJae Park for (i = 0; i < 1000000; i++) 1344baa489faSSeongJae Park read_pkey_reg(); 1345baa489faSSeongJae Park 1346baa489faSSeongJae Park for (i = 0; i < nr_allocated_pkeys; i++) { 1347baa489faSSeongJae Park err = sys_pkey_free(allocated_pkeys[i]); 1348baa489faSSeongJae Park pkey_assert(!err); 1349baa489faSSeongJae Park read_pkey_reg(); /* for shadow checking */ 1350baa489faSSeongJae Park } 1351baa489faSSeongJae Park } 1352baa489faSSeongJae Park 1353baa489faSSeongJae Park /* 1354baa489faSSeongJae Park * pkey 0 is special. It is allocated by default, so you do not 1355baa489faSSeongJae Park * have to call pkey_alloc() to use it first. Make sure that it 1356baa489faSSeongJae Park * is usable. 1357baa489faSSeongJae Park */ 1358baa489faSSeongJae Park void test_mprotect_with_pkey_0(int *ptr, u16 pkey) 1359baa489faSSeongJae Park { 1360baa489faSSeongJae Park long size; 1361baa489faSSeongJae Park int prot; 1362baa489faSSeongJae Park 1363baa489faSSeongJae Park assert(pkey_last_malloc_record); 1364baa489faSSeongJae Park size = pkey_last_malloc_record->size; 1365baa489faSSeongJae Park /* 1366baa489faSSeongJae Park * This is a bit of a hack. But mprotect() requires 1367baa489faSSeongJae Park * huge-page-aligned sizes when operating on hugetlbfs. 1368baa489faSSeongJae Park * So, make sure that we use something that's a multiple 1369baa489faSSeongJae Park * of a huge page when we can. 1370baa489faSSeongJae Park */ 1371baa489faSSeongJae Park if (size >= HPAGE_SIZE) 1372baa489faSSeongJae Park size = HPAGE_SIZE; 1373baa489faSSeongJae Park prot = pkey_last_malloc_record->prot; 1374baa489faSSeongJae Park 1375baa489faSSeongJae Park /* Use pkey 0 */ 1376baa489faSSeongJae Park mprotect_pkey(ptr, size, prot, 0); 1377baa489faSSeongJae Park 1378baa489faSSeongJae Park /* Make sure that we can set it back to the original pkey. */ 1379baa489faSSeongJae Park mprotect_pkey(ptr, size, prot, pkey); 1380baa489faSSeongJae Park } 1381baa489faSSeongJae Park 1382baa489faSSeongJae Park void test_ptrace_of_child(int *ptr, u16 pkey) 1383baa489faSSeongJae Park { 1384baa489faSSeongJae Park __attribute__((__unused__)) int peek_result; 1385baa489faSSeongJae Park pid_t child_pid; 1386baa489faSSeongJae Park void *ignored = 0; 1387baa489faSSeongJae Park long ret; 1388baa489faSSeongJae Park int status; 1389baa489faSSeongJae Park /* 1390baa489faSSeongJae Park * This is the "control" for our little expermient. Make sure 1391baa489faSSeongJae Park * we can always access it when ptracing. 1392baa489faSSeongJae Park */ 1393baa489faSSeongJae Park int *plain_ptr_unaligned = malloc(HPAGE_SIZE); 1394baa489faSSeongJae Park int *plain_ptr = ALIGN_PTR_UP(plain_ptr_unaligned, PAGE_SIZE); 1395baa489faSSeongJae Park 1396baa489faSSeongJae Park /* 1397baa489faSSeongJae Park * Fork a child which is an exact copy of this process, of course. 1398baa489faSSeongJae Park * That means we can do all of our tests via ptrace() and then plain 1399baa489faSSeongJae Park * memory access and ensure they work differently. 1400baa489faSSeongJae Park */ 1401baa489faSSeongJae Park child_pid = fork_lazy_child(); 1402baa489faSSeongJae Park dprintf1("[%d] child pid: %d\n", getpid(), child_pid); 1403baa489faSSeongJae Park 1404baa489faSSeongJae Park ret = ptrace(PTRACE_ATTACH, child_pid, ignored, ignored); 1405baa489faSSeongJae Park if (ret) 1406baa489faSSeongJae Park perror("attach"); 1407baa489faSSeongJae Park dprintf1("[%d] attach ret: %ld %d\n", getpid(), ret, __LINE__); 1408baa489faSSeongJae Park pkey_assert(ret != -1); 1409baa489faSSeongJae Park ret = waitpid(child_pid, &status, WUNTRACED); 1410baa489faSSeongJae Park if ((ret != child_pid) || !(WIFSTOPPED(status))) { 1411baa489faSSeongJae Park fprintf(stderr, "weird waitpid result %ld stat %x\n", 1412baa489faSSeongJae Park ret, status); 1413baa489faSSeongJae Park pkey_assert(0); 1414baa489faSSeongJae Park } 1415baa489faSSeongJae Park dprintf2("waitpid ret: %ld\n", ret); 1416baa489faSSeongJae Park dprintf2("waitpid status: %d\n", status); 1417baa489faSSeongJae Park 1418baa489faSSeongJae Park pkey_access_deny(pkey); 1419baa489faSSeongJae Park pkey_write_deny(pkey); 1420baa489faSSeongJae Park 1421baa489faSSeongJae Park /* Write access, untested for now: 1422baa489faSSeongJae Park ret = ptrace(PTRACE_POKEDATA, child_pid, peek_at, data); 1423baa489faSSeongJae Park pkey_assert(ret != -1); 1424baa489faSSeongJae Park dprintf1("poke at %p: %ld\n", peek_at, ret); 1425baa489faSSeongJae Park */ 1426baa489faSSeongJae Park 1427baa489faSSeongJae Park /* 1428baa489faSSeongJae Park * Try to access the pkey-protected "ptr" via ptrace: 1429baa489faSSeongJae Park */ 1430baa489faSSeongJae Park ret = ptrace(PTRACE_PEEKDATA, child_pid, ptr, ignored); 1431baa489faSSeongJae Park /* expect it to work, without an error: */ 1432baa489faSSeongJae Park pkey_assert(ret != -1); 1433baa489faSSeongJae Park /* Now access from the current task, and expect an exception: */ 1434baa489faSSeongJae Park peek_result = read_ptr(ptr); 1435baa489faSSeongJae Park expected_pkey_fault(pkey); 1436baa489faSSeongJae Park 1437baa489faSSeongJae Park /* 1438baa489faSSeongJae Park * Try to access the NON-pkey-protected "plain_ptr" via ptrace: 1439baa489faSSeongJae Park */ 1440baa489faSSeongJae Park ret = ptrace(PTRACE_PEEKDATA, child_pid, plain_ptr, ignored); 1441baa489faSSeongJae Park /* expect it to work, without an error: */ 1442baa489faSSeongJae Park pkey_assert(ret != -1); 1443baa489faSSeongJae Park /* Now access from the current task, and expect NO exception: */ 1444baa489faSSeongJae Park peek_result = read_ptr(plain_ptr); 1445baa489faSSeongJae Park do_not_expect_pkey_fault("read plain pointer after ptrace"); 1446baa489faSSeongJae Park 1447baa489faSSeongJae Park ret = ptrace(PTRACE_DETACH, child_pid, ignored, 0); 1448baa489faSSeongJae Park pkey_assert(ret != -1); 1449baa489faSSeongJae Park 1450baa489faSSeongJae Park ret = kill(child_pid, SIGKILL); 1451baa489faSSeongJae Park pkey_assert(ret != -1); 1452baa489faSSeongJae Park 1453baa489faSSeongJae Park wait(&status); 1454baa489faSSeongJae Park 1455baa489faSSeongJae Park free(plain_ptr_unaligned); 1456baa489faSSeongJae Park } 1457baa489faSSeongJae Park 1458baa489faSSeongJae Park void *get_pointer_to_instructions(void) 1459baa489faSSeongJae Park { 1460baa489faSSeongJae Park void *p1; 1461baa489faSSeongJae Park 1462baa489faSSeongJae Park p1 = ALIGN_PTR_UP(&lots_o_noops_around_write, PAGE_SIZE); 1463baa489faSSeongJae Park dprintf3("&lots_o_noops: %p\n", &lots_o_noops_around_write); 1464baa489faSSeongJae Park /* lots_o_noops_around_write should be page-aligned already */ 1465baa489faSSeongJae Park assert(p1 == &lots_o_noops_around_write); 1466baa489faSSeongJae Park 1467baa489faSSeongJae Park /* Point 'p1' at the *second* page of the function: */ 1468baa489faSSeongJae Park p1 += PAGE_SIZE; 1469baa489faSSeongJae Park 1470baa489faSSeongJae Park /* 1471baa489faSSeongJae Park * Try to ensure we fault this in on next touch to ensure 1472baa489faSSeongJae Park * we get an instruction fault as opposed to a data one 1473baa489faSSeongJae Park */ 1474baa489faSSeongJae Park madvise(p1, PAGE_SIZE, MADV_DONTNEED); 1475baa489faSSeongJae Park 1476baa489faSSeongJae Park return p1; 1477baa489faSSeongJae Park } 1478baa489faSSeongJae Park 1479baa489faSSeongJae Park void test_executing_on_unreadable_memory(int *ptr, u16 pkey) 1480baa489faSSeongJae Park { 1481baa489faSSeongJae Park void *p1; 1482baa489faSSeongJae Park int scratch; 1483baa489faSSeongJae Park int ptr_contents; 1484baa489faSSeongJae Park int ret; 1485baa489faSSeongJae Park 1486baa489faSSeongJae Park p1 = get_pointer_to_instructions(); 1487baa489faSSeongJae Park lots_o_noops_around_write(&scratch); 1488baa489faSSeongJae Park ptr_contents = read_ptr(p1); 1489baa489faSSeongJae Park dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents); 1490baa489faSSeongJae Park 1491baa489faSSeongJae Park ret = mprotect_pkey(p1, PAGE_SIZE, PROT_EXEC, (u64)pkey); 1492baa489faSSeongJae Park pkey_assert(!ret); 1493baa489faSSeongJae Park pkey_access_deny(pkey); 1494baa489faSSeongJae Park 1495baa489faSSeongJae Park dprintf2("pkey_reg: %016llx\n", read_pkey_reg()); 1496baa489faSSeongJae Park 1497baa489faSSeongJae Park /* 1498baa489faSSeongJae Park * Make sure this is an *instruction* fault 1499baa489faSSeongJae Park */ 1500baa489faSSeongJae Park madvise(p1, PAGE_SIZE, MADV_DONTNEED); 1501baa489faSSeongJae Park lots_o_noops_around_write(&scratch); 1502baa489faSSeongJae Park do_not_expect_pkey_fault("executing on PROT_EXEC memory"); 1503baa489faSSeongJae Park expect_fault_on_read_execonly_key(p1, pkey); 1504baa489faSSeongJae Park } 1505baa489faSSeongJae Park 1506baa489faSSeongJae Park void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey) 1507baa489faSSeongJae Park { 1508baa489faSSeongJae Park void *p1; 1509baa489faSSeongJae Park int scratch; 1510baa489faSSeongJae Park int ptr_contents; 1511baa489faSSeongJae Park int ret; 1512baa489faSSeongJae Park 1513baa489faSSeongJae Park dprintf1("%s() start\n", __func__); 1514baa489faSSeongJae Park 1515baa489faSSeongJae Park p1 = get_pointer_to_instructions(); 1516baa489faSSeongJae Park lots_o_noops_around_write(&scratch); 1517baa489faSSeongJae Park ptr_contents = read_ptr(p1); 1518baa489faSSeongJae Park dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents); 1519baa489faSSeongJae Park 1520baa489faSSeongJae Park /* Use a *normal* mprotect(), not mprotect_pkey(): */ 1521baa489faSSeongJae Park ret = mprotect(p1, PAGE_SIZE, PROT_EXEC); 1522baa489faSSeongJae Park pkey_assert(!ret); 1523baa489faSSeongJae Park 1524baa489faSSeongJae Park /* 1525baa489faSSeongJae Park * Reset the shadow, assuming that the above mprotect() 1526baa489faSSeongJae Park * correctly changed PKRU, but to an unknown value since 1527baa489faSSeongJae Park * the actual allocated pkey is unknown. 1528baa489faSSeongJae Park */ 1529baa489faSSeongJae Park shadow_pkey_reg = __read_pkey_reg(); 1530baa489faSSeongJae Park 1531baa489faSSeongJae Park dprintf2("pkey_reg: %016llx\n", read_pkey_reg()); 1532baa489faSSeongJae Park 1533baa489faSSeongJae Park /* Make sure this is an *instruction* fault */ 1534baa489faSSeongJae Park madvise(p1, PAGE_SIZE, MADV_DONTNEED); 1535baa489faSSeongJae Park lots_o_noops_around_write(&scratch); 1536baa489faSSeongJae Park do_not_expect_pkey_fault("executing on PROT_EXEC memory"); 1537baa489faSSeongJae Park expect_fault_on_read_execonly_key(p1, UNKNOWN_PKEY); 1538baa489faSSeongJae Park 1539baa489faSSeongJae Park /* 1540baa489faSSeongJae Park * Put the memory back to non-PROT_EXEC. Should clear the 1541baa489faSSeongJae Park * exec-only pkey off the VMA and allow it to be readable 1542baa489faSSeongJae Park * again. Go to PROT_NONE first to check for a kernel bug 1543baa489faSSeongJae Park * that did not clear the pkey when doing PROT_NONE. 1544baa489faSSeongJae Park */ 1545baa489faSSeongJae Park ret = mprotect(p1, PAGE_SIZE, PROT_NONE); 1546baa489faSSeongJae Park pkey_assert(!ret); 1547baa489faSSeongJae Park 1548baa489faSSeongJae Park ret = mprotect(p1, PAGE_SIZE, PROT_READ|PROT_EXEC); 1549baa489faSSeongJae Park pkey_assert(!ret); 1550baa489faSSeongJae Park ptr_contents = read_ptr(p1); 1551baa489faSSeongJae Park do_not_expect_pkey_fault("plain read on recently PROT_EXEC area"); 1552baa489faSSeongJae Park } 1553baa489faSSeongJae Park 1554baa489faSSeongJae Park #if defined(__i386__) || defined(__x86_64__) 1555baa489faSSeongJae Park void test_ptrace_modifies_pkru(int *ptr, u16 pkey) 1556baa489faSSeongJae Park { 1557baa489faSSeongJae Park u32 new_pkru; 1558baa489faSSeongJae Park pid_t child; 1559baa489faSSeongJae Park int status, ret; 1560baa489faSSeongJae Park int pkey_offset = pkey_reg_xstate_offset(); 1561baa489faSSeongJae Park size_t xsave_size = cpu_max_xsave_size(); 1562baa489faSSeongJae Park void *xsave; 1563baa489faSSeongJae Park u32 *pkey_register; 1564baa489faSSeongJae Park u64 *xstate_bv; 1565baa489faSSeongJae Park struct iovec iov; 1566baa489faSSeongJae Park 1567baa489faSSeongJae Park new_pkru = ~read_pkey_reg(); 1568baa489faSSeongJae Park /* Don't make PROT_EXEC mappings inaccessible */ 1569baa489faSSeongJae Park new_pkru &= ~3; 1570baa489faSSeongJae Park 1571baa489faSSeongJae Park child = fork(); 1572baa489faSSeongJae Park pkey_assert(child >= 0); 1573baa489faSSeongJae Park dprintf3("[%d] fork() ret: %d\n", getpid(), child); 1574baa489faSSeongJae Park if (!child) { 1575baa489faSSeongJae Park ptrace(PTRACE_TRACEME, 0, 0, 0); 1576baa489faSSeongJae Park /* Stop and allow the tracer to modify PKRU directly */ 1577baa489faSSeongJae Park raise(SIGSTOP); 1578baa489faSSeongJae Park 1579baa489faSSeongJae Park /* 1580baa489faSSeongJae Park * need __read_pkey_reg() version so we do not do shadow_pkey_reg 1581baa489faSSeongJae Park * checking 1582baa489faSSeongJae Park */ 1583baa489faSSeongJae Park if (__read_pkey_reg() != new_pkru) 1584baa489faSSeongJae Park exit(1); 1585baa489faSSeongJae Park 1586baa489faSSeongJae Park /* Stop and allow the tracer to clear XSTATE_BV for PKRU */ 1587baa489faSSeongJae Park raise(SIGSTOP); 1588baa489faSSeongJae Park 1589baa489faSSeongJae Park if (__read_pkey_reg() != 0) 1590baa489faSSeongJae Park exit(1); 1591baa489faSSeongJae Park 1592baa489faSSeongJae Park /* Stop and allow the tracer to examine PKRU */ 1593baa489faSSeongJae Park raise(SIGSTOP); 1594baa489faSSeongJae Park 1595baa489faSSeongJae Park exit(0); 1596baa489faSSeongJae Park } 1597baa489faSSeongJae Park 1598baa489faSSeongJae Park pkey_assert(child == waitpid(child, &status, 0)); 1599baa489faSSeongJae Park dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status); 1600baa489faSSeongJae Park pkey_assert(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP); 1601baa489faSSeongJae Park 1602baa489faSSeongJae Park xsave = (void *)malloc(xsave_size); 1603baa489faSSeongJae Park pkey_assert(xsave > 0); 1604baa489faSSeongJae Park 1605baa489faSSeongJae Park /* Modify the PKRU register directly */ 1606baa489faSSeongJae Park iov.iov_base = xsave; 1607baa489faSSeongJae Park iov.iov_len = xsave_size; 1608baa489faSSeongJae Park ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_X86_XSTATE, &iov); 1609baa489faSSeongJae Park pkey_assert(ret == 0); 1610baa489faSSeongJae Park 1611baa489faSSeongJae Park pkey_register = (u32 *)(xsave + pkey_offset); 1612baa489faSSeongJae Park pkey_assert(*pkey_register == read_pkey_reg()); 1613baa489faSSeongJae Park 1614baa489faSSeongJae Park *pkey_register = new_pkru; 1615baa489faSSeongJae Park 1616baa489faSSeongJae Park ret = ptrace(PTRACE_SETREGSET, child, (void *)NT_X86_XSTATE, &iov); 1617baa489faSSeongJae Park pkey_assert(ret == 0); 1618baa489faSSeongJae Park 1619baa489faSSeongJae Park /* Test that the modification is visible in ptrace before any execution */ 1620baa489faSSeongJae Park memset(xsave, 0xCC, xsave_size); 1621baa489faSSeongJae Park ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_X86_XSTATE, &iov); 1622baa489faSSeongJae Park pkey_assert(ret == 0); 1623baa489faSSeongJae Park pkey_assert(*pkey_register == new_pkru); 1624baa489faSSeongJae Park 1625baa489faSSeongJae Park /* Execute the tracee */ 1626baa489faSSeongJae Park ret = ptrace(PTRACE_CONT, child, 0, 0); 1627baa489faSSeongJae Park pkey_assert(ret == 0); 1628baa489faSSeongJae Park 1629baa489faSSeongJae Park /* Test that the tracee saw the PKRU value change */ 1630baa489faSSeongJae Park pkey_assert(child == waitpid(child, &status, 0)); 1631baa489faSSeongJae Park dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status); 1632baa489faSSeongJae Park pkey_assert(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP); 1633baa489faSSeongJae Park 1634baa489faSSeongJae Park /* Test that the modification is visible in ptrace after execution */ 1635baa489faSSeongJae Park memset(xsave, 0xCC, xsave_size); 1636baa489faSSeongJae Park ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_X86_XSTATE, &iov); 1637baa489faSSeongJae Park pkey_assert(ret == 0); 1638baa489faSSeongJae Park pkey_assert(*pkey_register == new_pkru); 1639baa489faSSeongJae Park 1640baa489faSSeongJae Park /* Clear the PKRU bit from XSTATE_BV */ 1641baa489faSSeongJae Park xstate_bv = (u64 *)(xsave + 512); 1642baa489faSSeongJae Park *xstate_bv &= ~(1 << 9); 1643baa489faSSeongJae Park 1644baa489faSSeongJae Park ret = ptrace(PTRACE_SETREGSET, child, (void *)NT_X86_XSTATE, &iov); 1645baa489faSSeongJae Park pkey_assert(ret == 0); 1646baa489faSSeongJae Park 1647baa489faSSeongJae Park /* Test that the modification is visible in ptrace before any execution */ 1648baa489faSSeongJae Park memset(xsave, 0xCC, xsave_size); 1649baa489faSSeongJae Park ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_X86_XSTATE, &iov); 1650baa489faSSeongJae Park pkey_assert(ret == 0); 1651baa489faSSeongJae Park pkey_assert(*pkey_register == 0); 1652baa489faSSeongJae Park 1653baa489faSSeongJae Park ret = ptrace(PTRACE_CONT, child, 0, 0); 1654baa489faSSeongJae Park pkey_assert(ret == 0); 1655baa489faSSeongJae Park 1656baa489faSSeongJae Park /* Test that the tracee saw the PKRU value go to 0 */ 1657baa489faSSeongJae Park pkey_assert(child == waitpid(child, &status, 0)); 1658baa489faSSeongJae Park dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status); 1659baa489faSSeongJae Park pkey_assert(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP); 1660baa489faSSeongJae Park 1661baa489faSSeongJae Park /* Test that the modification is visible in ptrace after execution */ 1662baa489faSSeongJae Park memset(xsave, 0xCC, xsave_size); 1663baa489faSSeongJae Park ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_X86_XSTATE, &iov); 1664baa489faSSeongJae Park pkey_assert(ret == 0); 1665baa489faSSeongJae Park pkey_assert(*pkey_register == 0); 1666baa489faSSeongJae Park 1667baa489faSSeongJae Park ret = ptrace(PTRACE_CONT, child, 0, 0); 1668baa489faSSeongJae Park pkey_assert(ret == 0); 1669baa489faSSeongJae Park pkey_assert(child == waitpid(child, &status, 0)); 1670baa489faSSeongJae Park dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status); 1671baa489faSSeongJae Park pkey_assert(WIFEXITED(status)); 1672baa489faSSeongJae Park pkey_assert(WEXITSTATUS(status) == 0); 1673baa489faSSeongJae Park free(xsave); 1674baa489faSSeongJae Park } 1675baa489faSSeongJae Park #endif 1676baa489faSSeongJae Park 1677baa489faSSeongJae Park void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey) 1678baa489faSSeongJae Park { 1679baa489faSSeongJae Park int size = PAGE_SIZE; 1680baa489faSSeongJae Park int sret; 1681baa489faSSeongJae Park 1682baa489faSSeongJae Park if (cpu_has_pkeys()) { 1683baa489faSSeongJae Park dprintf1("SKIP: %s: no CPU support\n", __func__); 1684baa489faSSeongJae Park return; 1685baa489faSSeongJae Park } 1686baa489faSSeongJae Park 1687baa489faSSeongJae Park sret = syscall(SYS_mprotect_key, ptr, size, PROT_READ, pkey); 1688baa489faSSeongJae Park pkey_assert(sret < 0); 1689baa489faSSeongJae Park } 1690baa489faSSeongJae Park 1691baa489faSSeongJae Park void (*pkey_tests[])(int *ptr, u16 pkey) = { 1692baa489faSSeongJae Park test_read_of_write_disabled_region, 1693baa489faSSeongJae Park test_read_of_access_disabled_region, 1694baa489faSSeongJae Park test_read_of_access_disabled_region_with_page_already_mapped, 1695baa489faSSeongJae Park test_write_of_write_disabled_region, 1696baa489faSSeongJae Park test_write_of_write_disabled_region_with_page_already_mapped, 1697baa489faSSeongJae Park test_write_of_access_disabled_region, 1698baa489faSSeongJae Park test_write_of_access_disabled_region_with_page_already_mapped, 1699baa489faSSeongJae Park test_kernel_write_of_access_disabled_region, 1700baa489faSSeongJae Park test_kernel_write_of_write_disabled_region, 1701baa489faSSeongJae Park test_kernel_gup_of_access_disabled_region, 1702baa489faSSeongJae Park test_kernel_gup_write_to_write_disabled_region, 1703baa489faSSeongJae Park test_executing_on_unreadable_memory, 1704baa489faSSeongJae Park test_implicit_mprotect_exec_only_memory, 1705baa489faSSeongJae Park test_mprotect_with_pkey_0, 1706baa489faSSeongJae Park test_ptrace_of_child, 1707baa489faSSeongJae Park test_pkey_init_state, 1708baa489faSSeongJae Park test_pkey_syscalls_on_non_allocated_pkey, 1709baa489faSSeongJae Park test_pkey_syscalls_bad_args, 1710baa489faSSeongJae Park test_pkey_alloc_exhaust, 1711baa489faSSeongJae Park test_pkey_alloc_free_attach_pkey0, 1712baa489faSSeongJae Park #if defined(__i386__) || defined(__x86_64__) 1713baa489faSSeongJae Park test_ptrace_modifies_pkru, 1714baa489faSSeongJae Park #endif 1715baa489faSSeongJae Park }; 1716baa489faSSeongJae Park 1717baa489faSSeongJae Park void run_tests_once(void) 1718baa489faSSeongJae Park { 1719baa489faSSeongJae Park int *ptr; 1720baa489faSSeongJae Park int prot = PROT_READ|PROT_WRITE; 1721baa489faSSeongJae Park 1722baa489faSSeongJae Park for (test_nr = 0; test_nr < ARRAY_SIZE(pkey_tests); test_nr++) { 1723baa489faSSeongJae Park int pkey; 1724baa489faSSeongJae Park int orig_pkey_faults = pkey_faults; 1725baa489faSSeongJae Park 1726baa489faSSeongJae Park dprintf1("======================\n"); 1727baa489faSSeongJae Park dprintf1("test %d preparing...\n", test_nr); 1728baa489faSSeongJae Park 1729baa489faSSeongJae Park tracing_on(); 1730baa489faSSeongJae Park pkey = alloc_random_pkey(); 1731baa489faSSeongJae Park dprintf1("test %d starting with pkey: %d\n", test_nr, pkey); 1732baa489faSSeongJae Park ptr = malloc_pkey(PAGE_SIZE, prot, pkey); 1733baa489faSSeongJae Park dprintf1("test %d starting...\n", test_nr); 1734baa489faSSeongJae Park pkey_tests[test_nr](ptr, pkey); 1735baa489faSSeongJae Park dprintf1("freeing test memory: %p\n", ptr); 1736baa489faSSeongJae Park free_pkey_malloc(ptr); 1737baa489faSSeongJae Park sys_pkey_free(pkey); 1738baa489faSSeongJae Park 1739baa489faSSeongJae Park dprintf1("pkey_faults: %d\n", pkey_faults); 1740baa489faSSeongJae Park dprintf1("orig_pkey_faults: %d\n", orig_pkey_faults); 1741baa489faSSeongJae Park 1742baa489faSSeongJae Park tracing_off(); 1743baa489faSSeongJae Park close_test_fds(); 1744baa489faSSeongJae Park 1745baa489faSSeongJae Park printf("test %2d PASSED (iteration %d)\n", test_nr, iteration_nr); 1746baa489faSSeongJae Park dprintf1("======================\n\n"); 1747baa489faSSeongJae Park } 1748baa489faSSeongJae Park iteration_nr++; 1749baa489faSSeongJae Park } 1750baa489faSSeongJae Park 1751baa489faSSeongJae Park void pkey_setup_shadow(void) 1752baa489faSSeongJae Park { 1753baa489faSSeongJae Park shadow_pkey_reg = __read_pkey_reg(); 1754baa489faSSeongJae Park } 1755baa489faSSeongJae Park 1756baa489faSSeongJae Park int main(void) 1757baa489faSSeongJae Park { 1758baa489faSSeongJae Park int nr_iterations = 22; 1759baa489faSSeongJae Park int pkeys_supported = is_pkeys_supported(); 1760baa489faSSeongJae Park 1761baa489faSSeongJae Park srand((unsigned int)time(NULL)); 1762baa489faSSeongJae Park 1763baa489faSSeongJae Park setup_handlers(); 1764baa489faSSeongJae Park 1765baa489faSSeongJae Park printf("has pkeys: %d\n", pkeys_supported); 1766baa489faSSeongJae Park 1767baa489faSSeongJae Park if (!pkeys_supported) { 1768baa489faSSeongJae Park int size = PAGE_SIZE; 1769baa489faSSeongJae Park int *ptr; 1770baa489faSSeongJae Park 1771baa489faSSeongJae Park printf("running PKEY tests for unsupported CPU/OS\n"); 1772baa489faSSeongJae Park 1773baa489faSSeongJae Park ptr = mmap(NULL, size, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); 1774baa489faSSeongJae Park assert(ptr != (void *)-1); 1775baa489faSSeongJae Park test_mprotect_pkey_on_unsupported_cpu(ptr, 1); 1776baa489faSSeongJae Park exit(0); 1777baa489faSSeongJae Park } 1778baa489faSSeongJae Park 1779baa489faSSeongJae Park pkey_setup_shadow(); 1780baa489faSSeongJae Park printf("startup pkey_reg: %016llx\n", read_pkey_reg()); 1781baa489faSSeongJae Park setup_hugetlbfs(); 1782baa489faSSeongJae Park 1783baa489faSSeongJae Park while (nr_iterations-- > 0) 1784baa489faSSeongJae Park run_tests_once(); 1785baa489faSSeongJae Park 1786baa489faSSeongJae Park printf("done (all tests OK)\n"); 1787baa489faSSeongJae Park return 0; 1788baa489faSSeongJae Park } 1789