1 /* 2 * os-posix-lib.c 3 * 4 * Copyright (c) 2003-2008 Fabrice Bellard 5 * Copyright (c) 2010 Red Hat, Inc. 6 * 7 * QEMU library functions on POSIX which are shared between QEMU and 8 * the QEMU tools. 9 * 10 * Permission is hereby granted, free of charge, to any person obtaining a copy 11 * of this software and associated documentation files (the "Software"), to deal 12 * in the Software without restriction, including without limitation the rights 13 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 14 * copies of the Software, and to permit persons to whom the Software is 15 * furnished to do so, subject to the following conditions: 16 * 17 * The above copyright notice and this permission notice shall be included in 18 * all copies or substantial portions of the Software. 19 * 20 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 21 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 23 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 24 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 25 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 26 * THE SOFTWARE. 27 */ 28 29 #if defined(__linux__) && \ 30 (defined(__x86_64__) || defined(__arm__) || defined(__aarch64__)) 31 /* Use 2 MiB alignment so transparent hugepages can be used by KVM. 32 Valgrind does not support alignments larger than 1 MiB, 33 therefore we need special code which handles running on Valgrind. */ 34 # define QEMU_VMALLOC_ALIGN (512 * 4096) 35 #elif defined(__linux__) && defined(__s390x__) 36 /* Use 1 MiB (segment size) alignment so gmap can be used by KVM. */ 37 # define QEMU_VMALLOC_ALIGN (256 * 4096) 38 #else 39 # define QEMU_VMALLOC_ALIGN getpagesize() 40 #endif 41 42 #include "qemu/osdep.h" 43 #include <termios.h> 44 #include <termios.h> 45 46 #include <glib/gprintf.h> 47 48 #include "sysemu/sysemu.h" 49 #include "trace.h" 50 #include "qapi/error.h" 51 #include "qemu/sockets.h" 52 #include <sys/mman.h> 53 #include <libgen.h> 54 #include <sys/signal.h> 55 #include "qemu/cutils.h" 56 57 #ifdef CONFIG_LINUX 58 #include <sys/syscall.h> 59 #endif 60 61 #ifdef __FreeBSD__ 62 #include <sys/sysctl.h> 63 #endif 64 65 #include <qemu/mmap-alloc.h> 66 67 int qemu_get_thread_id(void) 68 { 69 #if defined(__linux__) 70 return syscall(SYS_gettid); 71 #else 72 return getpid(); 73 #endif 74 } 75 76 int qemu_daemon(int nochdir, int noclose) 77 { 78 return daemon(nochdir, noclose); 79 } 80 81 void *qemu_oom_check(void *ptr) 82 { 83 if (ptr == NULL) { 84 fprintf(stderr, "Failed to allocate memory: %s\n", strerror(errno)); 85 abort(); 86 } 87 return ptr; 88 } 89 90 void *qemu_try_memalign(size_t alignment, size_t size) 91 { 92 void *ptr; 93 94 if (alignment < sizeof(void*)) { 95 alignment = sizeof(void*); 96 } 97 98 #if defined(_POSIX_C_SOURCE) && !defined(__sun__) 99 int ret; 100 ret = posix_memalign(&ptr, alignment, size); 101 if (ret != 0) { 102 errno = ret; 103 ptr = NULL; 104 } 105 #elif defined(CONFIG_BSD) 106 ptr = valloc(size); 107 #else 108 ptr = memalign(alignment, size); 109 #endif 110 trace_qemu_memalign(alignment, size, ptr); 111 return ptr; 112 } 113 114 void *qemu_memalign(size_t alignment, size_t size) 115 { 116 return qemu_oom_check(qemu_try_memalign(alignment, size)); 117 } 118 119 /* alloc shared memory pages */ 120 void *qemu_anon_ram_alloc(size_t size, uint64_t *alignment) 121 { 122 size_t align = QEMU_VMALLOC_ALIGN; 123 void *ptr = qemu_ram_mmap(-1, size, align, false); 124 125 if (ptr == MAP_FAILED) { 126 return NULL; 127 } 128 129 if (alignment) { 130 *alignment = align; 131 } 132 133 trace_qemu_anon_ram_alloc(size, ptr); 134 return ptr; 135 } 136 137 void qemu_vfree(void *ptr) 138 { 139 trace_qemu_vfree(ptr); 140 free(ptr); 141 } 142 143 void qemu_anon_ram_free(void *ptr, size_t size) 144 { 145 trace_qemu_anon_ram_free(ptr, size); 146 qemu_ram_munmap(ptr, size); 147 } 148 149 void qemu_set_block(int fd) 150 { 151 int f; 152 f = fcntl(fd, F_GETFL); 153 fcntl(fd, F_SETFL, f & ~O_NONBLOCK); 154 } 155 156 void qemu_set_nonblock(int fd) 157 { 158 int f; 159 f = fcntl(fd, F_GETFL); 160 fcntl(fd, F_SETFL, f | O_NONBLOCK); 161 } 162 163 int socket_set_fast_reuse(int fd) 164 { 165 int val = 1, ret; 166 167 ret = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, 168 (const char *)&val, sizeof(val)); 169 170 assert(ret == 0); 171 172 return ret; 173 } 174 175 void qemu_set_cloexec(int fd) 176 { 177 int f; 178 f = fcntl(fd, F_GETFD); 179 fcntl(fd, F_SETFD, f | FD_CLOEXEC); 180 } 181 182 /* 183 * Creates a pipe with FD_CLOEXEC set on both file descriptors 184 */ 185 int qemu_pipe(int pipefd[2]) 186 { 187 int ret; 188 189 #ifdef CONFIG_PIPE2 190 ret = pipe2(pipefd, O_CLOEXEC); 191 if (ret != -1 || errno != ENOSYS) { 192 return ret; 193 } 194 #endif 195 ret = pipe(pipefd); 196 if (ret == 0) { 197 qemu_set_cloexec(pipefd[0]); 198 qemu_set_cloexec(pipefd[1]); 199 } 200 201 return ret; 202 } 203 204 int qemu_utimens(const char *path, const struct timespec *times) 205 { 206 struct timeval tv[2], tv_now; 207 struct stat st; 208 int i; 209 #ifdef CONFIG_UTIMENSAT 210 int ret; 211 212 ret = utimensat(AT_FDCWD, path, times, AT_SYMLINK_NOFOLLOW); 213 if (ret != -1 || errno != ENOSYS) { 214 return ret; 215 } 216 #endif 217 /* Fallback: use utimes() instead of utimensat() */ 218 219 /* happy if special cases */ 220 if (times[0].tv_nsec == UTIME_OMIT && times[1].tv_nsec == UTIME_OMIT) { 221 return 0; 222 } 223 if (times[0].tv_nsec == UTIME_NOW && times[1].tv_nsec == UTIME_NOW) { 224 return utimes(path, NULL); 225 } 226 227 /* prepare for hard cases */ 228 if (times[0].tv_nsec == UTIME_NOW || times[1].tv_nsec == UTIME_NOW) { 229 gettimeofday(&tv_now, NULL); 230 } 231 if (times[0].tv_nsec == UTIME_OMIT || times[1].tv_nsec == UTIME_OMIT) { 232 stat(path, &st); 233 } 234 235 for (i = 0; i < 2; i++) { 236 if (times[i].tv_nsec == UTIME_NOW) { 237 tv[i].tv_sec = tv_now.tv_sec; 238 tv[i].tv_usec = tv_now.tv_usec; 239 } else if (times[i].tv_nsec == UTIME_OMIT) { 240 tv[i].tv_sec = (i == 0) ? st.st_atime : st.st_mtime; 241 tv[i].tv_usec = 0; 242 } else { 243 tv[i].tv_sec = times[i].tv_sec; 244 tv[i].tv_usec = times[i].tv_nsec / 1000; 245 } 246 } 247 248 return utimes(path, &tv[0]); 249 } 250 251 char * 252 qemu_get_local_state_pathname(const char *relative_pathname) 253 { 254 return g_strdup_printf("%s/%s", CONFIG_QEMU_LOCALSTATEDIR, 255 relative_pathname); 256 } 257 258 void qemu_set_tty_echo(int fd, bool echo) 259 { 260 struct termios tty; 261 262 tcgetattr(fd, &tty); 263 264 if (echo) { 265 tty.c_lflag |= ECHO | ECHONL | ICANON | IEXTEN; 266 } else { 267 tty.c_lflag &= ~(ECHO | ECHONL | ICANON | IEXTEN); 268 } 269 270 tcsetattr(fd, TCSANOW, &tty); 271 } 272 273 static char exec_dir[PATH_MAX]; 274 275 void qemu_init_exec_dir(const char *argv0) 276 { 277 char *dir; 278 char *p = NULL; 279 char buf[PATH_MAX]; 280 281 assert(!exec_dir[0]); 282 283 #if defined(__linux__) 284 { 285 int len; 286 len = readlink("/proc/self/exe", buf, sizeof(buf) - 1); 287 if (len > 0) { 288 buf[len] = 0; 289 p = buf; 290 } 291 } 292 #elif defined(__FreeBSD__) 293 { 294 static int mib[4] = {CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1}; 295 size_t len = sizeof(buf) - 1; 296 297 *buf = '\0'; 298 if (!sysctl(mib, ARRAY_SIZE(mib), buf, &len, NULL, 0) && 299 *buf) { 300 buf[sizeof(buf) - 1] = '\0'; 301 p = buf; 302 } 303 } 304 #endif 305 /* If we don't have any way of figuring out the actual executable 306 location then try argv[0]. */ 307 if (!p) { 308 if (!argv0) { 309 return; 310 } 311 p = realpath(argv0, buf); 312 if (!p) { 313 return; 314 } 315 } 316 dir = dirname(p); 317 318 pstrcpy(exec_dir, sizeof(exec_dir), dir); 319 } 320 321 char *qemu_get_exec_dir(void) 322 { 323 return g_strdup(exec_dir); 324 } 325 326 static sigjmp_buf sigjump; 327 328 static void sigbus_handler(int signal) 329 { 330 siglongjmp(sigjump, 1); 331 } 332 333 void os_mem_prealloc(int fd, char *area, size_t memory) 334 { 335 int ret; 336 struct sigaction act, oldact; 337 sigset_t set, oldset; 338 339 memset(&act, 0, sizeof(act)); 340 act.sa_handler = &sigbus_handler; 341 act.sa_flags = 0; 342 343 ret = sigaction(SIGBUS, &act, &oldact); 344 if (ret) { 345 perror("os_mem_prealloc: failed to install signal handler"); 346 exit(1); 347 } 348 349 /* unblock SIGBUS */ 350 sigemptyset(&set); 351 sigaddset(&set, SIGBUS); 352 pthread_sigmask(SIG_UNBLOCK, &set, &oldset); 353 354 if (sigsetjmp(sigjump, 1)) { 355 fprintf(stderr, "os_mem_prealloc: Insufficient free host memory " 356 "pages available to allocate guest RAM\n"); 357 exit(1); 358 } else { 359 int i; 360 size_t hpagesize = qemu_fd_getpagesize(fd); 361 size_t numpages = DIV_ROUND_UP(memory, hpagesize); 362 363 /* MAP_POPULATE silently ignores failures */ 364 for (i = 0; i < numpages; i++) { 365 memset(area + (hpagesize * i), 0, 1); 366 } 367 368 ret = sigaction(SIGBUS, &oldact, NULL); 369 if (ret) { 370 perror("os_mem_prealloc: failed to reinstall signal handler"); 371 exit(1); 372 } 373 374 pthread_sigmask(SIG_SETMASK, &oldset, NULL); 375 } 376 } 377 378 379 static struct termios oldtty; 380 381 static void term_exit(void) 382 { 383 tcsetattr(0, TCSANOW, &oldtty); 384 } 385 386 static void term_init(void) 387 { 388 struct termios tty; 389 390 tcgetattr(0, &tty); 391 oldtty = tty; 392 393 tty.c_iflag &= ~(IGNBRK|BRKINT|PARMRK|ISTRIP 394 |INLCR|IGNCR|ICRNL|IXON); 395 tty.c_oflag |= OPOST; 396 tty.c_lflag &= ~(ECHO|ECHONL|ICANON|IEXTEN); 397 tty.c_cflag &= ~(CSIZE|PARENB); 398 tty.c_cflag |= CS8; 399 tty.c_cc[VMIN] = 1; 400 tty.c_cc[VTIME] = 0; 401 402 tcsetattr(0, TCSANOW, &tty); 403 404 atexit(term_exit); 405 } 406 407 int qemu_read_password(char *buf, int buf_size) 408 { 409 uint8_t ch; 410 int i, ret; 411 412 printf("password: "); 413 fflush(stdout); 414 term_init(); 415 i = 0; 416 for (;;) { 417 ret = read(0, &ch, 1); 418 if (ret == -1) { 419 if (errno == EAGAIN || errno == EINTR) { 420 continue; 421 } else { 422 break; 423 } 424 } else if (ret == 0) { 425 ret = -1; 426 break; 427 } else { 428 if (ch == '\r' || 429 ch == '\n') { 430 ret = 0; 431 break; 432 } 433 if (i < (buf_size - 1)) { 434 buf[i++] = ch; 435 } 436 } 437 } 438 term_exit(); 439 buf[i] = '\0'; 440 printf("\n"); 441 return ret; 442 } 443 444 445 pid_t qemu_fork(Error **errp) 446 { 447 sigset_t oldmask, newmask; 448 struct sigaction sig_action; 449 int saved_errno; 450 pid_t pid; 451 452 /* 453 * Need to block signals now, so that child process can safely 454 * kill off caller's signal handlers without a race. 455 */ 456 sigfillset(&newmask); 457 if (pthread_sigmask(SIG_SETMASK, &newmask, &oldmask) != 0) { 458 error_setg_errno(errp, errno, 459 "cannot block signals"); 460 return -1; 461 } 462 463 pid = fork(); 464 saved_errno = errno; 465 466 if (pid < 0) { 467 /* attempt to restore signal mask, but ignore failure, to 468 * avoid obscuring the fork failure */ 469 (void)pthread_sigmask(SIG_SETMASK, &oldmask, NULL); 470 error_setg_errno(errp, saved_errno, 471 "cannot fork child process"); 472 errno = saved_errno; 473 return -1; 474 } else if (pid) { 475 /* parent process */ 476 477 /* Restore our original signal mask now that the child is 478 * safely running. Only documented failures are EFAULT (not 479 * possible, since we are using just-grabbed mask) or EINVAL 480 * (not possible, since we are using correct arguments). */ 481 (void)pthread_sigmask(SIG_SETMASK, &oldmask, NULL); 482 } else { 483 /* child process */ 484 size_t i; 485 486 /* Clear out all signal handlers from parent so nothing 487 * unexpected can happen in our child once we unblock 488 * signals */ 489 sig_action.sa_handler = SIG_DFL; 490 sig_action.sa_flags = 0; 491 sigemptyset(&sig_action.sa_mask); 492 493 for (i = 1; i < NSIG; i++) { 494 /* Only possible errors are EFAULT or EINVAL The former 495 * won't happen, the latter we expect, so no need to check 496 * return value */ 497 (void)sigaction(i, &sig_action, NULL); 498 } 499 500 /* Unmask all signals in child, since we've no idea what the 501 * caller's done with their signal mask and don't want to 502 * propagate that to children */ 503 sigemptyset(&newmask); 504 if (pthread_sigmask(SIG_SETMASK, &newmask, NULL) != 0) { 505 Error *local_err = NULL; 506 error_setg_errno(&local_err, errno, 507 "cannot unblock signals"); 508 error_report_err(local_err); 509 _exit(1); 510 } 511 } 512 return pid; 513 } 514