1 /* 2 * os-posix-lib.c 3 * 4 * Copyright (c) 2003-2008 Fabrice Bellard 5 * Copyright (c) 2010 Red Hat, Inc. 6 * 7 * QEMU library functions on POSIX which are shared between QEMU and 8 * the QEMU tools. 9 * 10 * Permission is hereby granted, free of charge, to any person obtaining a copy 11 * of this software and associated documentation files (the "Software"), to deal 12 * in the Software without restriction, including without limitation the rights 13 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 14 * copies of the Software, and to permit persons to whom the Software is 15 * furnished to do so, subject to the following conditions: 16 * 17 * The above copyright notice and this permission notice shall be included in 18 * all copies or substantial portions of the Software. 19 * 20 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 21 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 23 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 24 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 25 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 26 * THE SOFTWARE. 27 */ 28 29 /* The following block of code temporarily renames the daemon() function so the 30 compiler does not see the warning associated with it in stdlib.h on OSX */ 31 #ifdef __APPLE__ 32 #define daemon qemu_fake_daemon_function 33 #include <stdlib.h> 34 #undef daemon 35 extern int daemon(int, int); 36 #endif 37 38 #if defined(__linux__) && (defined(__x86_64__) || defined(__arm__)) 39 /* Use 2 MiB alignment so transparent hugepages can be used by KVM. 40 Valgrind does not support alignments larger than 1 MiB, 41 therefore we need special code which handles running on Valgrind. */ 42 # define QEMU_VMALLOC_ALIGN (512 * 4096) 43 #elif defined(__linux__) && defined(__s390x__) 44 /* Use 1 MiB (segment size) alignment so gmap can be used by KVM. */ 45 # define QEMU_VMALLOC_ALIGN (256 * 4096) 46 #else 47 # define QEMU_VMALLOC_ALIGN getpagesize() 48 #endif 49 50 #include <termios.h> 51 #include <unistd.h> 52 #include <termios.h> 53 54 #include <glib/gprintf.h> 55 56 #include "config-host.h" 57 #include "sysemu/sysemu.h" 58 #include "trace.h" 59 #include "qemu/sockets.h" 60 #include <sys/mman.h> 61 #include <libgen.h> 62 #include <setjmp.h> 63 #include <sys/signal.h> 64 65 #ifdef CONFIG_LINUX 66 #include <sys/syscall.h> 67 #endif 68 69 #ifdef __FreeBSD__ 70 #include <sys/sysctl.h> 71 #endif 72 73 #include <qemu/mmap-alloc.h> 74 75 int qemu_get_thread_id(void) 76 { 77 #if defined(__linux__) 78 return syscall(SYS_gettid); 79 #else 80 return getpid(); 81 #endif 82 } 83 84 int qemu_daemon(int nochdir, int noclose) 85 { 86 return daemon(nochdir, noclose); 87 } 88 89 void *qemu_oom_check(void *ptr) 90 { 91 if (ptr == NULL) { 92 fprintf(stderr, "Failed to allocate memory: %s\n", strerror(errno)); 93 abort(); 94 } 95 return ptr; 96 } 97 98 void *qemu_try_memalign(size_t alignment, size_t size) 99 { 100 void *ptr; 101 102 if (alignment < sizeof(void*)) { 103 alignment = sizeof(void*); 104 } 105 106 #if defined(_POSIX_C_SOURCE) && !defined(__sun__) 107 int ret; 108 ret = posix_memalign(&ptr, alignment, size); 109 if (ret != 0) { 110 errno = ret; 111 ptr = NULL; 112 } 113 #elif defined(CONFIG_BSD) 114 ptr = valloc(size); 115 #else 116 ptr = memalign(alignment, size); 117 #endif 118 trace_qemu_memalign(alignment, size, ptr); 119 return ptr; 120 } 121 122 void *qemu_memalign(size_t alignment, size_t size) 123 { 124 return qemu_oom_check(qemu_try_memalign(alignment, size)); 125 } 126 127 /* alloc shared memory pages */ 128 void *qemu_anon_ram_alloc(size_t size, uint64_t *alignment) 129 { 130 size_t align = QEMU_VMALLOC_ALIGN; 131 void *ptr = qemu_ram_mmap(-1, size, align, false); 132 133 if (ptr == MAP_FAILED) { 134 return NULL; 135 } 136 137 if (alignment) { 138 *alignment = align; 139 } 140 141 trace_qemu_anon_ram_alloc(size, ptr); 142 return ptr; 143 } 144 145 void qemu_vfree(void *ptr) 146 { 147 trace_qemu_vfree(ptr); 148 free(ptr); 149 } 150 151 void qemu_anon_ram_free(void *ptr, size_t size) 152 { 153 trace_qemu_anon_ram_free(ptr, size); 154 qemu_ram_munmap(ptr, size); 155 } 156 157 void qemu_set_block(int fd) 158 { 159 int f; 160 f = fcntl(fd, F_GETFL); 161 fcntl(fd, F_SETFL, f & ~O_NONBLOCK); 162 } 163 164 void qemu_set_nonblock(int fd) 165 { 166 int f; 167 f = fcntl(fd, F_GETFL); 168 fcntl(fd, F_SETFL, f | O_NONBLOCK); 169 } 170 171 int socket_set_fast_reuse(int fd) 172 { 173 int val = 1, ret; 174 175 ret = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, 176 (const char *)&val, sizeof(val)); 177 178 assert(ret == 0); 179 180 return ret; 181 } 182 183 void qemu_set_cloexec(int fd) 184 { 185 int f; 186 f = fcntl(fd, F_GETFD); 187 fcntl(fd, F_SETFD, f | FD_CLOEXEC); 188 } 189 190 /* 191 * Creates a pipe with FD_CLOEXEC set on both file descriptors 192 */ 193 int qemu_pipe(int pipefd[2]) 194 { 195 int ret; 196 197 #ifdef CONFIG_PIPE2 198 ret = pipe2(pipefd, O_CLOEXEC); 199 if (ret != -1 || errno != ENOSYS) { 200 return ret; 201 } 202 #endif 203 ret = pipe(pipefd); 204 if (ret == 0) { 205 qemu_set_cloexec(pipefd[0]); 206 qemu_set_cloexec(pipefd[1]); 207 } 208 209 return ret; 210 } 211 212 int qemu_utimens(const char *path, const struct timespec *times) 213 { 214 struct timeval tv[2], tv_now; 215 struct stat st; 216 int i; 217 #ifdef CONFIG_UTIMENSAT 218 int ret; 219 220 ret = utimensat(AT_FDCWD, path, times, AT_SYMLINK_NOFOLLOW); 221 if (ret != -1 || errno != ENOSYS) { 222 return ret; 223 } 224 #endif 225 /* Fallback: use utimes() instead of utimensat() */ 226 227 /* happy if special cases */ 228 if (times[0].tv_nsec == UTIME_OMIT && times[1].tv_nsec == UTIME_OMIT) { 229 return 0; 230 } 231 if (times[0].tv_nsec == UTIME_NOW && times[1].tv_nsec == UTIME_NOW) { 232 return utimes(path, NULL); 233 } 234 235 /* prepare for hard cases */ 236 if (times[0].tv_nsec == UTIME_NOW || times[1].tv_nsec == UTIME_NOW) { 237 gettimeofday(&tv_now, NULL); 238 } 239 if (times[0].tv_nsec == UTIME_OMIT || times[1].tv_nsec == UTIME_OMIT) { 240 stat(path, &st); 241 } 242 243 for (i = 0; i < 2; i++) { 244 if (times[i].tv_nsec == UTIME_NOW) { 245 tv[i].tv_sec = tv_now.tv_sec; 246 tv[i].tv_usec = tv_now.tv_usec; 247 } else if (times[i].tv_nsec == UTIME_OMIT) { 248 tv[i].tv_sec = (i == 0) ? st.st_atime : st.st_mtime; 249 tv[i].tv_usec = 0; 250 } else { 251 tv[i].tv_sec = times[i].tv_sec; 252 tv[i].tv_usec = times[i].tv_nsec / 1000; 253 } 254 } 255 256 return utimes(path, &tv[0]); 257 } 258 259 char * 260 qemu_get_local_state_pathname(const char *relative_pathname) 261 { 262 return g_strdup_printf("%s/%s", CONFIG_QEMU_LOCALSTATEDIR, 263 relative_pathname); 264 } 265 266 void qemu_set_tty_echo(int fd, bool echo) 267 { 268 struct termios tty; 269 270 tcgetattr(fd, &tty); 271 272 if (echo) { 273 tty.c_lflag |= ECHO | ECHONL | ICANON | IEXTEN; 274 } else { 275 tty.c_lflag &= ~(ECHO | ECHONL | ICANON | IEXTEN); 276 } 277 278 tcsetattr(fd, TCSANOW, &tty); 279 } 280 281 static char exec_dir[PATH_MAX]; 282 283 void qemu_init_exec_dir(const char *argv0) 284 { 285 char *dir; 286 char *p = NULL; 287 char buf[PATH_MAX]; 288 289 assert(!exec_dir[0]); 290 291 #if defined(__linux__) 292 { 293 int len; 294 len = readlink("/proc/self/exe", buf, sizeof(buf) - 1); 295 if (len > 0) { 296 buf[len] = 0; 297 p = buf; 298 } 299 } 300 #elif defined(__FreeBSD__) 301 { 302 static int mib[4] = {CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1}; 303 size_t len = sizeof(buf) - 1; 304 305 *buf = '\0'; 306 if (!sysctl(mib, ARRAY_SIZE(mib), buf, &len, NULL, 0) && 307 *buf) { 308 buf[sizeof(buf) - 1] = '\0'; 309 p = buf; 310 } 311 } 312 #endif 313 /* If we don't have any way of figuring out the actual executable 314 location then try argv[0]. */ 315 if (!p) { 316 if (!argv0) { 317 return; 318 } 319 p = realpath(argv0, buf); 320 if (!p) { 321 return; 322 } 323 } 324 dir = dirname(p); 325 326 pstrcpy(exec_dir, sizeof(exec_dir), dir); 327 } 328 329 char *qemu_get_exec_dir(void) 330 { 331 return g_strdup(exec_dir); 332 } 333 334 static sigjmp_buf sigjump; 335 336 static void sigbus_handler(int signal) 337 { 338 siglongjmp(sigjump, 1); 339 } 340 341 void os_mem_prealloc(int fd, char *area, size_t memory) 342 { 343 int ret; 344 struct sigaction act, oldact; 345 sigset_t set, oldset; 346 347 memset(&act, 0, sizeof(act)); 348 act.sa_handler = &sigbus_handler; 349 act.sa_flags = 0; 350 351 ret = sigaction(SIGBUS, &act, &oldact); 352 if (ret) { 353 perror("os_mem_prealloc: failed to install signal handler"); 354 exit(1); 355 } 356 357 /* unblock SIGBUS */ 358 sigemptyset(&set); 359 sigaddset(&set, SIGBUS); 360 pthread_sigmask(SIG_UNBLOCK, &set, &oldset); 361 362 if (sigsetjmp(sigjump, 1)) { 363 fprintf(stderr, "os_mem_prealloc: Insufficient free host memory " 364 "pages available to allocate guest RAM\n"); 365 exit(1); 366 } else { 367 int i; 368 size_t hpagesize = qemu_fd_getpagesize(fd); 369 size_t numpages = DIV_ROUND_UP(memory, hpagesize); 370 371 /* MAP_POPULATE silently ignores failures */ 372 for (i = 0; i < numpages; i++) { 373 memset(area + (hpagesize * i), 0, 1); 374 } 375 376 ret = sigaction(SIGBUS, &oldact, NULL); 377 if (ret) { 378 perror("os_mem_prealloc: failed to reinstall signal handler"); 379 exit(1); 380 } 381 382 pthread_sigmask(SIG_SETMASK, &oldset, NULL); 383 } 384 } 385 386 387 static struct termios oldtty; 388 389 static void term_exit(void) 390 { 391 tcsetattr(0, TCSANOW, &oldtty); 392 } 393 394 static void term_init(void) 395 { 396 struct termios tty; 397 398 tcgetattr(0, &tty); 399 oldtty = tty; 400 401 tty.c_iflag &= ~(IGNBRK|BRKINT|PARMRK|ISTRIP 402 |INLCR|IGNCR|ICRNL|IXON); 403 tty.c_oflag |= OPOST; 404 tty.c_lflag &= ~(ECHO|ECHONL|ICANON|IEXTEN); 405 tty.c_cflag &= ~(CSIZE|PARENB); 406 tty.c_cflag |= CS8; 407 tty.c_cc[VMIN] = 1; 408 tty.c_cc[VTIME] = 0; 409 410 tcsetattr(0, TCSANOW, &tty); 411 412 atexit(term_exit); 413 } 414 415 int qemu_read_password(char *buf, int buf_size) 416 { 417 uint8_t ch; 418 int i, ret; 419 420 printf("password: "); 421 fflush(stdout); 422 term_init(); 423 i = 0; 424 for (;;) { 425 ret = read(0, &ch, 1); 426 if (ret == -1) { 427 if (errno == EAGAIN || errno == EINTR) { 428 continue; 429 } else { 430 break; 431 } 432 } else if (ret == 0) { 433 ret = -1; 434 break; 435 } else { 436 if (ch == '\r' || 437 ch == '\n') { 438 ret = 0; 439 break; 440 } 441 if (i < (buf_size - 1)) { 442 buf[i++] = ch; 443 } 444 } 445 } 446 term_exit(); 447 buf[i] = '\0'; 448 printf("\n"); 449 return ret; 450 } 451 452 453 pid_t qemu_fork(Error **errp) 454 { 455 sigset_t oldmask, newmask; 456 struct sigaction sig_action; 457 int saved_errno; 458 pid_t pid; 459 460 /* 461 * Need to block signals now, so that child process can safely 462 * kill off caller's signal handlers without a race. 463 */ 464 sigfillset(&newmask); 465 if (pthread_sigmask(SIG_SETMASK, &newmask, &oldmask) != 0) { 466 error_setg_errno(errp, errno, 467 "cannot block signals"); 468 return -1; 469 } 470 471 pid = fork(); 472 saved_errno = errno; 473 474 if (pid < 0) { 475 /* attempt to restore signal mask, but ignore failure, to 476 * avoid obscuring the fork failure */ 477 (void)pthread_sigmask(SIG_SETMASK, &oldmask, NULL); 478 error_setg_errno(errp, saved_errno, 479 "cannot fork child process"); 480 errno = saved_errno; 481 return -1; 482 } else if (pid) { 483 /* parent process */ 484 485 /* Restore our original signal mask now that the child is 486 * safely running. Only documented failures are EFAULT (not 487 * possible, since we are using just-grabbed mask) or EINVAL 488 * (not possible, since we are using correct arguments). */ 489 (void)pthread_sigmask(SIG_SETMASK, &oldmask, NULL); 490 } else { 491 /* child process */ 492 size_t i; 493 494 /* Clear out all signal handlers from parent so nothing 495 * unexpected can happen in our child once we unblock 496 * signals */ 497 sig_action.sa_handler = SIG_DFL; 498 sig_action.sa_flags = 0; 499 sigemptyset(&sig_action.sa_mask); 500 501 for (i = 1; i < NSIG; i++) { 502 /* Only possible errors are EFAULT or EINVAL The former 503 * won't happen, the latter we expect, so no need to check 504 * return value */ 505 (void)sigaction(i, &sig_action, NULL); 506 } 507 508 /* Unmask all signals in child, since we've no idea what the 509 * caller's done with their signal mask and don't want to 510 * propagate that to children */ 511 sigemptyset(&newmask); 512 if (pthread_sigmask(SIG_SETMASK, &newmask, NULL) != 0) { 513 Error *local_err = NULL; 514 error_setg_errno(&local_err, errno, 515 "cannot unblock signals"); 516 error_report_err(local_err); 517 _exit(1); 518 } 519 } 520 return pid; 521 } 522