1 /* 2 * os-posix-lib.c 3 * 4 * Copyright (c) 2003-2008 Fabrice Bellard 5 * Copyright (c) 2010 Red Hat, Inc. 6 * 7 * QEMU library functions on POSIX which are shared between QEMU and 8 * the QEMU tools. 9 * 10 * Permission is hereby granted, free of charge, to any person obtaining a copy 11 * of this software and associated documentation files (the "Software"), to deal 12 * in the Software without restriction, including without limitation the rights 13 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 14 * copies of the Software, and to permit persons to whom the Software is 15 * furnished to do so, subject to the following conditions: 16 * 17 * The above copyright notice and this permission notice shall be included in 18 * all copies or substantial portions of the Software. 19 * 20 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 21 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 23 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 24 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 25 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 26 * THE SOFTWARE. 27 */ 28 29 #if defined(__linux__) && (defined(__x86_64__) || defined(__arm__)) 30 /* Use 2 MiB alignment so transparent hugepages can be used by KVM. 31 Valgrind does not support alignments larger than 1 MiB, 32 therefore we need special code which handles running on Valgrind. */ 33 # define QEMU_VMALLOC_ALIGN (512 * 4096) 34 #elif defined(__linux__) && defined(__s390x__) 35 /* Use 1 MiB (segment size) alignment so gmap can be used by KVM. */ 36 # define QEMU_VMALLOC_ALIGN (256 * 4096) 37 #else 38 # define QEMU_VMALLOC_ALIGN getpagesize() 39 #endif 40 41 #include "qemu/osdep.h" 42 #include <termios.h> 43 #include <termios.h> 44 45 #include <glib/gprintf.h> 46 47 #include "sysemu/sysemu.h" 48 #include "trace.h" 49 #include "qapi/error.h" 50 #include "qemu/sockets.h" 51 #include <sys/mman.h> 52 #include <libgen.h> 53 #include <sys/signal.h> 54 #include "qemu/cutils.h" 55 56 #ifdef CONFIG_LINUX 57 #include <sys/syscall.h> 58 #endif 59 60 #ifdef __FreeBSD__ 61 #include <sys/sysctl.h> 62 #endif 63 64 #include <qemu/mmap-alloc.h> 65 66 int qemu_get_thread_id(void) 67 { 68 #if defined(__linux__) 69 return syscall(SYS_gettid); 70 #else 71 return getpid(); 72 #endif 73 } 74 75 int qemu_daemon(int nochdir, int noclose) 76 { 77 return daemon(nochdir, noclose); 78 } 79 80 void *qemu_oom_check(void *ptr) 81 { 82 if (ptr == NULL) { 83 fprintf(stderr, "Failed to allocate memory: %s\n", strerror(errno)); 84 abort(); 85 } 86 return ptr; 87 } 88 89 void *qemu_try_memalign(size_t alignment, size_t size) 90 { 91 void *ptr; 92 93 if (alignment < sizeof(void*)) { 94 alignment = sizeof(void*); 95 } 96 97 #if defined(_POSIX_C_SOURCE) && !defined(__sun__) 98 int ret; 99 ret = posix_memalign(&ptr, alignment, size); 100 if (ret != 0) { 101 errno = ret; 102 ptr = NULL; 103 } 104 #elif defined(CONFIG_BSD) 105 ptr = valloc(size); 106 #else 107 ptr = memalign(alignment, size); 108 #endif 109 trace_qemu_memalign(alignment, size, ptr); 110 return ptr; 111 } 112 113 void *qemu_memalign(size_t alignment, size_t size) 114 { 115 return qemu_oom_check(qemu_try_memalign(alignment, size)); 116 } 117 118 /* alloc shared memory pages */ 119 void *qemu_anon_ram_alloc(size_t size, uint64_t *alignment) 120 { 121 size_t align = QEMU_VMALLOC_ALIGN; 122 void *ptr = qemu_ram_mmap(-1, size, align, false); 123 124 if (ptr == MAP_FAILED) { 125 return NULL; 126 } 127 128 if (alignment) { 129 *alignment = align; 130 } 131 132 trace_qemu_anon_ram_alloc(size, ptr); 133 return ptr; 134 } 135 136 void qemu_vfree(void *ptr) 137 { 138 trace_qemu_vfree(ptr); 139 free(ptr); 140 } 141 142 void qemu_anon_ram_free(void *ptr, size_t size) 143 { 144 trace_qemu_anon_ram_free(ptr, size); 145 qemu_ram_munmap(ptr, size); 146 } 147 148 void qemu_set_block(int fd) 149 { 150 int f; 151 f = fcntl(fd, F_GETFL); 152 fcntl(fd, F_SETFL, f & ~O_NONBLOCK); 153 } 154 155 void qemu_set_nonblock(int fd) 156 { 157 int f; 158 f = fcntl(fd, F_GETFL); 159 fcntl(fd, F_SETFL, f | O_NONBLOCK); 160 } 161 162 int socket_set_fast_reuse(int fd) 163 { 164 int val = 1, ret; 165 166 ret = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, 167 (const char *)&val, sizeof(val)); 168 169 assert(ret == 0); 170 171 return ret; 172 } 173 174 void qemu_set_cloexec(int fd) 175 { 176 int f; 177 f = fcntl(fd, F_GETFD); 178 fcntl(fd, F_SETFD, f | FD_CLOEXEC); 179 } 180 181 /* 182 * Creates a pipe with FD_CLOEXEC set on both file descriptors 183 */ 184 int qemu_pipe(int pipefd[2]) 185 { 186 int ret; 187 188 #ifdef CONFIG_PIPE2 189 ret = pipe2(pipefd, O_CLOEXEC); 190 if (ret != -1 || errno != ENOSYS) { 191 return ret; 192 } 193 #endif 194 ret = pipe(pipefd); 195 if (ret == 0) { 196 qemu_set_cloexec(pipefd[0]); 197 qemu_set_cloexec(pipefd[1]); 198 } 199 200 return ret; 201 } 202 203 int qemu_utimens(const char *path, const struct timespec *times) 204 { 205 struct timeval tv[2], tv_now; 206 struct stat st; 207 int i; 208 #ifdef CONFIG_UTIMENSAT 209 int ret; 210 211 ret = utimensat(AT_FDCWD, path, times, AT_SYMLINK_NOFOLLOW); 212 if (ret != -1 || errno != ENOSYS) { 213 return ret; 214 } 215 #endif 216 /* Fallback: use utimes() instead of utimensat() */ 217 218 /* happy if special cases */ 219 if (times[0].tv_nsec == UTIME_OMIT && times[1].tv_nsec == UTIME_OMIT) { 220 return 0; 221 } 222 if (times[0].tv_nsec == UTIME_NOW && times[1].tv_nsec == UTIME_NOW) { 223 return utimes(path, NULL); 224 } 225 226 /* prepare for hard cases */ 227 if (times[0].tv_nsec == UTIME_NOW || times[1].tv_nsec == UTIME_NOW) { 228 gettimeofday(&tv_now, NULL); 229 } 230 if (times[0].tv_nsec == UTIME_OMIT || times[1].tv_nsec == UTIME_OMIT) { 231 stat(path, &st); 232 } 233 234 for (i = 0; i < 2; i++) { 235 if (times[i].tv_nsec == UTIME_NOW) { 236 tv[i].tv_sec = tv_now.tv_sec; 237 tv[i].tv_usec = tv_now.tv_usec; 238 } else if (times[i].tv_nsec == UTIME_OMIT) { 239 tv[i].tv_sec = (i == 0) ? st.st_atime : st.st_mtime; 240 tv[i].tv_usec = 0; 241 } else { 242 tv[i].tv_sec = times[i].tv_sec; 243 tv[i].tv_usec = times[i].tv_nsec / 1000; 244 } 245 } 246 247 return utimes(path, &tv[0]); 248 } 249 250 char * 251 qemu_get_local_state_pathname(const char *relative_pathname) 252 { 253 return g_strdup_printf("%s/%s", CONFIG_QEMU_LOCALSTATEDIR, 254 relative_pathname); 255 } 256 257 void qemu_set_tty_echo(int fd, bool echo) 258 { 259 struct termios tty; 260 261 tcgetattr(fd, &tty); 262 263 if (echo) { 264 tty.c_lflag |= ECHO | ECHONL | ICANON | IEXTEN; 265 } else { 266 tty.c_lflag &= ~(ECHO | ECHONL | ICANON | IEXTEN); 267 } 268 269 tcsetattr(fd, TCSANOW, &tty); 270 } 271 272 static char exec_dir[PATH_MAX]; 273 274 void qemu_init_exec_dir(const char *argv0) 275 { 276 char *dir; 277 char *p = NULL; 278 char buf[PATH_MAX]; 279 280 assert(!exec_dir[0]); 281 282 #if defined(__linux__) 283 { 284 int len; 285 len = readlink("/proc/self/exe", buf, sizeof(buf) - 1); 286 if (len > 0) { 287 buf[len] = 0; 288 p = buf; 289 } 290 } 291 #elif defined(__FreeBSD__) 292 { 293 static int mib[4] = {CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1}; 294 size_t len = sizeof(buf) - 1; 295 296 *buf = '\0'; 297 if (!sysctl(mib, ARRAY_SIZE(mib), buf, &len, NULL, 0) && 298 *buf) { 299 buf[sizeof(buf) - 1] = '\0'; 300 p = buf; 301 } 302 } 303 #endif 304 /* If we don't have any way of figuring out the actual executable 305 location then try argv[0]. */ 306 if (!p) { 307 if (!argv0) { 308 return; 309 } 310 p = realpath(argv0, buf); 311 if (!p) { 312 return; 313 } 314 } 315 dir = dirname(p); 316 317 pstrcpy(exec_dir, sizeof(exec_dir), dir); 318 } 319 320 char *qemu_get_exec_dir(void) 321 { 322 return g_strdup(exec_dir); 323 } 324 325 static sigjmp_buf sigjump; 326 327 static void sigbus_handler(int signal) 328 { 329 siglongjmp(sigjump, 1); 330 } 331 332 void os_mem_prealloc(int fd, char *area, size_t memory) 333 { 334 int ret; 335 struct sigaction act, oldact; 336 sigset_t set, oldset; 337 338 memset(&act, 0, sizeof(act)); 339 act.sa_handler = &sigbus_handler; 340 act.sa_flags = 0; 341 342 ret = sigaction(SIGBUS, &act, &oldact); 343 if (ret) { 344 perror("os_mem_prealloc: failed to install signal handler"); 345 exit(1); 346 } 347 348 /* unblock SIGBUS */ 349 sigemptyset(&set); 350 sigaddset(&set, SIGBUS); 351 pthread_sigmask(SIG_UNBLOCK, &set, &oldset); 352 353 if (sigsetjmp(sigjump, 1)) { 354 fprintf(stderr, "os_mem_prealloc: Insufficient free host memory " 355 "pages available to allocate guest RAM\n"); 356 exit(1); 357 } else { 358 int i; 359 size_t hpagesize = qemu_fd_getpagesize(fd); 360 size_t numpages = DIV_ROUND_UP(memory, hpagesize); 361 362 /* MAP_POPULATE silently ignores failures */ 363 for (i = 0; i < numpages; i++) { 364 memset(area + (hpagesize * i), 0, 1); 365 } 366 367 ret = sigaction(SIGBUS, &oldact, NULL); 368 if (ret) { 369 perror("os_mem_prealloc: failed to reinstall signal handler"); 370 exit(1); 371 } 372 373 pthread_sigmask(SIG_SETMASK, &oldset, NULL); 374 } 375 } 376 377 378 static struct termios oldtty; 379 380 static void term_exit(void) 381 { 382 tcsetattr(0, TCSANOW, &oldtty); 383 } 384 385 static void term_init(void) 386 { 387 struct termios tty; 388 389 tcgetattr(0, &tty); 390 oldtty = tty; 391 392 tty.c_iflag &= ~(IGNBRK|BRKINT|PARMRK|ISTRIP 393 |INLCR|IGNCR|ICRNL|IXON); 394 tty.c_oflag |= OPOST; 395 tty.c_lflag &= ~(ECHO|ECHONL|ICANON|IEXTEN); 396 tty.c_cflag &= ~(CSIZE|PARENB); 397 tty.c_cflag |= CS8; 398 tty.c_cc[VMIN] = 1; 399 tty.c_cc[VTIME] = 0; 400 401 tcsetattr(0, TCSANOW, &tty); 402 403 atexit(term_exit); 404 } 405 406 int qemu_read_password(char *buf, int buf_size) 407 { 408 uint8_t ch; 409 int i, ret; 410 411 printf("password: "); 412 fflush(stdout); 413 term_init(); 414 i = 0; 415 for (;;) { 416 ret = read(0, &ch, 1); 417 if (ret == -1) { 418 if (errno == EAGAIN || errno == EINTR) { 419 continue; 420 } else { 421 break; 422 } 423 } else if (ret == 0) { 424 ret = -1; 425 break; 426 } else { 427 if (ch == '\r' || 428 ch == '\n') { 429 ret = 0; 430 break; 431 } 432 if (i < (buf_size - 1)) { 433 buf[i++] = ch; 434 } 435 } 436 } 437 term_exit(); 438 buf[i] = '\0'; 439 printf("\n"); 440 return ret; 441 } 442 443 444 pid_t qemu_fork(Error **errp) 445 { 446 sigset_t oldmask, newmask; 447 struct sigaction sig_action; 448 int saved_errno; 449 pid_t pid; 450 451 /* 452 * Need to block signals now, so that child process can safely 453 * kill off caller's signal handlers without a race. 454 */ 455 sigfillset(&newmask); 456 if (pthread_sigmask(SIG_SETMASK, &newmask, &oldmask) != 0) { 457 error_setg_errno(errp, errno, 458 "cannot block signals"); 459 return -1; 460 } 461 462 pid = fork(); 463 saved_errno = errno; 464 465 if (pid < 0) { 466 /* attempt to restore signal mask, but ignore failure, to 467 * avoid obscuring the fork failure */ 468 (void)pthread_sigmask(SIG_SETMASK, &oldmask, NULL); 469 error_setg_errno(errp, saved_errno, 470 "cannot fork child process"); 471 errno = saved_errno; 472 return -1; 473 } else if (pid) { 474 /* parent process */ 475 476 /* Restore our original signal mask now that the child is 477 * safely running. Only documented failures are EFAULT (not 478 * possible, since we are using just-grabbed mask) or EINVAL 479 * (not possible, since we are using correct arguments). */ 480 (void)pthread_sigmask(SIG_SETMASK, &oldmask, NULL); 481 } else { 482 /* child process */ 483 size_t i; 484 485 /* Clear out all signal handlers from parent so nothing 486 * unexpected can happen in our child once we unblock 487 * signals */ 488 sig_action.sa_handler = SIG_DFL; 489 sig_action.sa_flags = 0; 490 sigemptyset(&sig_action.sa_mask); 491 492 for (i = 1; i < NSIG; i++) { 493 /* Only possible errors are EFAULT or EINVAL The former 494 * won't happen, the latter we expect, so no need to check 495 * return value */ 496 (void)sigaction(i, &sig_action, NULL); 497 } 498 499 /* Unmask all signals in child, since we've no idea what the 500 * caller's done with their signal mask and don't want to 501 * propagate that to children */ 502 sigemptyset(&newmask); 503 if (pthread_sigmask(SIG_SETMASK, &newmask, NULL) != 0) { 504 Error *local_err = NULL; 505 error_setg_errno(&local_err, errno, 506 "cannot unblock signals"); 507 error_report_err(local_err); 508 _exit(1); 509 } 510 } 511 return pid; 512 } 513