1 /* 2 * QEMU low level functions 3 * 4 * Copyright (c) 2003 Fabrice Bellard 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to deal 8 * in the Software without restriction, including without limitation the rights 9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 * copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in 14 * all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 * THE SOFTWARE. 23 */ 24 #include "qemu/osdep.h" 25 #include "qapi/error.h" 26 27 /* Needed early for CONFIG_BSD etc. */ 28 29 #ifdef CONFIG_SOLARIS 30 #include <sys/statvfs.h> 31 /* See MySQL bug #7156 (http://bugs.mysql.com/bug.php?id=7156) for 32 discussion about Solaris header problems */ 33 extern int madvise(char *, size_t, int); 34 #endif 35 36 #include "qemu-common.h" 37 #include "qemu/cutils.h" 38 #include "qemu/sockets.h" 39 #include "qemu/error-report.h" 40 #include "monitor/monitor.h" 41 42 static bool fips_enabled = false; 43 44 static const char *hw_version = QEMU_HW_VERSION; 45 46 int socket_set_cork(int fd, int v) 47 { 48 #if defined(SOL_TCP) && defined(TCP_CORK) 49 return qemu_setsockopt(fd, SOL_TCP, TCP_CORK, &v, sizeof(v)); 50 #else 51 return 0; 52 #endif 53 } 54 55 int socket_set_nodelay(int fd) 56 { 57 int v = 1; 58 return qemu_setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &v, sizeof(v)); 59 } 60 61 int qemu_madvise(void *addr, size_t len, int advice) 62 { 63 if (advice == QEMU_MADV_INVALID) { 64 errno = EINVAL; 65 return -1; 66 } 67 #if defined(CONFIG_MADVISE) 68 return madvise(addr, len, advice); 69 #elif defined(CONFIG_POSIX_MADVISE) 70 return posix_madvise(addr, len, advice); 71 #else 72 errno = EINVAL; 73 return -1; 74 #endif 75 } 76 77 static int qemu_mprotect__osdep(void *addr, size_t size, int prot) 78 { 79 g_assert(!((uintptr_t)addr & ~qemu_real_host_page_mask)); 80 g_assert(!(size & ~qemu_real_host_page_mask)); 81 82 #ifdef _WIN32 83 DWORD old_protect; 84 85 if (!VirtualProtect(addr, size, prot, &old_protect)) { 86 g_autofree gchar *emsg = g_win32_error_message(GetLastError()); 87 error_report("%s: VirtualProtect failed: %s", __func__, emsg); 88 return -1; 89 } 90 return 0; 91 #else 92 if (mprotect(addr, size, prot)) { 93 error_report("%s: mprotect failed: %s", __func__, strerror(errno)); 94 return -1; 95 } 96 return 0; 97 #endif 98 } 99 100 int qemu_mprotect_rwx(void *addr, size_t size) 101 { 102 #ifdef _WIN32 103 return qemu_mprotect__osdep(addr, size, PAGE_EXECUTE_READWRITE); 104 #else 105 return qemu_mprotect__osdep(addr, size, PROT_READ | PROT_WRITE | PROT_EXEC); 106 #endif 107 } 108 109 int qemu_mprotect_none(void *addr, size_t size) 110 { 111 #ifdef _WIN32 112 return qemu_mprotect__osdep(addr, size, PAGE_NOACCESS); 113 #else 114 return qemu_mprotect__osdep(addr, size, PROT_NONE); 115 #endif 116 } 117 118 #ifndef _WIN32 119 120 static int fcntl_op_setlk = -1; 121 static int fcntl_op_getlk = -1; 122 123 /* 124 * Dups an fd and sets the flags 125 */ 126 int qemu_dup_flags(int fd, int flags) 127 { 128 int ret; 129 int serrno; 130 int dup_flags; 131 132 ret = qemu_dup(fd); 133 if (ret == -1) { 134 goto fail; 135 } 136 137 dup_flags = fcntl(ret, F_GETFL); 138 if (dup_flags == -1) { 139 goto fail; 140 } 141 142 if ((flags & O_SYNC) != (dup_flags & O_SYNC)) { 143 errno = EINVAL; 144 goto fail; 145 } 146 147 /* Set/unset flags that we can with fcntl */ 148 if (fcntl(ret, F_SETFL, flags) == -1) { 149 goto fail; 150 } 151 152 /* Truncate the file in the cases that open() would truncate it */ 153 if (flags & O_TRUNC || 154 ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))) { 155 if (ftruncate(ret, 0) == -1) { 156 goto fail; 157 } 158 } 159 160 return ret; 161 162 fail: 163 serrno = errno; 164 if (ret != -1) { 165 close(ret); 166 } 167 errno = serrno; 168 return -1; 169 } 170 171 int qemu_dup(int fd) 172 { 173 int ret; 174 #ifdef F_DUPFD_CLOEXEC 175 ret = fcntl(fd, F_DUPFD_CLOEXEC, 0); 176 #else 177 ret = dup(fd); 178 if (ret != -1) { 179 qemu_set_cloexec(ret); 180 } 181 #endif 182 return ret; 183 } 184 185 static int qemu_parse_fdset(const char *param) 186 { 187 return qemu_parse_fd(param); 188 } 189 190 static void qemu_probe_lock_ops(void) 191 { 192 if (fcntl_op_setlk == -1) { 193 #ifdef F_OFD_SETLK 194 int fd; 195 int ret; 196 struct flock fl = { 197 .l_whence = SEEK_SET, 198 .l_start = 0, 199 .l_len = 0, 200 .l_type = F_WRLCK, 201 }; 202 203 fd = open("/dev/null", O_RDWR); 204 if (fd < 0) { 205 fprintf(stderr, 206 "Failed to open /dev/null for OFD lock probing: %s\n", 207 strerror(errno)); 208 fcntl_op_setlk = F_SETLK; 209 fcntl_op_getlk = F_GETLK; 210 return; 211 } 212 ret = fcntl(fd, F_OFD_GETLK, &fl); 213 close(fd); 214 if (!ret) { 215 fcntl_op_setlk = F_OFD_SETLK; 216 fcntl_op_getlk = F_OFD_GETLK; 217 } else { 218 fcntl_op_setlk = F_SETLK; 219 fcntl_op_getlk = F_GETLK; 220 } 221 #else 222 fcntl_op_setlk = F_SETLK; 223 fcntl_op_getlk = F_GETLK; 224 #endif 225 } 226 } 227 228 bool qemu_has_ofd_lock(void) 229 { 230 qemu_probe_lock_ops(); 231 #ifdef F_OFD_SETLK 232 return fcntl_op_setlk == F_OFD_SETLK; 233 #else 234 return false; 235 #endif 236 } 237 238 static int qemu_lock_fcntl(int fd, int64_t start, int64_t len, int fl_type) 239 { 240 int ret; 241 struct flock fl = { 242 .l_whence = SEEK_SET, 243 .l_start = start, 244 .l_len = len, 245 .l_type = fl_type, 246 }; 247 qemu_probe_lock_ops(); 248 do { 249 ret = fcntl(fd, fcntl_op_setlk, &fl); 250 } while (ret == -1 && errno == EINTR); 251 return ret == -1 ? -errno : 0; 252 } 253 254 int qemu_lock_fd(int fd, int64_t start, int64_t len, bool exclusive) 255 { 256 return qemu_lock_fcntl(fd, start, len, exclusive ? F_WRLCK : F_RDLCK); 257 } 258 259 int qemu_unlock_fd(int fd, int64_t start, int64_t len) 260 { 261 return qemu_lock_fcntl(fd, start, len, F_UNLCK); 262 } 263 264 int qemu_lock_fd_test(int fd, int64_t start, int64_t len, bool exclusive) 265 { 266 int ret; 267 struct flock fl = { 268 .l_whence = SEEK_SET, 269 .l_start = start, 270 .l_len = len, 271 .l_type = exclusive ? F_WRLCK : F_RDLCK, 272 }; 273 qemu_probe_lock_ops(); 274 ret = fcntl(fd, fcntl_op_getlk, &fl); 275 if (ret == -1) { 276 return -errno; 277 } else { 278 return fl.l_type == F_UNLCK ? 0 : -EAGAIN; 279 } 280 } 281 #endif 282 283 static int qemu_open_cloexec(const char *name, int flags, mode_t mode) 284 { 285 int ret; 286 #ifdef O_CLOEXEC 287 ret = open(name, flags | O_CLOEXEC, mode); 288 #else 289 ret = open(name, flags, mode); 290 if (ret >= 0) { 291 qemu_set_cloexec(ret); 292 } 293 #endif 294 return ret; 295 } 296 297 /* 298 * Opens a file with FD_CLOEXEC set 299 */ 300 static int 301 qemu_open_internal(const char *name, int flags, mode_t mode, Error **errp) 302 { 303 int ret; 304 305 #ifndef _WIN32 306 const char *fdset_id_str; 307 308 /* Attempt dup of fd from fd set */ 309 if (strstart(name, "/dev/fdset/", &fdset_id_str)) { 310 int64_t fdset_id; 311 int dupfd; 312 313 fdset_id = qemu_parse_fdset(fdset_id_str); 314 if (fdset_id == -1) { 315 error_setg(errp, "Could not parse fdset %s", name); 316 errno = EINVAL; 317 return -1; 318 } 319 320 dupfd = monitor_fdset_dup_fd_add(fdset_id, flags); 321 if (dupfd == -1) { 322 error_setg_errno(errp, errno, "Could not dup FD for %s flags %x", 323 name, flags); 324 return -1; 325 } 326 327 return dupfd; 328 } 329 #endif 330 331 ret = qemu_open_cloexec(name, flags, mode); 332 333 if (ret == -1) { 334 const char *action = flags & O_CREAT ? "create" : "open"; 335 #ifdef O_DIRECT 336 /* Give more helpful error message for O_DIRECT */ 337 if (errno == EINVAL && (flags & O_DIRECT)) { 338 ret = open(name, flags & ~O_DIRECT, mode); 339 if (ret != -1) { 340 close(ret); 341 error_setg(errp, "Could not %s '%s': " 342 "filesystem does not support O_DIRECT", 343 action, name); 344 errno = EINVAL; /* restore first open()'s errno */ 345 return -1; 346 } 347 } 348 #endif /* O_DIRECT */ 349 error_setg_errno(errp, errno, "Could not %s '%s'", 350 action, name); 351 } 352 353 return ret; 354 } 355 356 357 int qemu_open(const char *name, int flags, Error **errp) 358 { 359 assert(!(flags & O_CREAT)); 360 361 return qemu_open_internal(name, flags, 0, errp); 362 } 363 364 365 int qemu_create(const char *name, int flags, mode_t mode, Error **errp) 366 { 367 assert(!(flags & O_CREAT)); 368 369 return qemu_open_internal(name, flags | O_CREAT, mode, errp); 370 } 371 372 373 int qemu_open_old(const char *name, int flags, ...) 374 { 375 va_list ap; 376 mode_t mode = 0; 377 int ret; 378 379 va_start(ap, flags); 380 if (flags & O_CREAT) { 381 mode = va_arg(ap, int); 382 } 383 va_end(ap); 384 385 ret = qemu_open_internal(name, flags, mode, NULL); 386 387 #ifdef O_DIRECT 388 if (ret == -1 && errno == EINVAL && (flags & O_DIRECT)) { 389 error_report("file system may not support O_DIRECT"); 390 errno = EINVAL; /* in case it was clobbered */ 391 } 392 #endif /* O_DIRECT */ 393 394 return ret; 395 } 396 397 int qemu_close(int fd) 398 { 399 int64_t fdset_id; 400 401 /* Close fd that was dup'd from an fdset */ 402 fdset_id = monitor_fdset_dup_fd_find(fd); 403 if (fdset_id != -1) { 404 int ret; 405 406 ret = close(fd); 407 if (ret == 0) { 408 monitor_fdset_dup_fd_remove(fd); 409 } 410 411 return ret; 412 } 413 414 return close(fd); 415 } 416 417 /* 418 * Delete a file from the filesystem, unless the filename is /dev/fdset/... 419 * 420 * Returns: On success, zero is returned. On error, -1 is returned, 421 * and errno is set appropriately. 422 */ 423 int qemu_unlink(const char *name) 424 { 425 if (g_str_has_prefix(name, "/dev/fdset/")) { 426 return 0; 427 } 428 429 return unlink(name); 430 } 431 432 /* 433 * A variant of write(2) which handles partial write. 434 * 435 * Return the number of bytes transferred. 436 * Set errno if fewer than `count' bytes are written. 437 * 438 * This function don't work with non-blocking fd's. 439 * Any of the possibilities with non-bloking fd's is bad: 440 * - return a short write (then name is wrong) 441 * - busy wait adding (errno == EAGAIN) to the loop 442 */ 443 ssize_t qemu_write_full(int fd, const void *buf, size_t count) 444 { 445 ssize_t ret = 0; 446 ssize_t total = 0; 447 448 while (count) { 449 ret = write(fd, buf, count); 450 if (ret < 0) { 451 if (errno == EINTR) 452 continue; 453 break; 454 } 455 456 count -= ret; 457 buf += ret; 458 total += ret; 459 } 460 461 return total; 462 } 463 464 /* 465 * Opens a socket with FD_CLOEXEC set 466 */ 467 int qemu_socket(int domain, int type, int protocol) 468 { 469 int ret; 470 471 #ifdef SOCK_CLOEXEC 472 ret = socket(domain, type | SOCK_CLOEXEC, protocol); 473 if (ret != -1 || errno != EINVAL) { 474 return ret; 475 } 476 #endif 477 ret = socket(domain, type, protocol); 478 if (ret >= 0) { 479 qemu_set_cloexec(ret); 480 } 481 482 return ret; 483 } 484 485 /* 486 * Accept a connection and set FD_CLOEXEC 487 */ 488 int qemu_accept(int s, struct sockaddr *addr, socklen_t *addrlen) 489 { 490 int ret; 491 492 #ifdef CONFIG_ACCEPT4 493 ret = accept4(s, addr, addrlen, SOCK_CLOEXEC); 494 if (ret != -1 || errno != ENOSYS) { 495 return ret; 496 } 497 #endif 498 ret = accept(s, addr, addrlen); 499 if (ret >= 0) { 500 qemu_set_cloexec(ret); 501 } 502 503 return ret; 504 } 505 506 void qemu_set_hw_version(const char *version) 507 { 508 hw_version = version; 509 } 510 511 const char *qemu_hw_version(void) 512 { 513 return hw_version; 514 } 515 516 void fips_set_state(bool requested) 517 { 518 #ifdef __linux__ 519 if (requested) { 520 FILE *fds = fopen("/proc/sys/crypto/fips_enabled", "r"); 521 if (fds != NULL) { 522 fips_enabled = (fgetc(fds) == '1'); 523 fclose(fds); 524 } 525 } 526 #else 527 fips_enabled = false; 528 #endif /* __linux__ */ 529 530 #ifdef _FIPS_DEBUG 531 fprintf(stderr, "FIPS mode %s (requested %s)\n", 532 (fips_enabled ? "enabled" : "disabled"), 533 (requested ? "enabled" : "disabled")); 534 #endif 535 } 536 537 bool fips_get_state(void) 538 { 539 return fips_enabled; 540 } 541 542 #ifdef _WIN32 543 static void socket_cleanup(void) 544 { 545 WSACleanup(); 546 } 547 #endif 548 549 int socket_init(void) 550 { 551 #ifdef _WIN32 552 WSADATA Data; 553 int ret, err; 554 555 ret = WSAStartup(MAKEWORD(2, 2), &Data); 556 if (ret != 0) { 557 err = WSAGetLastError(); 558 fprintf(stderr, "WSAStartup: %d\n", err); 559 return -1; 560 } 561 atexit(socket_cleanup); 562 #endif 563 return 0; 564 } 565 566 567 #ifndef CONFIG_IOVEC 568 /* helper function for iov_send_recv() */ 569 static ssize_t 570 readv_writev(int fd, const struct iovec *iov, int iov_cnt, bool do_write) 571 { 572 unsigned i = 0; 573 ssize_t ret = 0; 574 while (i < iov_cnt) { 575 ssize_t r = do_write 576 ? write(fd, iov[i].iov_base, iov[i].iov_len) 577 : read(fd, iov[i].iov_base, iov[i].iov_len); 578 if (r > 0) { 579 ret += r; 580 } else if (!r) { 581 break; 582 } else if (errno == EINTR) { 583 continue; 584 } else { 585 /* else it is some "other" error, 586 * only return if there was no data processed. */ 587 if (ret == 0) { 588 ret = -1; 589 } 590 break; 591 } 592 i++; 593 } 594 return ret; 595 } 596 597 ssize_t 598 readv(int fd, const struct iovec *iov, int iov_cnt) 599 { 600 return readv_writev(fd, iov, iov_cnt, false); 601 } 602 603 ssize_t 604 writev(int fd, const struct iovec *iov, int iov_cnt) 605 { 606 return readv_writev(fd, iov, iov_cnt, true); 607 } 608 #endif 609