xref: /openbmc/qemu/util/osdep.c (revision c1dc0a1debe1efb7e4e02b580d43cf6cab1d36e8)
1  /*
2   * QEMU low level functions
3   *
4   * Copyright (c) 2003 Fabrice Bellard
5   *
6   * Permission is hereby granted, free of charge, to any person obtaining a copy
7   * of this software and associated documentation files (the "Software"), to deal
8   * in the Software without restriction, including without limitation the rights
9   * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10   * copies of the Software, and to permit persons to whom the Software is
11   * furnished to do so, subject to the following conditions:
12   *
13   * The above copyright notice and this permission notice shall be included in
14   * all copies or substantial portions of the Software.
15   *
16   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17   * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18   * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19   * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20   * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21   * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22   * THE SOFTWARE.
23   */
24  #include "qemu/osdep.h"
25  #include "qapi/error.h"
26  #include "qemu/cutils.h"
27  #include "qemu/sockets.h"
28  #include "qemu/error-report.h"
29  #include "qemu/madvise.h"
30  #include "qemu/mprotect.h"
31  #include "qemu/hw-version.h"
32  #include "monitor/monitor.h"
33  
/*
 * Hardware version string returned by qemu_hw_version().  It starts out as
 * QEMU_HW_VERSION and can be replaced through qemu_set_hw_version().
 */
34  static const char *hw_version = QEMU_HW_VERSION;
35  
/*
 * Set the TCP_CORK option on @fd to @v.
 *
 * On hosts that do not define both SOL_TCP and TCP_CORK this is a no-op
 * that reports success.
 *
 * Returns: the result of setsockopt(), or 0 when corking is unavailable.
 */
int socket_set_cork(int fd, int v)
{
    int rc = 0;

#if defined(SOL_TCP) && defined(TCP_CORK)
    rc = setsockopt(fd, SOL_TCP, TCP_CORK, &v, sizeof(v));
#endif
    return rc;
}
44  
/*
 * Enable TCP_NODELAY on @fd.
 *
 * Returns: the result of setsockopt().
 */
int socket_set_nodelay(int fd)
{
    int enable = 1;

    return setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &enable, sizeof(enable));
}
50  
51  int qemu_madvise(void *addr, size_t len, int advice)
52  {
53      if (advice == QEMU_MADV_INVALID) {
54          errno = EINVAL;
55          return -1;
56      }
57  #if defined(CONFIG_MADVISE)
58      return madvise(addr, len, advice);
59  #elif defined(CONFIG_POSIX_MADVISE)
60      return posix_madvise(addr, len, advice);
61  #else
62      errno = EINVAL;
63      return -1;
64  #endif
65  }
66  
/*
 * Change the protection of [@addr, @addr + @size) to the host-specific
 * value @prot.  Both @addr and @size must be multiples of the real host
 * page size; this is asserted.
 *
 * Returns: 0 on success; -1 after reporting the failure with error_report().
 */
static int qemu_mprotect__osdep(void *addr, size_t size, int prot)
{
    g_assert(((uintptr_t)addr & ~qemu_real_host_page_mask()) == 0);
    g_assert((size & ~qemu_real_host_page_mask()) == 0);

#ifdef _WIN32
    DWORD previous;

    if (!VirtualProtect(addr, size, prot, &previous)) {
        g_autofree gchar *emsg = g_win32_error_message(GetLastError());

        error_report("%s: VirtualProtect failed: %s", __func__, emsg);
        return -1;
    }
#else
    if (mprotect(addr, size, prot) != 0) {
        error_report("%s: mprotect failed: %s", __func__, strerror(errno));
        return -1;
    }
#endif
    return 0;
}
89  
/*
 * Make [@addr, @addr + @size) readable and writable.
 *
 * Returns: the result of qemu_mprotect__osdep().
 */
int qemu_mprotect_rw(void *addr, size_t size)
{
#ifdef _WIN32
    const int prot = PAGE_READWRITE;
#else
    const int prot = PROT_READ | PROT_WRITE;
#endif

    return qemu_mprotect__osdep(addr, size, prot);
}
98  
/*
 * Make [@addr, @addr + @size) readable, writable and executable.
 *
 * Returns: the result of qemu_mprotect__osdep().
 */
int qemu_mprotect_rwx(void *addr, size_t size)
{
#ifdef _WIN32
    const int prot = PAGE_EXECUTE_READWRITE;
#else
    const int prot = PROT_READ | PROT_WRITE | PROT_EXEC;
#endif

    return qemu_mprotect__osdep(addr, size, prot);
}
107  
/*
 * Remove all access rights from [@addr, @addr + @size).
 *
 * Returns: the result of qemu_mprotect__osdep().
 */
int qemu_mprotect_none(void *addr, size_t size)
{
#ifdef _WIN32
    const int prot = PAGE_NOACCESS;
#else
    const int prot = PROT_NONE;
#endif

    return qemu_mprotect__osdep(addr, size, prot);
}
116  
117  #ifndef _WIN32
118  
/*
 * fcntl() commands used to take/release and to test file locks.
 * -1 means "not probed yet"; qemu_probe_lock_ops() replaces the values
 * with either the OFD commands or the traditional F_SETLK/F_GETLK pair.
 */
118  static int fcntl_op_setlk = -1;
119  static int fcntl_op_getlk = -1;
121  
/*
 * Duplicate @fd and apply @flags to the duplicate.
 *
 * The O_SYNC state cannot be changed on the duplicate, so a mismatch
 * between @flags and the duplicate's current flags fails with EINVAL.
 * The file is truncated in the cases where open() would have truncated
 * it (O_TRUNC, or O_CREAT together with O_EXCL).
 *
 * Returns: the new descriptor, or -1 with errno set.  On failure the
 * duplicate, if one was created, is closed again.
 */
int qemu_dup_flags(int fd, int flags)
{
    const int creat_excl = O_CREAT | O_EXCL;
    int saved_errno;
    int cur_flags;
    int new_fd;

    new_fd = qemu_dup(fd);
    if (new_fd == -1) {
        /* Nothing to clean up; errno comes from qemu_dup() */
        return -1;
    }

    cur_flags = fcntl(new_fd, F_GETFL);
    if (cur_flags == -1) {
        goto err_close;
    }

    if ((cur_flags ^ flags) & O_SYNC) {
        errno = EINVAL;
        goto err_close;
    }

    /* Set/unset flags that we can with fcntl */
    if (fcntl(new_fd, F_SETFL, flags) == -1) {
        goto err_close;
    }

    /* Truncate the file in the cases that open() would truncate it */
    if (((flags & O_TRUNC) || (flags & creat_excl) == creat_excl) &&
        ftruncate(new_fd, 0) == -1) {
        goto err_close;
    }

    return new_fd;

err_close:
    /* close() may clobber errno; hand the original failure to the caller */
    saved_errno = errno;
    close(new_fd);
    errno = saved_errno;
    return -1;
}
169  
/*
 * Duplicate @fd with the close-on-exec flag set on the duplicate.
 *
 * Uses F_DUPFD_CLOEXEC when the host defines it, otherwise dup() followed
 * by qemu_set_cloexec().
 *
 * Returns: the new descriptor, or -1 on failure.
 */
int qemu_dup(int fd)
{
#ifdef F_DUPFD_CLOEXEC
    return fcntl(fd, F_DUPFD_CLOEXEC, 0);
#else
    int copy = dup(fd);

    if (copy != -1) {
        qemu_set_cloexec(copy);
    }
    return copy;
#endif
}
183  
/*
 * Parse the fdset id that follows "/dev/fdset/" in a file name.
 *
 * Returns: whatever qemu_parse_fd() returns for @param; the caller in
 * this file treats -1 as a parse failure.
 */
static int qemu_parse_fdset(const char *param)
{
    int id = qemu_parse_fd(param);

    return id;
}
188  
189  static void qemu_probe_lock_ops(void)
190  {
191      if (fcntl_op_setlk == -1) {
192  #ifdef F_OFD_SETLK
193          int fd;
194          int ret;
195          struct flock fl = {
196              .l_whence = SEEK_SET,
197              .l_start  = 0,
198              .l_len    = 0,
199              .l_type   = F_WRLCK,
200          };
201  
202          fd = open("/dev/null", O_RDWR);
203          if (fd < 0) {
204              fprintf(stderr,
205                      "Failed to open /dev/null for OFD lock probing: %s\n",
206                      strerror(errno));
207              fcntl_op_setlk = F_SETLK;
208              fcntl_op_getlk = F_GETLK;
209              return;
210          }
211          ret = fcntl(fd, F_OFD_GETLK, &fl);
212          close(fd);
213          if (!ret) {
214              fcntl_op_setlk = F_OFD_SETLK;
215              fcntl_op_getlk = F_OFD_GETLK;
216          } else {
217              fcntl_op_setlk = F_SETLK;
218              fcntl_op_getlk = F_GETLK;
219          }
220  #else
221          fcntl_op_setlk = F_SETLK;
222          fcntl_op_getlk = F_GETLK;
223  #endif
224      }
225  }
226  
/*
 * Report whether the lock helpers in this file use OFD locks.
 *
 * Returns: true when probing selected F_OFD_SETLK; false otherwise,
 * including on hosts whose headers do not define F_OFD_SETLK.
 */
bool qemu_has_ofd_lock(void)
{
    bool have_ofd = false;

    qemu_probe_lock_ops();
#ifdef F_OFD_SETLK
    have_ofd = (fcntl_op_setlk == F_OFD_SETLK);
#endif
    return have_ofd;
}
236  
237  static int qemu_lock_fcntl(int fd, int64_t start, int64_t len, int fl_type)
238  {
239      int ret;
240      struct flock fl = {
241          .l_whence = SEEK_SET,
242          .l_start  = start,
243          .l_len    = len,
244          .l_type   = fl_type,
245      };
246      qemu_probe_lock_ops();
247      ret = RETRY_ON_EINTR(fcntl(fd, fcntl_op_setlk, &fl));
248      return ret == -1 ? -errno : 0;
249  }
250  
/*
 * Lock the byte range [@start, @start + @len) of @fd, with a write lock
 * when @exclusive is true and a read lock otherwise.
 *
 * Returns: 0 on success, -errno on failure.
 */
int qemu_lock_fd(int fd, int64_t start, int64_t len, bool exclusive)
{
    int fl_type = exclusive ? F_WRLCK : F_RDLCK;

    return qemu_lock_fcntl(fd, start, len, fl_type);
}
255  
/*
 * Release the lock on the byte range [@start, @start + @len) of @fd.
 *
 * Returns: 0 on success, -errno on failure.
 */
int qemu_unlock_fd(int fd, int64_t start, int64_t len)
{
    const int fl_type = F_UNLCK;

    return qemu_lock_fcntl(fd, start, len, fl_type);
}
260  
261  int qemu_lock_fd_test(int fd, int64_t start, int64_t len, bool exclusive)
262  {
263      int ret;
264      struct flock fl = {
265          .l_whence = SEEK_SET,
266          .l_start  = start,
267          .l_len    = len,
268          .l_type   = exclusive ? F_WRLCK : F_RDLCK,
269      };
270      qemu_probe_lock_ops();
271      ret = fcntl(fd, fcntl_op_getlk, &fl);
272      if (ret == -1) {
273          return -errno;
274      } else {
275          return fl.l_type == F_UNLCK ? 0 : -EAGAIN;
276      }
277  }
278  #endif
279  
/*
 * open() @name with close-on-exec set on the resulting descriptor.
 *
 * Uses O_CLOEXEC when the host defines it, otherwise sets the flag after
 * a successful open() via qemu_set_cloexec().
 *
 * Returns: the result of open().
 */
static int qemu_open_cloexec(const char *name, int flags, mode_t mode)
{
#ifdef O_CLOEXEC
    return open(name, flags | O_CLOEXEC, mode);
#else
    int fd = open(name, flags, mode);

    if (fd >= 0) {
        qemu_set_cloexec(fd);
    }
    return fd;
#endif
}
293  
294  /*
295   * Opens a file with FD_CLOEXEC set
296   */
297  static int
298  qemu_open_internal(const char *name, int flags, mode_t mode, Error **errp)
299  {
300      int ret;
301  
302  #ifndef _WIN32
303      const char *fdset_id_str;
304  
305      /* Attempt dup of fd from fd set */
306      if (strstart(name, "/dev/fdset/", &fdset_id_str)) {
307          int64_t fdset_id;
308          int dupfd;
309  
310          fdset_id = qemu_parse_fdset(fdset_id_str);
311          if (fdset_id == -1) {
312              error_setg(errp, "Could not parse fdset %s", name);
313              errno = EINVAL;
314              return -1;
315          }
316  
317          dupfd = monitor_fdset_dup_fd_add(fdset_id, flags);
318          if (dupfd == -1) {
319              error_setg_errno(errp, errno, "Could not dup FD for %s flags %x",
320                               name, flags);
321              return -1;
322          }
323  
324          return dupfd;
325      }
326  #endif
327  
328      ret = qemu_open_cloexec(name, flags, mode);
329  
330      if (ret == -1) {
331          const char *action = flags & O_CREAT ? "create" : "open";
332  #ifdef O_DIRECT
333          /* Give more helpful error message for O_DIRECT */
334          if (errno == EINVAL && (flags & O_DIRECT)) {
335              ret = open(name, flags & ~O_DIRECT, mode);
336              if (ret != -1) {
337                  close(ret);
338                  error_setg(errp, "Could not %s '%s': "
339                             "filesystem does not support O_DIRECT",
340                             action, name);
341                  errno = EINVAL; /* restore first open()'s errno */
342                  return -1;
343              }
344          }
345  #endif /* O_DIRECT */
346          error_setg_errno(errp, errno, "Could not %s '%s'",
347                           action, name);
348      }
349  
350      return ret;
351  }
352  
353  
354  int qemu_open(const char *name, int flags, Error **errp)
355  {
356      assert(!(flags & O_CREAT));
357  
358      return qemu_open_internal(name, flags, 0, errp);
359  }
360  
361  
362  int qemu_create(const char *name, int flags, mode_t mode, Error **errp)
363  {
364      assert(!(flags & O_CREAT));
365  
366      return qemu_open_internal(name, flags | O_CREAT, mode, errp);
367  }
368  
369  
/*
 * open()-style variant of qemu_open_internal() without an Error object.
 *
 * When @flags contains O_CREAT, the next variadic argument is read as the
 * file mode (fetched as an int).  If the open fails with EINVAL and
 * O_DIRECT was requested, a hint is printed through error_report().
 *
 * Returns: the descriptor, or -1 with errno set.
 */
int qemu_open_old(const char *name, int flags, ...)
{
    mode_t mode = 0;
    int fd;

    if (flags & O_CREAT) {
        va_list args;

        va_start(args, flags);
        mode = va_arg(args, int);
        va_end(args);
    }

    fd = qemu_open_internal(name, flags, mode, NULL);

#ifdef O_DIRECT
    if (fd == -1 && errno == EINVAL && (flags & O_DIRECT)) {
        error_report("file system may not support O_DIRECT");
        errno = EINVAL; /* in case it was clobbered */
    }
#endif /* O_DIRECT */

    return fd;
}
393  
/*
 * Close @fd.  If @fd was dup'd from a monitor fd set, it is also removed
 * from that set's bookkeeping once close() has succeeded.
 *
 * Returns: the result of close().
 */
int qemu_close(int fd)
{
    /* Look the fd up before closing it */
    bool from_fdset = monitor_fdset_dup_fd_find(fd) != -1;
    int rc = close(fd);

    if (from_fdset && rc == 0) {
        monitor_fdset_dup_fd_remove(fd);
    }

    return rc;
}
413  
/*
 * Delete a file from the filesystem, unless the filename is /dev/fdset/...
 * Names with that prefix are left alone and reported as success.
 *
 * Returns: On success, zero is returned.  On error, -1 is returned,
 * and errno is set appropriately.
 */
int qemu_unlink(const char *name)
{
    return g_str_has_prefix(name, "/dev/fdset/") ? 0 : unlink(name);
}
428  
/*
 * A variant of write(2) which handles partial writes.
 *
 * Keeps calling write() until @count bytes have been written or write()
 * fails with an error other than EINTR.
 *
 * Returns the number of bytes transferred.
 * errno is set if fewer than @count bytes are written.
 *
 * This function doesn't work with non-blocking fds.
 * Any of the possibilities with non-blocking fds is bad:
 *   - return a short write (then the name is wrong)
 *   - busy wait by adding (errno == EAGAIN) to the loop
 */
ssize_t qemu_write_full(int fd, const void *buf, size_t count)
{
    const char *cursor = buf;
    ssize_t written = 0;

    while (count > 0) {
        ssize_t n = write(fd, cursor, count);

        if (n >= 0) {
            cursor += n;
            count -= n;
            written += n;
        } else if (errno != EINTR) {
            break;
        }
    }

    return written;
}
460  
/*
 * Opens a socket with FD_CLOEXEC set.
 *
 * Where SOCK_CLOEXEC is defined it is tried first; only an EINVAL failure
 * falls through to plain socket() followed by qemu_set_cloexec().
 *
 * Returns: the result of socket().
 */
int qemu_socket(int domain, int type, int protocol)
{
    int sock;

#ifdef SOCK_CLOEXEC
    sock = socket(domain, type | SOCK_CLOEXEC, protocol);
    if (!(sock == -1 && errno == EINVAL)) {
        return sock;
    }
#endif

    sock = socket(domain, type, protocol);
    if (sock >= 0) {
        qemu_set_cloexec(sock);
    }
    return sock;
}
481  
/*
 * Accept a connection on @s and set FD_CLOEXEC on the new descriptor.
 *
 * With CONFIG_ACCEPT4, accept4(SOCK_CLOEXEC) is tried first; only an
 * ENOSYS failure falls through to accept() followed by qemu_set_cloexec().
 *
 * Returns: the result of accept4()/accept().
 */
int qemu_accept(int s, struct sockaddr *addr, socklen_t *addrlen)
{
    int conn;

#ifdef CONFIG_ACCEPT4
    conn = accept4(s, addr, addrlen, SOCK_CLOEXEC);
    if (!(conn == -1 && errno == ENOSYS)) {
        return conn;
    }
#endif

    conn = accept(s, addr, addrlen);
    if (conn >= 0) {
        qemu_set_cloexec(conn);
    }
    return conn;
}
502  
/*
 * A variant of send(2) which handles partial sends.
 *
 * Keeps calling send() (with flags 0) until @count bytes have been sent
 * or send() fails with an error other than EINTR.
 *
 * Returns the number of bytes transferred; errno is left as set by the
 * failing send() when fewer than @count bytes went out.
 */
ssize_t qemu_send_full(int s, const void *buf, size_t count)
{
    const char *cursor = buf;
    ssize_t sent = 0;

    while (count > 0) {
        ssize_t n = send(s, cursor, count, 0);

        if (n >= 0) {
            cursor += n;
            count -= n;
            sent += n;
        } else if (errno != EINTR) {
            break;
        }
    }

    return sent;
}
524  
525  void qemu_set_hw_version(const char *version)
526  {
527      hw_version = version;
528  }
529  
530  const char *qemu_hw_version(void)
531  {
532      return hw_version;
533  }
534  
535  #ifdef _WIN32
/* atexit() handler registered by socket_init(): shuts Winsock down. */
static void socket_cleanup(void)
{
    WSACleanup();
}
540  #endif
541  
/*
 * Initialise the host socket layer.
 *
 * On Windows this starts Winsock 2.2 and registers socket_cleanup() to run
 * at exit; on every other host there is nothing to do.
 *
 * Returns: 0 on success, -1 if WSAStartup() fails.
 */
int socket_init(void)
{
#ifdef _WIN32
    WSADATA Data;
    int ret;

    ret = WSAStartup(MAKEWORD(2, 2), &Data);
    if (ret != 0) {
        /*
         * WSAStartup() returns its error code directly.  WSAGetLastError()
         * must not be used here: Winsock did not start, so the value it
         * would report is unrelated to this failure.
         */
        fprintf(stderr, "WSAStartup: %d\n", ret);
        return -1;
    }
    atexit(socket_cleanup);
#endif
    return 0;
}
558  
559  
560  #ifndef CONFIG_IOVEC
/*
 * Emulate readv()/writev() with one read()/write() call per chunk.
 *
 * @fd:       descriptor to read from or write to
 * @iov:      array of @iov_cnt buffers, processed in order
 * @iov_cnt:  number of elements in @iov; a negative count fails with EINVAL
 * @do_write: true to write the buffers out, false to fill them by reading
 *
 * Each element is transferred completely before the next one is started.
 * Zero-length elements are skipped: issuing a zero-byte read()/write() for
 * them would return 0, which is indistinguishable from end-of-file and
 * would cut the transfer short.
 *
 * Returns: the total number of bytes transferred, which is short if
 * end-of-file (a 0 return from the underlying call) or an error occurs
 * after some data was processed; -1 with errno set if an error occurs
 * before any data was processed.
 */
static ssize_t
readv_writev(int fd, const struct iovec *iov, int iov_cnt, bool do_write)
{
    int i = 0;
    ssize_t ret = 0;
    size_t off = 0;

    if (iov_cnt < 0) {
        errno = EINVAL;
        return -1;
    }

    while (i < iov_cnt) {
        char *pos;
        size_t left;
        ssize_t r;

        if (off >= iov[i].iov_len) {
            /* Element finished (or empty to begin with): move on */
            off = 0;
            i++;
            continue;
        }

        pos = (char *)iov[i].iov_base + off;
        left = iov[i].iov_len - off;
        r = do_write ? write(fd, pos, left) : read(fd, pos, left);

        if (r > 0) {
            ret += r;
            off += r;
        } else if (r == 0) {
            break;
        } else if (errno == EINTR) {
            continue;
        } else {
            /* else it is some "other" error,
             * only return if there was no data processed. */
            if (ret == 0) {
                ret = -1;
            }
            break;
        }
    }
    return ret;
}
594  
/*
 * Fallback readv() for builds without CONFIG_IOVEC: fills the @iov_cnt
 * buffers in @iov from @fd via readv_writev().
 */
ssize_t readv(int fd, const struct iovec *iov, int iov_cnt)
{
    const bool do_write = false;

    return readv_writev(fd, iov, iov_cnt, do_write);
}
600  
/*
 * Fallback writev() for builds without CONFIG_IOVEC: writes the @iov_cnt
 * buffers in @iov to @fd via readv_writev().
 */
ssize_t writev(int fd, const struct iovec *iov, int iov_cnt)
{
    const bool do_write = true;

    return readv_writev(fd, iov, iov_cnt, do_write);
}
606  #endif
607  
/*
 * Make sure data goes on disk, but if possible do not bother to
 * write out the inode just for timestamp updates.
 *
 * Not every host provides fdatasync(); when CONFIG_FDATASYNC is not
 * defined this falls back to fsync().
 *
 * Returns: the result of fdatasync() or fsync().
 */
int qemu_fdatasync(int fd)
{
    int rc;

#ifdef CONFIG_FDATASYNC
    rc = fdatasync(fd);
#else
    rc = fsync(fd);
#endif
    return rc;
}
623