xref: /openbmc/qemu/util/osdep.c (revision 9971cbac2f30a89ddb094dc9627d2d16dc6e5875)
1  /*
2   * QEMU low level functions
3   *
4   * Copyright (c) 2003 Fabrice Bellard
5   *
6   * Permission is hereby granted, free of charge, to any person obtaining a copy
7   * of this software and associated documentation files (the "Software"), to deal
8   * in the Software without restriction, including without limitation the rights
9   * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10   * copies of the Software, and to permit persons to whom the Software is
11   * furnished to do so, subject to the following conditions:
12   *
13   * The above copyright notice and this permission notice shall be included in
14   * all copies or substantial portions of the Software.
15   *
16   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17   * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18   * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19   * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20   * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21   * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22   * THE SOFTWARE.
23   */
24  #include "qemu/osdep.h"
25  #include "qapi/error.h"
26  #include "qemu/cutils.h"
27  #include "qemu/sockets.h"
28  #include "qemu/error-report.h"
29  #include "qemu/madvise.h"
30  #include "qemu/mprotect.h"
31  #include "qemu/hw-version.h"
32  #include "monitor/monitor.h"
33  
/*
 * Hardware version string handed out by qemu_hw_version().  Starts as
 * QEMU_HW_VERSION and can be replaced through qemu_set_hw_version().
 */
static const char *hw_version = QEMU_HW_VERSION;
35  
/*
 * Set the TCP_CORK option on @fd to @v.
 *
 * Returns the setsockopt() result, or 0 without doing anything when the
 * build environment does not define SOL_TCP and TCP_CORK.
 */
int socket_set_cork(int fd, int v)
{
    int rc = 0;

#if defined(SOL_TCP) && defined(TCP_CORK)
    rc = setsockopt(fd, SOL_TCP, TCP_CORK, &v, sizeof(v));
#endif
    return rc;
}
44  
/*
 * Enable TCP_NODELAY on @fd.
 *
 * Returns the setsockopt() result.
 */
int socket_set_nodelay(int fd)
{
    int enable = 1;

    return setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &enable, sizeof(enable));
}
50  
51  int qemu_madvise(void *addr, size_t len, int advice)
52  {
53      if (advice == QEMU_MADV_INVALID) {
54          errno = EINVAL;
55          return -1;
56      }
57  #if defined(CONFIG_MADVISE)
58      return madvise(addr, len, advice);
59  #elif defined(CONFIG_POSIX_MADVISE)
60      int rc = posix_madvise(addr, len, advice);
61      if (rc) {
62          errno = rc;
63          return -1;
64      }
65      return 0;
66  #else
67      errno = ENOSYS;
68      return -1;
69  #endif
70  }
71  
/*
 * Change the protection of [@addr, @addr + @size) to the host-specific
 * value @prot.  Both @addr and @size must be multiples of the real host
 * page size (asserted).
 *
 * Returns 0 on success; on failure reports the error and returns -1.
 */
static int qemu_mprotect__osdep(void *addr, size_t size, int prot)
{
    g_assert(!((uintptr_t)addr & ~qemu_real_host_page_mask()));
    g_assert(!(size & ~qemu_real_host_page_mask()));

#ifdef _WIN32
    DWORD previous;

    if (VirtualProtect(addr, size, prot, &previous)) {
        return 0;
    }

    g_autofree gchar *emsg = g_win32_error_message(GetLastError());
    error_report("%s: VirtualProtect failed: %s", __func__, emsg);
    return -1;
#else
    if (mprotect(addr, size, prot) == 0) {
        return 0;
    }

    error_report("%s: mprotect failed: %s", __func__, strerror(errno));
    return -1;
#endif
}
94  
/* Make [@addr, @addr + @size) readable and writable; returns 0 or -1. */
int qemu_mprotect_rw(void *addr, size_t size)
{
#ifdef _WIN32
    const int prot = PAGE_READWRITE;
#else
    const int prot = PROT_READ | PROT_WRITE;
#endif

    return qemu_mprotect__osdep(addr, size, prot);
}
103  
/* Make [@addr, @addr + @size) readable, writable and executable; returns 0 or -1. */
int qemu_mprotect_rwx(void *addr, size_t size)
{
#ifdef _WIN32
    const int prot = PAGE_EXECUTE_READWRITE;
#else
    const int prot = PROT_READ | PROT_WRITE | PROT_EXEC;
#endif

    return qemu_mprotect__osdep(addr, size, prot);
}
112  
/* Remove all access to [@addr, @addr + @size); returns 0 or -1. */
int qemu_mprotect_none(void *addr, size_t size)
{
#ifdef _WIN32
    const int prot = PAGE_NOACCESS;
#else
    const int prot = PROT_NONE;
#endif

    return qemu_mprotect__osdep(addr, size, prot);
}
121  
122  #ifndef _WIN32
123  
/*
 * fcntl() commands used to set and to query file locks.  -1 means the
 * choice has not been made yet; qemu_probe_lock_ops() fills both in.
 */
static int fcntl_op_setlk = -1;
static int fcntl_op_getlk = -1;
126  
/*
 * Duplicate @fd and apply @flags to the duplicate.
 *
 * The O_SYNC setting cannot be changed: if @flags and the duplicate
 * disagree on it the call fails with EINVAL.  The file is truncated in
 * the cases where open() with @flags would have truncated it.
 *
 * Returns the new descriptor, or -1 with errno set; on failure the
 * duplicate is closed again.
 */
int qemu_dup_flags(int fd, int flags)
{
    const int creat_excl = O_CREAT | O_EXCL;
    int saved_errno;
    int cur_flags;
    int new_fd;

    new_fd = qemu_dup(fd);
    if (new_fd == -1) {
        /* nothing to clean up, errno is already set */
        return -1;
    }

    cur_flags = fcntl(new_fd, F_GETFL);
    if (cur_flags == -1) {
        goto err_close;
    }

    if ((cur_flags & O_SYNC) != (flags & O_SYNC)) {
        errno = EINVAL;
        goto err_close;
    }

    /* Set/unset the flags that fcntl is able to change */
    if (fcntl(new_fd, F_SETFL, flags) == -1) {
        goto err_close;
    }

    /* Mirror open(): O_TRUNC, or O_CREAT together with O_EXCL, truncates */
    if ((flags & O_TRUNC) || (flags & creat_excl) == creat_excl) {
        if (ftruncate(new_fd, 0) == -1) {
            goto err_close;
        }
    }

    return new_fd;

err_close:
    /* close() may clobber errno, so preserve the original failure */
    saved_errno = errno;
    close(new_fd);
    errno = saved_errno;
    return -1;
}
174  
/*
 * Duplicate @fd with close-on-exec set on the copy.
 *
 * Returns the new descriptor, or -1 on failure.
 */
int qemu_dup(int fd)
{
#ifdef F_DUPFD_CLOEXEC
    return fcntl(fd, F_DUPFD_CLOEXEC, 0);
#else
    int copy = dup(fd);

    if (copy == -1) {
        return -1;
    }
    qemu_set_cloexec(copy);
    return copy;
#endif
}
188  
/*
 * Parse the numeric part of a "/dev/fdset/<id>" name.  Simply forwards
 * to qemu_parse_fd() and returns its result; the caller in this file
 * treats -1 as a parse failure.
 */
static int qemu_parse_fdset(const char *param)
{
    return qemu_parse_fd(param);
}
193  
194  static void qemu_probe_lock_ops(void)
195  {
196      if (fcntl_op_setlk == -1) {
197  #ifdef F_OFD_SETLK
198          int fd;
199          int ret;
200          struct flock fl = {
201              .l_whence = SEEK_SET,
202              .l_start  = 0,
203              .l_len    = 0,
204              .l_type   = F_WRLCK,
205          };
206  
207          fd = open("/dev/null", O_RDWR);
208          if (fd < 0) {
209              fprintf(stderr,
210                      "Failed to open /dev/null for OFD lock probing: %s\n",
211                      strerror(errno));
212              fcntl_op_setlk = F_SETLK;
213              fcntl_op_getlk = F_GETLK;
214              return;
215          }
216          ret = fcntl(fd, F_OFD_GETLK, &fl);
217          close(fd);
218          if (!ret) {
219              fcntl_op_setlk = F_OFD_SETLK;
220              fcntl_op_getlk = F_OFD_GETLK;
221          } else {
222              fcntl_op_setlk = F_SETLK;
223              fcntl_op_getlk = F_GETLK;
224          }
225  #else
226          fcntl_op_setlk = F_SETLK;
227          fcntl_op_getlk = F_GETLK;
228  #endif
229      }
230  }
231  
/*
 * Report whether the lock helpers in this file use OFD locks
 * (F_OFD_SETLK).  Triggers the one-time probe if it has not run yet.
 */
bool qemu_has_ofd_lock(void)
{
    bool have_ofd = false;

    qemu_probe_lock_ops();
#ifdef F_OFD_SETLK
    have_ofd = (fcntl_op_setlk == F_OFD_SETLK);
#endif
    return have_ofd;
}
241  
242  static int qemu_lock_fcntl(int fd, int64_t start, int64_t len, int fl_type)
243  {
244      int ret;
245      struct flock fl = {
246          .l_whence = SEEK_SET,
247          .l_start  = start,
248          .l_len    = len,
249          .l_type   = fl_type,
250      };
251      qemu_probe_lock_ops();
252      ret = RETRY_ON_EINTR(fcntl(fd, fcntl_op_setlk, &fl));
253      return ret == -1 ? -errno : 0;
254  }
255  
/*
 * Lock @len bytes of @fd starting at @start: a write lock when
 * @exclusive is true, a read lock otherwise.
 *
 * Returns 0 on success or a negative errno value.
 */
int qemu_lock_fd(int fd, int64_t start, int64_t len, bool exclusive)
{
    int lock_type = exclusive ? F_WRLCK : F_RDLCK;

    return qemu_lock_fcntl(fd, start, len, lock_type);
}
260  
/*
 * Release the lock on @len bytes of @fd starting at @start.
 *
 * Returns 0 on success or a negative errno value.
 */
int qemu_unlock_fd(int fd, int64_t start, int64_t len)
{
    return qemu_lock_fcntl(fd, start, len, F_UNLCK);
}
265  
266  int qemu_lock_fd_test(int fd, int64_t start, int64_t len, bool exclusive)
267  {
268      int ret;
269      struct flock fl = {
270          .l_whence = SEEK_SET,
271          .l_start  = start,
272          .l_len    = len,
273          .l_type   = exclusive ? F_WRLCK : F_RDLCK,
274      };
275      qemu_probe_lock_ops();
276      ret = fcntl(fd, fcntl_op_getlk, &fl);
277      if (ret == -1) {
278          return -errno;
279      } else {
280          return fl.l_type == F_UNLCK ? 0 : -EAGAIN;
281      }
282  }
283  #endif
284  
/* Report whether O_DIRECT was available when this file was compiled. */
bool qemu_has_direct_io(void)
{
    bool supported = false;

#ifdef O_DIRECT
    supported = true;
#endif
    return supported;
}
293  
/*
 * open() @name with close-on-exec set on the resulting descriptor,
 * atomically via O_CLOEXEC where available and through
 * qemu_set_cloexec() after the fact otherwise.
 *
 * Returns the open() result.
 */
static int qemu_open_cloexec(const char *name, int flags, mode_t mode)
{
#ifdef O_CLOEXEC
    return open(name, flags | O_CLOEXEC, mode);
#else
    int fd = open(name, flags, mode);

    if (fd < 0) {
        return fd;
    }
    qemu_set_cloexec(fd);
    return fd;
#endif
}
307  
308  /*
309   * Opens a file with FD_CLOEXEC set
310   */
311  static int
312  qemu_open_internal(const char *name, int flags, mode_t mode, Error **errp)
313  {
314      int ret;
315  
316  #ifndef _WIN32
317      const char *fdset_id_str;
318  
319      /* Attempt dup of fd from fd set */
320      if (strstart(name, "/dev/fdset/", &fdset_id_str)) {
321          int64_t fdset_id;
322  
323          fdset_id = qemu_parse_fdset(fdset_id_str);
324          if (fdset_id == -1) {
325              error_setg(errp, "Could not parse fdset %s", name);
326              errno = EINVAL;
327              return -1;
328          }
329  
330          return monitor_fdset_dup_fd_add(fdset_id, flags, errp);
331      }
332  #endif
333  
334      ret = qemu_open_cloexec(name, flags, mode);
335  
336      if (ret == -1) {
337          const char *action = flags & O_CREAT ? "create" : "open";
338  #ifdef O_DIRECT
339          /* Give more helpful error message for O_DIRECT */
340          if (errno == EINVAL && (flags & O_DIRECT)) {
341              ret = open(name, flags & ~O_DIRECT, mode);
342              if (ret != -1) {
343                  close(ret);
344                  error_setg(errp, "Could not %s '%s': "
345                             "filesystem does not support O_DIRECT",
346                             action, name);
347                  errno = EINVAL; /* restore first open()'s errno */
348                  return -1;
349              }
350          }
351  #endif /* O_DIRECT */
352          error_setg_errno(errp, errno, "Could not %s '%s'",
353                           action, name);
354      }
355  
356      return ret;
357  }
358  
359  
360  int qemu_open(const char *name, int flags, Error **errp)
361  {
362      assert(!(flags & O_CREAT));
363  
364      return qemu_open_internal(name, flags, 0, errp);
365  }
366  
367  
368  int qemu_create(const char *name, int flags, mode_t mode, Error **errp)
369  {
370      assert(!(flags & O_CREAT));
371  
372      return qemu_open_internal(name, flags | O_CREAT, mode, errp);
373  }
374  
375  
/*
 * open()-style variadic front end: the optional third argument is the
 * creation mode and is only read when @flags contains O_CREAT.
 *
 * No Error object is produced; an O_DIRECT open failing with EINVAL gets
 * a hint printed through error_report().
 *
 * Returns the descriptor, or -1 with errno set.
 */
int qemu_open_old(const char *name, int flags, ...)
{
    mode_t create_mode = 0;
    va_list args;
    int fd;

    va_start(args, flags);
    if (flags & O_CREAT) {
        /* mode_t is fetched as int, the promoted variadic argument type */
        create_mode = va_arg(args, int);
    }
    va_end(args);

    fd = qemu_open_internal(name, flags, create_mode, NULL);

#ifdef O_DIRECT
    if (fd == -1 && (flags & O_DIRECT) && errno == EINVAL) {
        error_report("file system may not support O_DIRECT");
        errno = EINVAL; /* in case it was clobbered */
    }
#endif /* O_DIRECT */

    return fd;
}
399  
/*
 * Close @fd, first telling the monitor so that a descriptor dup'd from
 * an fdset is removed from its bookkeeping.
 *
 * Returns the close() result.
 */
int qemu_close(int fd)
{
    int rc;

    /* Close fd that was dup'd from an fdset */
    monitor_fdset_dup_fd_remove(fd);

    rc = close(fd);
    return rc;
}
406  
/*
 * Remove @name from the filesystem.  Names starting with "/dev/fdset/"
 * do not refer to filesystem entries and are silently accepted.
 *
 * Returns: zero on success (or when nothing had to be done); -1 on
 * error, with errno set by unlink().
 */
int qemu_unlink(const char *name)
{
    bool is_fdset = g_str_has_prefix(name, "/dev/fdset/");

    return is_fdset ? 0 : unlink(name);
}
421  
/*
 * A variant of write(2) which handles partial writes.
 *
 * Keeps calling write() until all @count bytes of @buf have been
 * written or write() fails with something other than EINTR.
 *
 * Returns the number of bytes transferred.  If that is fewer than
 * @count, errno holds the error reported by the failing write().
 *
 * This function doesn't work with non-blocking fds.
 * Any of the possibilities with non-blocking fds is bad:
 *   - return a short write (then the name is wrong)
 *   - busy wait, adding (errno == EAGAIN) to the loop
 */
ssize_t qemu_write_full(int fd, const void *buf, size_t count)
{
    /* Byte cursor: arithmetic on void pointers is not valid ISO C */
    const char *cursor = buf;
    ssize_t total = 0;

    while (count) {
        ssize_t ret = write(fd, cursor, count);

        if (ret < 0) {
            if (errno == EINTR) {
                continue;
            }
            break;
        }

        count -= ret;
        cursor += ret;
        total += ret;
    }

    return total;
}
453  
/*
 * Opens a socket with FD_CLOEXEC set.
 *
 * SOCK_CLOEXEC is tried first where it exists; if that attempt is
 * rejected with EINVAL the socket is created without it and the flag is
 * applied afterwards.
 *
 * Returns the socket() result.
 */
int qemu_socket(int domain, int type, int protocol)
{
    int sock;

#ifdef SOCK_CLOEXEC
    sock = socket(domain, type | SOCK_CLOEXEC, protocol);
    if (!(sock == -1 && errno == EINVAL)) {
        /* success, or a failure unrelated to SOCK_CLOEXEC */
        return sock;
    }
#endif

    sock = socket(domain, type, protocol);
    if (sock < 0) {
        return sock;
    }
    qemu_set_cloexec(sock);
    return sock;
}
474  
/*
 * Accept a connection and set FD_CLOEXEC.
 *
 * With CONFIG_ACCEPT4, accept4(SOCK_CLOEXEC) is used; only when it
 * fails with ENOSYS does the function fall back to accept() followed by
 * qemu_set_cloexec().
 *
 * Returns the accept4()/accept() result.
 */
int qemu_accept(int s, struct sockaddr *addr, socklen_t *addrlen)
{
    int conn;

#ifdef CONFIG_ACCEPT4
    conn = accept4(s, addr, addrlen, SOCK_CLOEXEC);
    if (!(conn == -1 && errno == ENOSYS)) {
        /* success, or a real error from accept4() */
        return conn;
    }
#endif

    conn = accept(s, addr, addrlen);
    if (conn < 0) {
        return conn;
    }
    qemu_set_cloexec(conn);
    return conn;
}
495  
/*
 * A variant of send(2) which handles partial sends.
 *
 * Keeps calling send() (with no flags) until all @count bytes of @buf
 * have been sent or send() fails with something other than EINTR.
 *
 * Returns the number of bytes transferred.  If that is fewer than
 * @count, errno holds the error reported by the failing send().
 */
ssize_t qemu_send_full(int s, const void *buf, size_t count)
{
    /* Byte cursor: arithmetic on void pointers is not valid ISO C */
    const char *cursor = buf;
    ssize_t total = 0;

    while (count) {
        ssize_t ret = send(s, cursor, count, 0);

        if (ret < 0) {
            if (errno == EINTR) {
                continue;
            }
            break;
        }

        count -= ret;
        cursor += ret;
        total += ret;
    }

    return total;
}
517  
/*
 * Replace the hardware version string.  Only the pointer is stored, not
 * a copy, so @version must stay valid for as long as it may be read
 * back through qemu_hw_version().
 */
void qemu_set_hw_version(const char *version)
{
    hw_version = version;
}
522  
/*
 * Return the current hardware version string: QEMU_HW_VERSION unless
 * qemu_set_hw_version() has replaced it.
 */
const char *qemu_hw_version(void)
{
    return hw_version;
}
527  
528  #ifdef _WIN32
/* atexit() handler registered by socket_init(): shuts Winsock down. */
static void socket_cleanup(void)
{
    WSACleanup();
}
533  #endif
534  
/*
 * One-time socket layer initialisation.
 *
 * On Windows this starts Winsock 2.2 and registers socket_cleanup() to
 * run at exit; on other hosts there is nothing to do.
 *
 * Returns 0 on success, -1 if Winsock could not be started (the error
 * code is printed to stderr).
 */
int socket_init(void)
{
#ifdef _WIN32
    WSADATA wsa_data;
    int ret;

    ret = WSAStartup(MAKEWORD(2, 2), &wsa_data);
    if (ret != 0) {
        /*
         * WSAStartup() returns the error code directly; WSAGetLastError()
         * must not be used when start-up itself failed.
         */
        fprintf(stderr, "WSAStartup: %d\n", ret);
        return -1;
    }
    atexit(socket_cleanup);
#endif
    return 0;
}
551  
552  
553  #ifndef CONFIG_IOVEC
/*
 * Emulate readv()/writev() with one read()/write() call per buffer.
 *
 * Walks the @iov_cnt elements of @iov in order, writing (@do_write) or
 * reading each one completely before moving on.  Zero-length elements
 * are skipped.  Processing stops when read()/write() returns 0 or fails
 * with something other than EINTR.
 *
 * Returns the total number of bytes transferred.  -1 is returned only
 * if an error occurred before any data was transferred (errno is set by
 * the failing call), or if @iov_cnt is negative (errno = EINVAL).
 */
static ssize_t
readv_writev(int fd, const struct iovec *iov, int iov_cnt, bool do_write)
{
    ssize_t ret = 0;
    size_t off = 0;
    int i = 0;

    if (iov_cnt < 0) {
        errno = EINVAL;
        return -1;
    }

    while (i < iov_cnt) {
        /* Byte pointer: arithmetic on void pointers is not valid ISO C */
        char *base = iov[i].iov_base;
        size_t remaining = iov[i].iov_len - off;
        ssize_t r;

        if (remaining == 0) {
            /*
             * Element is empty or fully processed.  Never issue a
             * zero-byte read()/write(): its 0 return would be mistaken
             * for end-of-file and cut the transfer short.
             */
            off = 0;
            i++;
            continue;
        }

        r = do_write
            ? write(fd, base + off, remaining)
            : read(fd, base + off, remaining);
        if (r > 0) {
            ret += r;
            off += r;
        } else if (r == 0) {
            break;
        } else if (errno == EINTR) {
            continue;
        } else {
            /* some "other" error: only report it if no data was processed */
            if (ret == 0) {
                ret = -1;
            }
            break;
        }
    }
    return ret;
}
587  
/*
 * Fallback readv() for hosts without CONFIG_IOVEC: fills the buffers one
 * at a time through readv_writev() and returns its result.
 */
ssize_t
readv(int fd, const struct iovec *iov, int iov_cnt)
{
    return readv_writev(fd, iov, iov_cnt, false);
}
593  
/*
 * Fallback writev() for hosts without CONFIG_IOVEC: writes the buffers
 * one at a time through readv_writev() and returns its result.
 */
ssize_t
writev(int fd, const struct iovec *iov, int iov_cnt)
{
    return readv_writev(fd, iov, iov_cnt, true);
}
599  #endif
600  
/*
 * Flush the data of @fd to disk, preferably without forcing out the
 * inode just for timestamp updates.
 *
 * fdatasync() is used when CONFIG_FDATASYNC says the host has it;
 * otherwise the call falls back to fsync().
 *
 * Returns the result of the underlying call.
 */
int qemu_fdatasync(int fd)
{
    int rc;

#ifdef CONFIG_FDATASYNC
    rc = fdatasync(fd);
#else
    rc = fsync(fd);
#endif
    return rc;
}
616