// SPDX-License-Identifier: GPL-2.0-only
/*
 * Helpers for the host side of a virtio ring.
 *
 * Since these may be in userspace, we use (inline) accessors.
 */
#include <linux/compiler.h>
#include <linux/module.h>
#include <linux/vringh.h>
#include <linux/virtio_ring.h>
#include <linux/kernel.h>
#include <linux/ratelimit.h>
#include <linux/uaccess.h>
#include <linux/slab.h>
#include <linux/export.h>
#if IS_REACHABLE(CONFIG_VHOST_IOTLB)
#include <linux/bvec.h>
#include <linux/highmem.h>
#include <linux/vhost_iotlb.h>
#endif
#include <uapi/linux/virtio_config.h>

static __printf(1,2) __cold void vringh_bad(const char *fmt, ...)
{
	static DEFINE_RATELIMIT_STATE(vringh_rs,
				      DEFAULT_RATELIMIT_INTERVAL,
				      DEFAULT_RATELIMIT_BURST);
	if (__ratelimit(&vringh_rs)) {
		va_list ap;
		va_start(ap, fmt);
		printk(KERN_NOTICE "vringh:");
		vprintk(fmt, ap);
		va_end(ap);
	}
}

/* Returns vring->num if empty, -ve on error. */
static inline int __vringh_get_head(const struct vringh *vrh,
				    int (*getu16)(const struct vringh *vrh,
						  u16 *val, const __virtio16 *p),
				    u16 *last_avail_idx)
{
	u16 avail_idx, i, head;
	int err;

	err = getu16(vrh, &avail_idx, &vrh->vring.avail->idx);
	if (err) {
		vringh_bad("Failed to access avail idx at %p",
			   &vrh->vring.avail->idx);
		return err;
	}

	if (*last_avail_idx == avail_idx)
		return vrh->vring.num;

	/* Only get avail ring entries after they have been exposed by guest. */
	virtio_rmb(vrh->weak_barriers);

	i = *last_avail_idx & (vrh->vring.num - 1);

	err = getu16(vrh, &head, &vrh->vring.avail->ring[i]);
	if (err) {
		vringh_bad("Failed to read head: idx %d address %p",
			   *last_avail_idx, &vrh->vring.avail->ring[i]);
		return err;
	}

	if (head >= vrh->vring.num) {
		vringh_bad("Guest says index %u > %u is available",
			   head, vrh->vring.num);
		return -EINVAL;
	}

	(*last_avail_idx)++;
	return head;
}

/**
 * vringh_kiov_advance - skip bytes from vring_kiov
 * @iov: an iov passed to vringh_getdesc_*() (updated as we consume)
 * @len: the maximum length to advance
 */
void vringh_kiov_advance(struct vringh_kiov *iov, size_t len)
{
	while (len && iov->i < iov->used) {
		size_t partlen = min(iov->iov[iov->i].iov_len, len);

		iov->consumed += partlen;
		iov->iov[iov->i].iov_len -= partlen;
		iov->iov[iov->i].iov_base += partlen;

		if (!iov->iov[iov->i].iov_len) {
			/* Fix up old iov element then increment. */
			iov->iov[iov->i].iov_len = iov->consumed;
			iov->iov[iov->i].iov_base -= iov->consumed;

			iov->consumed = 0;
			iov->i++;
		}

		len -= partlen;
	}
}
EXPORT_SYMBOL(vringh_kiov_advance);
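
/*
 * Illustrative sketch only (not used by this file): vringh_kiov_advance()
 * lets a caller step over part of a descriptor chain, e.g. a fixed-size
 * header, before copying the payload.  The 4-byte header size and the
 * example_ name are assumptions made purely for this example.
 */
static ssize_t __maybe_unused example_pull_payload(struct vringh_kiov *riov,
						   void *buf, size_t len)
{
	/* Skip a 4-byte header at the front of the readable chain... */
	vringh_kiov_advance(riov, 4);

	/* ...then copy up to len bytes of what remains into buf. */
	return vringh_iov_pull_kern(riov, buf, len);
}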

/* Copy some bytes to/from the iovec.  Returns num copied. */
static inline ssize_t vringh_iov_xfer(struct vringh *vrh,
				      struct vringh_kiov *iov,
				      void *ptr, size_t len,
				      int (*xfer)(const struct vringh *vrh,
						  void *addr, void *ptr,
						  size_t len))
{
	int err, done = 0;

	while (len && iov->i < iov->used) {
		size_t partlen;

		partlen = min(iov->iov[iov->i].iov_len, len);
		err = xfer(vrh, iov->iov[iov->i].iov_base, ptr, partlen);
		if (err)
			return err;
		done += partlen;
		len -= partlen;
		ptr += partlen;

		vringh_kiov_advance(iov, partlen);
	}
	return done;
}

/* May reduce *len if range is shorter. */
static inline bool range_check(struct vringh *vrh, u64 addr, size_t *len,
			       struct vringh_range *range,
			       bool (*getrange)(struct vringh *,
						u64, struct vringh_range *))
{
	if (addr < range->start || addr > range->end_incl) {
		if (!getrange(vrh, addr, range))
			return false;
	}
	BUG_ON(addr < range->start || addr > range->end_incl);

	/* To end of memory? */
	if (unlikely(addr + *len == 0)) {
		if (range->end_incl == -1ULL)
			return true;
		goto truncate;
	}

	/* Otherwise, don't wrap. */
	if (addr + *len < addr) {
		vringh_bad("Wrapping descriptor %zu@0x%llx",
			   *len, (unsigned long long)addr);
		return false;
	}

	if (unlikely(addr + *len - 1 > range->end_incl))
		goto truncate;
	return true;

truncate:
	*len = range->end_incl + 1 - addr;
	return true;
}

static inline bool no_range_check(struct vringh *vrh, u64 addr, size_t *len,
				  struct vringh_range *range,
				  bool (*getrange)(struct vringh *,
						   u64, struct vringh_range *))
{
	return true;
}

/* No reason for this code to be inline. */
static int move_to_indirect(const struct vringh *vrh,
			    int *up_next, u16 *i, void *addr,
			    const struct vring_desc *desc,
			    struct vring_desc **descs, int *desc_max)
{
	u32 len;

	/* Indirect tables can't have indirect. */
	if (*up_next != -1) {
		vringh_bad("Multilevel indirect %u->%u", *up_next, *i);
		return -EINVAL;
	}

	len = vringh32_to_cpu(vrh, desc->len);
	if (unlikely(len % sizeof(struct vring_desc))) {
		vringh_bad("Strange indirect len %u", desc->len);
		return -EINVAL;
	}

	/* We will check this when we follow it! */
	if (desc->flags & cpu_to_vringh16(vrh, VRING_DESC_F_NEXT))
		*up_next = vringh16_to_cpu(vrh, desc->next);
	else
		*up_next = -2;
	*descs = addr;
	*desc_max = len / sizeof(struct vring_desc);

	/* Now, start at the first indirect. */
	*i = 0;
	return 0;
}

static int resize_iovec(struct vringh_kiov *iov, gfp_t gfp)
{
	struct kvec *new;
	unsigned int flag, new_num = (iov->max_num & ~VRINGH_IOV_ALLOCATED) * 2;

	if (new_num < 8)
		new_num = 8;

	flag = (iov->max_num & VRINGH_IOV_ALLOCATED);
	if (flag)
		new = krealloc_array(iov->iov, new_num,
				     sizeof(struct iovec), gfp);
	else {
		new = kmalloc_array(new_num, sizeof(struct iovec), gfp);
		if (new) {
			memcpy(new, iov->iov,
			       iov->max_num * sizeof(struct iovec));
			flag = VRINGH_IOV_ALLOCATED;
		}
	}
	if (!new)
		return -ENOMEM;
	iov->iov = new;
	iov->max_num = (new_num | flag);
	return 0;
}

static u16 __cold return_from_indirect(const struct vringh *vrh, int *up_next,
				       struct vring_desc **descs, int *desc_max)
{
	u16 i = *up_next;

	*up_next = -1;
	*descs = vrh->vring.desc;
	*desc_max = vrh->vring.num;
	return i;
}

static int slow_copy(struct vringh *vrh, void *dst, const void *src,
		     bool (*rcheck)(struct vringh *vrh, u64 addr, size_t *len,
				    struct vringh_range *range,
				    bool (*getrange)(struct vringh *vrh,
						     u64,
						     struct vringh_range *)),
		     bool (*getrange)(struct vringh *vrh,
				      u64 addr,
				      struct vringh_range *r),
		     struct vringh_range *range,
		     int (*copy)(const struct vringh *vrh,
				 void *dst, const void *src, size_t len))
{
	size_t part, len = sizeof(struct vring_desc);

	do {
		u64 addr;
		int err;

		part = len;
		addr = (u64)(unsigned long)src - range->offset;

		if (!rcheck(vrh, addr, &part, range, getrange))
			return -EINVAL;

		err = copy(vrh, dst, src, part);
		if (err)
			return err;

		dst += part;
		src += part;
		len -= part;
	} while (len);
	return 0;
}

static inline int
__vringh_iov(struct vringh *vrh, u16 i,
	     struct vringh_kiov *riov,
	     struct vringh_kiov *wiov,
	     bool (*rcheck)(struct vringh *vrh, u64 addr, size_t *len,
			    struct vringh_range *range,
			    bool (*getrange)(struct vringh *, u64,
					     struct vringh_range *)),
	     bool (*getrange)(struct vringh *, u64, struct vringh_range *),
	     gfp_t gfp,
	     int (*copy)(const struct vringh *vrh,
			 void *dst, const void *src, size_t len))
{
	int err, count = 0, up_next, desc_max;
	struct vring_desc desc, *descs;
	struct vringh_range range = { -1ULL, 0 }, slowrange;
	bool slow = false;

	/* We start traversing vring's descriptor table. */
	descs = vrh->vring.desc;
	desc_max = vrh->vring.num;
	up_next = -1;

	/* You must want something! */
	if (WARN_ON(!riov && !wiov))
		return -EINVAL;

	if (riov)
		riov->i = riov->used = riov->consumed = 0;
	if (wiov)
		wiov->i = wiov->used = wiov->consumed = 0;

	for (;;) {
		void *addr;
		struct vringh_kiov *iov;
		size_t len;

		if (unlikely(slow))
			err = slow_copy(vrh, &desc, &descs[i], rcheck, getrange,
					&slowrange, copy);
		else
			err = copy(vrh, &desc, &descs[i], sizeof(desc));
		if (unlikely(err))
			goto fail;

		if (unlikely(desc.flags &
			     cpu_to_vringh16(vrh, VRING_DESC_F_INDIRECT))) {
			u64 a = vringh64_to_cpu(vrh, desc.addr);

			/* Make sure it's OK, and get offset. */
			len = vringh32_to_cpu(vrh, desc.len);
			if (!rcheck(vrh, a, &len, &range, getrange)) {
				err = -EINVAL;
				goto fail;
			}

			if (unlikely(len != vringh32_to_cpu(vrh, desc.len))) {
				slow = true;
				/* We need to save this range to use offset */
				slowrange = range;
			}

			addr = (void *)(long)(a + range.offset);
			err = move_to_indirect(vrh, &up_next, &i, addr, &desc,
					       &descs, &desc_max);
			if (err)
				goto fail;
			continue;
		}

		if (count++ == vrh->vring.num) {
			vringh_bad("Descriptor loop in %p", descs);
			err = -ELOOP;
			goto fail;
		}

		if (desc.flags & cpu_to_vringh16(vrh, VRING_DESC_F_WRITE))
			iov = wiov;
		else {
			iov = riov;
			if (unlikely(wiov && wiov->i)) {
				vringh_bad("Readable desc %p after writable",
					   &descs[i]);
				err = -EINVAL;
				goto fail;
			}
		}

		if (!iov) {
			vringh_bad("Unexpected %s desc",
				   !wiov ? "writable" : "readable");
			err = -EPROTO;
			goto fail;
		}

	again:
		/* Make sure it's OK, and get offset. */
		len = vringh32_to_cpu(vrh, desc.len);
		if (!rcheck(vrh, vringh64_to_cpu(vrh, desc.addr), &len, &range,
			    getrange)) {
			err = -EINVAL;
			goto fail;
		}
		addr = (void *)(unsigned long)(vringh64_to_cpu(vrh, desc.addr) +
					       range.offset);

		if (unlikely(iov->used == (iov->max_num & ~VRINGH_IOV_ALLOCATED))) {
			err = resize_iovec(iov, gfp);
			if (err)
				goto fail;
		}

		iov->iov[iov->used].iov_base = addr;
		iov->iov[iov->used].iov_len = len;
		iov->used++;

		if (unlikely(len != vringh32_to_cpu(vrh, desc.len))) {
			desc.len = cpu_to_vringh32(vrh,
				   vringh32_to_cpu(vrh, desc.len) - len);
			desc.addr = cpu_to_vringh64(vrh,
				    vringh64_to_cpu(vrh, desc.addr) + len);
			goto again;
		}

		if (desc.flags & cpu_to_vringh16(vrh, VRING_DESC_F_NEXT)) {
			i = vringh16_to_cpu(vrh, desc.next);
		} else {
			/* Just in case we need to finish traversing above. */
			if (unlikely(up_next > 0)) {
				i = return_from_indirect(vrh, &up_next,
							 &descs, &desc_max);
				slow = false;
			} else
				break;
		}

		if (i >= desc_max) {
			vringh_bad("Chained index %u > %u", i, desc_max);
			err = -EINVAL;
			goto fail;
		}
	}

	return 0;

fail:
	return err;
}

static inline int __vringh_complete(struct vringh *vrh,
				    const struct vring_used_elem *used,
				    unsigned int num_used,
				    int (*putu16)(const struct vringh *vrh,
						  __virtio16 *p, u16 val),
				    int (*putused)(const struct vringh *vrh,
						   struct vring_used_elem *dst,
						   const struct vring_used_elem
						   *src, unsigned num))
{
	struct vring_used *used_ring;
	int err;
	u16 used_idx, off;

	used_ring = vrh->vring.used;
	used_idx = vrh->last_used_idx + vrh->completed;

	off = used_idx % vrh->vring.num;

	/* Compiler knows num_used == 1 sometimes, hence extra check */
	if (num_used > 1 && unlikely(off + num_used >= vrh->vring.num)) {
		u16 part = vrh->vring.num - off;
		err = putused(vrh, &used_ring->ring[off], used, part);
		if (!err)
			err = putused(vrh, &used_ring->ring[0], used + part,
				      num_used - part);
	} else
		err = putused(vrh, &used_ring->ring[off], used, num_used);

	if (err) {
		vringh_bad("Failed to write %u used entries %u at %p",
			   num_used, off, &used_ring->ring[off]);
		return err;
	}

	/* Make sure buffer is written before we update index. */
	virtio_wmb(vrh->weak_barriers);

	err = putu16(vrh, &vrh->vring.used->idx, used_idx + num_used);
	if (err) {
		vringh_bad("Failed to update used index at %p",
			   &vrh->vring.used->idx);
		return err;
	}

	vrh->completed += num_used;
	return 0;
}


static inline int __vringh_need_notify(struct vringh *vrh,
				       int (*getu16)(const struct vringh *vrh,
						     u16 *val,
						     const __virtio16 *p))
{
	bool notify;
	u16 used_event;
	int err;

	/* Flush out used index update. This is paired with the
	 * barrier that the Guest executes when enabling
	 * interrupts. */
	virtio_mb(vrh->weak_barriers);

	/* Old-style, without event indices. */
	if (!vrh->event_indices) {
		u16 flags;
		err = getu16(vrh, &flags, &vrh->vring.avail->flags);
		if (err) {
			vringh_bad("Failed to get flags at %p",
				   &vrh->vring.avail->flags);
			return err;
		}
		return (!(flags & VRING_AVAIL_F_NO_INTERRUPT));
	}

	/* Modern: we know when other side wants to know. */
	err = getu16(vrh, &used_event, &vring_used_event(&vrh->vring));
	if (err) {
		vringh_bad("Failed to get used event idx at %p",
			   &vring_used_event(&vrh->vring));
		return err;
	}

	/* Just in case we added so many that we wrap. */
	if (unlikely(vrh->completed > 0xffff))
		notify = true;
	else
		notify = vring_need_event(used_event,
					  vrh->last_used_idx + vrh->completed,
					  vrh->last_used_idx);

	vrh->last_used_idx += vrh->completed;
	vrh->completed = 0;
	return notify;
}

static inline bool __vringh_notify_enable(struct vringh *vrh,
					  int (*getu16)(const struct vringh *vrh,
							u16 *val, const __virtio16 *p),
					  int (*putu16)(const struct vringh *vrh,
							__virtio16 *p, u16 val))
{
	u16 avail;

	if (!vrh->event_indices) {
		/* Old-school; update flags. */
		if (putu16(vrh, &vrh->vring.used->flags, 0) != 0) {
			vringh_bad("Clearing used flags %p",
				   &vrh->vring.used->flags);
			return true;
		}
	} else {
		if (putu16(vrh, &vring_avail_event(&vrh->vring),
			   vrh->last_avail_idx) != 0) {
			vringh_bad("Updating avail event index %p",
				   &vring_avail_event(&vrh->vring));
			return true;
		}
	}

	/* They could have slipped one in as we were doing that: make
	 * sure it's written, then check again. */
	virtio_mb(vrh->weak_barriers);

	if (getu16(vrh, &avail, &vrh->vring.avail->idx) != 0) {
		vringh_bad("Failed to check avail idx at %p",
			   &vrh->vring.avail->idx);
		return true;
	}

	/* This is unlikely, so we just leave notifications enabled
	 * (if we're using event_indices, we'll only get one
	 * notification anyway). */
	return avail == vrh->last_avail_idx;
}

static inline void __vringh_notify_disable(struct vringh *vrh,
					   int (*putu16)(const struct vringh *vrh,
							 __virtio16 *p, u16 val))
{
	if (!vrh->event_indices) {
		/* Old-school; update flags. */
		if (putu16(vrh, &vrh->vring.used->flags,
			   VRING_USED_F_NO_NOTIFY)) {
			vringh_bad("Setting used flags %p",
				   &vrh->vring.used->flags);
		}
	}
}

/* Userspace access helpers: in this case, addresses are really userspace. */
static inline int getu16_user(const struct vringh *vrh, u16 *val, const __virtio16 *p)
{
	__virtio16 v = 0;
	int rc = get_user(v, (__force __virtio16 __user *)p);
	*val = vringh16_to_cpu(vrh, v);
	return rc;
}

static inline int putu16_user(const struct vringh *vrh, __virtio16 *p, u16 val)
{
	__virtio16 v = cpu_to_vringh16(vrh, val);
	return put_user(v, (__force __virtio16 __user *)p);
}

static inline int copydesc_user(const struct vringh *vrh,
				void *dst, const void *src, size_t len)
{
	return copy_from_user(dst, (__force void __user *)src, len) ?
		-EFAULT : 0;
}

static inline int putused_user(const struct vringh *vrh,
			       struct vring_used_elem *dst,
			       const struct vring_used_elem *src,
			       unsigned int num)
{
	return copy_to_user((__force void __user *)dst, src,
			    sizeof(*dst) * num) ? -EFAULT : 0;
}

static inline int xfer_from_user(const struct vringh *vrh, void *src,
				 void *dst, size_t len)
{
	return copy_from_user(dst, (__force void __user *)src, len) ?
		-EFAULT : 0;
}

static inline int xfer_to_user(const struct vringh *vrh,
			       void *dst, void *src, size_t len)
{
	return copy_to_user((__force void __user *)dst, src, len) ?
		-EFAULT : 0;
}

/**
 * vringh_init_user - initialize a vringh for a userspace vring.
 * @vrh: the vringh to initialize.
 * @features: the feature bits for this ring.
 * @num: the number of elements.
 * @weak_barriers: true if we only need memory barriers, not I/O.
 * @desc: the userspace descriptor pointer.
 * @avail: the userspace avail pointer.
 * @used: the userspace used pointer.
 *
 * Returns an error if num is invalid: you should check pointers
 * yourself!
 */
int vringh_init_user(struct vringh *vrh, u64 features,
		     unsigned int num, bool weak_barriers,
		     vring_desc_t __user *desc,
		     vring_avail_t __user *avail,
		     vring_used_t __user *used)
{
	/* Sane power of 2 please! */
	if (!num || num > 0xffff || (num & (num - 1))) {
		vringh_bad("Bad ring size %u", num);
		return -EINVAL;
	}

	vrh->little_endian = (features & (1ULL << VIRTIO_F_VERSION_1));
	vrh->event_indices = (features & (1 << VIRTIO_RING_F_EVENT_IDX));
	vrh->weak_barriers = weak_barriers;
	vrh->completed = 0;
	vrh->last_avail_idx = 0;
	vrh->last_used_idx = 0;
	vrh->vring.num = num;
	/* vring expects kernel addresses, but only used via accessors. */
	vrh->vring.desc = (__force struct vring_desc *)desc;
	vrh->vring.avail = (__force struct vring_avail *)avail;
	vrh->vring.used = (__force struct vring_used *)used;
	return 0;
}
EXPORT_SYMBOL(vringh_init_user);

/**
 * vringh_getdesc_user - get next available descriptor from userspace ring.
 * @vrh: the userspace vring.
 * @riov: where to put the readable descriptors (or NULL)
 * @wiov: where to put the writable descriptors (or NULL)
 * @getrange: function to call to check ranges.
 * @head: head index we received, for passing to vringh_complete_user().
 *
 * Returns 0 if there was no descriptor, 1 if there was, or -errno.
 *
 * Note that on error return, you can tell the difference between an
 * invalid ring and a single invalid descriptor: in the former case,
 * *head will be vrh->vring.num.  You may be able to ignore an invalid
 * descriptor, but there's not much you can do with an invalid ring.
 *
 * Note that you can reuse riov and wiov with subsequent calls. Content is
 * overwritten and memory reallocated if more space is needed.
 * When you don't need riov and wiov anymore, you should clean them up by
 * calling vringh_iov_cleanup() to release the memory, even on error!
 */
int vringh_getdesc_user(struct vringh *vrh,
			struct vringh_iov *riov,
			struct vringh_iov *wiov,
			bool (*getrange)(struct vringh *vrh,
					 u64 addr, struct vringh_range *r),
			u16 *head)
{
	int err;

	*head = vrh->vring.num;
	err = __vringh_get_head(vrh, getu16_user, &vrh->last_avail_idx);
	if (err < 0)
		return err;

	/* Empty... */
	if (err == vrh->vring.num)
		return 0;

	/* We need the layouts to be identical for this to work */
	BUILD_BUG_ON(sizeof(struct vringh_kiov) != sizeof(struct vringh_iov));
	BUILD_BUG_ON(offsetof(struct vringh_kiov, iov) !=
		     offsetof(struct vringh_iov, iov));
	BUILD_BUG_ON(offsetof(struct vringh_kiov, i) !=
		     offsetof(struct vringh_iov, i));
	BUILD_BUG_ON(offsetof(struct vringh_kiov, used) !=
		     offsetof(struct vringh_iov, used));
	BUILD_BUG_ON(offsetof(struct vringh_kiov, max_num) !=
		     offsetof(struct vringh_iov, max_num));
	BUILD_BUG_ON(sizeof(struct iovec) != sizeof(struct kvec));
	BUILD_BUG_ON(offsetof(struct iovec, iov_base) !=
		     offsetof(struct kvec, iov_base));
	BUILD_BUG_ON(offsetof(struct iovec, iov_len) !=
		     offsetof(struct kvec, iov_len));
	BUILD_BUG_ON(sizeof(((struct iovec *)NULL)->iov_base)
		     != sizeof(((struct kvec *)NULL)->iov_base));
	BUILD_BUG_ON(sizeof(((struct iovec *)NULL)->iov_len)
		     != sizeof(((struct kvec *)NULL)->iov_len));

	*head = err;
	err = __vringh_iov(vrh, *head, (struct vringh_kiov *)riov,
			   (struct vringh_kiov *)wiov,
			   range_check, getrange, GFP_KERNEL, copydesc_user);
	if (err)
		return err;

	return 1;
}
EXPORT_SYMBOL(vringh_getdesc_user);

/**
 * vringh_iov_pull_user - copy bytes from vring_iov.
 * @riov: the riov as passed to vringh_getdesc_user() (updated as we consume)
 * @dst: the place to copy.
 * @len: the maximum length to copy.
 *
 * Returns the bytes copied <= len or a negative errno.
 */
ssize_t vringh_iov_pull_user(struct vringh_iov *riov, void *dst, size_t len)
{
	return vringh_iov_xfer(NULL, (struct vringh_kiov *)riov,
			       dst, len, xfer_from_user);
}
EXPORT_SYMBOL(vringh_iov_pull_user);

/**
 * vringh_iov_push_user - copy bytes into vring_iov.
 * @wiov: the wiov as passed to vringh_getdesc_user() (updated as we consume)
 * @src: the place to copy from.
 * @len: the maximum length to copy.
 *
 * Returns the bytes copied <= len or a negative errno.
 */
ssize_t vringh_iov_push_user(struct vringh_iov *wiov,
			     const void *src, size_t len)
{
	return vringh_iov_xfer(NULL, (struct vringh_kiov *)wiov,
			       (void *)src, len, xfer_to_user);
}
EXPORT_SYMBOL(vringh_iov_push_user);

/**
 * vringh_abandon_user - we've decided not to handle the descriptor(s).
 * @vrh: the vring.
 * @num: the number of descriptors to put back (ie. num
 *	 vringh_get_user() to undo).
 *
 * The next vringh_get_user() will return the old descriptor(s) again.
 */
void vringh_abandon_user(struct vringh *vrh, unsigned int num)
{
	/* We only update vring_avail_event(vr) when we want to be notified,
	 * so we haven't changed that yet. */
	vrh->last_avail_idx -= num;
}
EXPORT_SYMBOL(vringh_abandon_user);

/**
 * vringh_complete_user - we've finished with descriptor, publish it.
 * @vrh: the vring.
 * @head: the head as filled in by vringh_getdesc_user.
 * @len: the length of data we have written.
 *
 * You should check vringh_need_notify_user() after one or more calls
 * to this function.
 */
int vringh_complete_user(struct vringh *vrh, u16 head, u32 len)
{
	struct vring_used_elem used;

	used.id = cpu_to_vringh32(vrh, head);
	used.len = cpu_to_vringh32(vrh, len);
	return __vringh_complete(vrh, &used, 1, putu16_user, putused_user);
}
EXPORT_SYMBOL(vringh_complete_user);

/**
 * vringh_complete_multi_user - we've finished with many descriptors.
 * @vrh: the vring.
 * @used: the head, length pairs.
 * @num_used: the number of used elements.
 *
 * You should check vringh_need_notify_user() after one or more calls
 * to this function.
 */
int vringh_complete_multi_user(struct vringh *vrh,
			       const struct vring_used_elem used[],
			       unsigned num_used)
{
	return __vringh_complete(vrh, used, num_used,
				 putu16_user, putused_user);
}
EXPORT_SYMBOL(vringh_complete_multi_user);

/**
 * vringh_notify_enable_user - we want to know if something changes.
 * @vrh: the vring.
 *
 * This always enables notifications, but returns false if there are
 * now more buffers available in the vring.
 */
bool vringh_notify_enable_user(struct vringh *vrh)
{
	return __vringh_notify_enable(vrh, getu16_user, putu16_user);
}
EXPORT_SYMBOL(vringh_notify_enable_user);

/**
 * vringh_notify_disable_user - don't tell us if something changes.
 * @vrh: the vring.
 *
 * This is our normal running state: we disable and then only enable when
 * we're going to sleep.
 */
void vringh_notify_disable_user(struct vringh *vrh)
{
	__vringh_notify_disable(vrh, putu16_user);
}
EXPORT_SYMBOL(vringh_notify_disable_user);

/**
 * vringh_need_notify_user - must we tell the other side about used buffers?
 * @vrh: the vring we've called vringh_complete_user() on.
 *
 * Returns -errno or 0 if we don't need to tell the other side, 1 if we do.
 */
int vringh_need_notify_user(struct vringh *vrh)
{
	return __vringh_need_notify(vrh, getu16_user);
}
EXPORT_SYMBOL(vringh_need_notify_user);
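
/*
 * Illustrative sketch only (not used by this file): a typical host-side
 * service loop over a userspace ring built from the *_user helpers above.
 * The example_ names, the accept-everything getrange callback and the
 * 64-byte scratch buffer are assumptions made purely for this example.
 */
static bool example_getrange(struct vringh *vrh, u64 addr,
			     struct vringh_range *r)
{
	/* Accept any guest address; offset 0 means no translation. */
	r->start = 0;
	r->end_incl = -1ULL;
	r->offset = 0;
	return true;
}

static int __maybe_unused example_service_user_ring(struct vringh *vrh)
{
	struct iovec rstack[8], wstack[8];
	struct vringh_iov riov, wiov;
	u8 buf[64];
	u16 head;
	int err;

	vringh_iov_init(&riov, rstack, ARRAY_SIZE(rstack));
	vringh_iov_init(&wiov, wstack, ARRAY_SIZE(wstack));

	/* getdesc returns 1 per chain, 0 when the ring is empty. */
	while ((err = vringh_getdesc_user(vrh, &riov, &wiov,
					  example_getrange, &head)) > 0) {
		/* Drain the readable part of the chain. */
		while (vringh_iov_pull_user(&riov, buf, sizeof(buf)) > 0)
			;

		/* Publish the chain as used; we wrote no data back. */
		err = vringh_complete_user(vrh, head, 0);
		if (err)
			break;
	}

	vringh_iov_cleanup(&riov);
	vringh_iov_cleanup(&wiov);

	if (vringh_need_notify_user(vrh) > 0) {
		/* e.g. signal an eventfd here to kick the guest */
	}

	return err;
}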

/* Kernelspace access helpers. */
static inline int getu16_kern(const struct vringh *vrh,
			      u16 *val, const __virtio16 *p)
{
	*val = vringh16_to_cpu(vrh, READ_ONCE(*p));
	return 0;
}

static inline int putu16_kern(const struct vringh *vrh, __virtio16 *p, u16 val)
{
	WRITE_ONCE(*p, cpu_to_vringh16(vrh, val));
	return 0;
}

static inline int copydesc_kern(const struct vringh *vrh,
				void *dst, const void *src, size_t len)
{
	memcpy(dst, src, len);
	return 0;
}

static inline int putused_kern(const struct vringh *vrh,
			       struct vring_used_elem *dst,
			       const struct vring_used_elem *src,
			       unsigned int num)
{
	memcpy(dst, src, num * sizeof(*dst));
	return 0;
}

static inline int xfer_kern(const struct vringh *vrh, void *src,
			    void *dst, size_t len)
{
	memcpy(dst, src, len);
	return 0;
}

static inline int kern_xfer(const struct vringh *vrh, void *dst,
			    void *src, size_t len)
{
	memcpy(dst, src, len);
	return 0;
}

/**
 * vringh_init_kern - initialize a vringh for a kernelspace vring.
 * @vrh: the vringh to initialize.
 * @features: the feature bits for this ring.
 * @num: the number of elements.
 * @weak_barriers: true if we only need memory barriers, not I/O.
 * @desc: the kernelspace descriptor pointer.
 * @avail: the kernelspace avail pointer.
 * @used: the kernelspace used pointer.
 *
 * Returns an error if num is invalid.
 */
int vringh_init_kern(struct vringh *vrh, u64 features,
		     unsigned int num, bool weak_barriers,
		     struct vring_desc *desc,
		     struct vring_avail *avail,
		     struct vring_used *used)
{
	/* Sane power of 2 please! */
	if (!num || num > 0xffff || (num & (num - 1))) {
		vringh_bad("Bad ring size %u", num);
		return -EINVAL;
	}

	vrh->little_endian = (features & (1ULL << VIRTIO_F_VERSION_1));
	vrh->event_indices = (features & (1 << VIRTIO_RING_F_EVENT_IDX));
	vrh->weak_barriers = weak_barriers;
	vrh->completed = 0;
	vrh->last_avail_idx = 0;
	vrh->last_used_idx = 0;
	vrh->vring.num = num;
	vrh->vring.desc = desc;
	vrh->vring.avail = avail;
	vrh->vring.used = used;
	return 0;
}
EXPORT_SYMBOL(vringh_init_kern);
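
/*
 * Illustrative sketch only (not used by this file): carving a 256-entry
 * split ring out of one contiguous kernel buffer and wrapping it in a
 * vringh.  The example_ name, the feature bits and the PAGE_SIZE
 * alignment are assumptions made purely for this example.
 */
static int __maybe_unused example_setup_kern_ring(struct vringh *vrh,
						  void *queue_mem)
{
	struct vring vring;

	/* Lay out desc/avail/used inside the caller's buffer. */
	vring_init(&vring, 256, queue_mem, PAGE_SIZE);

	return vringh_init_kern(vrh, 1ULL << VIRTIO_F_VERSION_1, 256,
				true, vring.desc, vring.avail, vring.used);
}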

/**
 * vringh_getdesc_kern - get next available descriptor from kernelspace ring.
 * @vrh: the kernelspace vring.
 * @riov: where to put the readable descriptors (or NULL)
 * @wiov: where to put the writable descriptors (or NULL)
 * @head: head index we received, for passing to vringh_complete_kern().
 * @gfp: flags for allocating larger riov/wiov.
 *
 * Returns 0 if there was no descriptor, 1 if there was, or -errno.
 *
 * Note that on error return, you can tell the difference between an
 * invalid ring and a single invalid descriptor: in the former case,
 * *head will be vrh->vring.num.  You may be able to ignore an invalid
 * descriptor, but there's not much you can do with an invalid ring.
 *
 * Note that you can reuse riov and wiov with subsequent calls. Content is
 * overwritten and memory reallocated if more space is needed.
 * When you don't need riov and wiov anymore, you should clean them up by
 * calling vringh_kiov_cleanup() to release the memory, even on error!
 */
int vringh_getdesc_kern(struct vringh *vrh,
			struct vringh_kiov *riov,
			struct vringh_kiov *wiov,
			u16 *head,
			gfp_t gfp)
{
	int err;

	err = __vringh_get_head(vrh, getu16_kern, &vrh->last_avail_idx);
	if (err < 0)
		return err;

	/* Empty... */
	if (err == vrh->vring.num)
		return 0;

	*head = err;
	err = __vringh_iov(vrh, *head, riov, wiov, no_range_check, NULL,
			   gfp, copydesc_kern);
	if (err)
		return err;

	return 1;
}
EXPORT_SYMBOL(vringh_getdesc_kern);

/**
 * vringh_iov_pull_kern - copy bytes from vring_iov.
 * @riov: the riov as passed to vringh_getdesc_kern() (updated as we consume)
 * @dst: the place to copy.
 * @len: the maximum length to copy.
 *
 * Returns the bytes copied <= len or a negative errno.
 */
ssize_t vringh_iov_pull_kern(struct vringh_kiov *riov, void *dst, size_t len)
{
	return vringh_iov_xfer(NULL, riov, dst, len, xfer_kern);
}
EXPORT_SYMBOL(vringh_iov_pull_kern);

/**
 * vringh_iov_push_kern - copy bytes into vring_iov.
 * @wiov: the wiov as passed to vringh_getdesc_kern() (updated as we consume)
 * @src: the place to copy from.
 * @len: the maximum length to copy.
 *
 * Returns the bytes copied <= len or a negative errno.
 */
ssize_t vringh_iov_push_kern(struct vringh_kiov *wiov,
			     const void *src, size_t len)
{
	return vringh_iov_xfer(NULL, wiov, (void *)src, len, kern_xfer);
}
EXPORT_SYMBOL(vringh_iov_push_kern);

/**
 * vringh_abandon_kern - we've decided not to handle the descriptor(s).
 * @vrh: the vring.
 * @num: the number of descriptors to put back (ie. num
 *	 vringh_get_kern() to undo).
 *
 * The next vringh_get_kern() will return the old descriptor(s) again.
 */
void vringh_abandon_kern(struct vringh *vrh, unsigned int num)
{
	/* We only update vring_avail_event(vr) when we want to be notified,
	 * so we haven't changed that yet. */
	vrh->last_avail_idx -= num;
}
EXPORT_SYMBOL(vringh_abandon_kern);

/**
 * vringh_complete_kern - we've finished with descriptor, publish it.
 * @vrh: the vring.
 * @head: the head as filled in by vringh_getdesc_kern.
 * @len: the length of data we have written.
 *
 * You should check vringh_need_notify_kern() after one or more calls
 * to this function.
 */
int vringh_complete_kern(struct vringh *vrh, u16 head, u32 len)
{
	struct vring_used_elem used;

	used.id = cpu_to_vringh32(vrh, head);
	used.len = cpu_to_vringh32(vrh, len);

	return __vringh_complete(vrh, &used, 1, putu16_kern, putused_kern);
}
EXPORT_SYMBOL(vringh_complete_kern);

/**
 * vringh_notify_enable_kern - we want to know if something changes.
 * @vrh: the vring.
 *
 * This always enables notifications, but returns false if there are
 * now more buffers available in the vring.
 */
bool vringh_notify_enable_kern(struct vringh *vrh)
{
	return __vringh_notify_enable(vrh, getu16_kern, putu16_kern);
}
EXPORT_SYMBOL(vringh_notify_enable_kern);

/**
 * vringh_notify_disable_kern - don't tell us if something changes.
 * @vrh: the vring.
 *
 * This is our normal running state: we disable and then only enable when
 * we're going to sleep.
 */
void vringh_notify_disable_kern(struct vringh *vrh)
{
	__vringh_notify_disable(vrh, putu16_kern);
}
EXPORT_SYMBOL(vringh_notify_disable_kern);

/**
 * vringh_need_notify_kern - must we tell the other side about used buffers?
 * @vrh: the vring we've called vringh_complete_kern() on.
 *
 * Returns -errno or 0 if we don't need to tell the other side, 1 if we do.
 */
int vringh_need_notify_kern(struct vringh *vrh)
{
	return __vringh_need_notify(vrh, getu16_kern);
}
EXPORT_SYMBOL(vringh_need_notify_kern);
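
/*
 * Illustrative sketch only (not used by this file): serving a kernelspace
 * ring with the *_kern helpers above, echoing each readable chain back
 * into its writable part.  The example_ name and the 64-byte bounce
 * buffer are assumptions made purely for this example.
 */
static int __maybe_unused example_service_kern_ring(struct vringh *vrh)
{
	struct kvec rstack[8], wstack[8];
	struct vringh_kiov riov, wiov;
	u8 buf[64];
	u16 head;
	int err;

	vringh_kiov_init(&riov, rstack, ARRAY_SIZE(rstack));
	vringh_kiov_init(&wiov, wstack, ARRAY_SIZE(wstack));

	/* getdesc returns 1 per chain, 0 when the ring is empty. */
	while ((err = vringh_getdesc_kern(vrh, &riov, &wiov, &head,
					  GFP_KERNEL)) > 0) {
		ssize_t got = vringh_iov_pull_kern(&riov, buf, sizeof(buf));

		if (got < 0) {
			err = got;
			break;
		}

		/* Copy the data back into the writable descriptors. */
		vringh_iov_push_kern(&wiov, buf, got);

		/* Publish the chain, reporting how much we wrote. */
		err = vringh_complete_kern(vrh, head, got);
		if (err)
			break;
	}

	vringh_kiov_cleanup(&riov);
	vringh_kiov_cleanup(&wiov);

	if (vringh_need_notify_kern(vrh) > 0) {
		/* notify the other side via the transport, e.g. an irq */
	}

	return err;
}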
Tsirkin 10919ad9c49cSJason Wang static int iotlb_translate(const struct vringh *vrh, 10929ad9c49cSJason Wang u64 addr, u64 len, struct bio_vec iov[], 10939ad9c49cSJason Wang int iov_size, u32 perm) 10949ad9c49cSJason Wang { 10959ad9c49cSJason Wang struct vhost_iotlb_map *map; 10969ad9c49cSJason Wang struct vhost_iotlb *iotlb = vrh->iotlb; 10979ad9c49cSJason Wang int ret = 0; 10989ad9c49cSJason Wang u64 s = 0; 10999ad9c49cSJason Wang 1100f53d9910SStefano Garzarella spin_lock(vrh->iotlb_lock); 1101f53d9910SStefano Garzarella 11029ad9c49cSJason Wang while (len > s) { 11039ad9c49cSJason Wang u64 size, pa, pfn; 11049ad9c49cSJason Wang 11059ad9c49cSJason Wang if (unlikely(ret >= iov_size)) { 11069ad9c49cSJason Wang ret = -ENOBUFS; 11079ad9c49cSJason Wang break; 11089ad9c49cSJason Wang } 11099ad9c49cSJason Wang 11109ad9c49cSJason Wang map = vhost_iotlb_itree_first(iotlb, addr, 11119ad9c49cSJason Wang addr + len - 1); 11129ad9c49cSJason Wang if (!map || map->start > addr) { 11139ad9c49cSJason Wang ret = -EINVAL; 11149ad9c49cSJason Wang break; 11159ad9c49cSJason Wang } else if (!(map->perm & perm)) { 11169ad9c49cSJason Wang ret = -EPERM; 11179ad9c49cSJason Wang break; 11189ad9c49cSJason Wang } 11199ad9c49cSJason Wang 11209ad9c49cSJason Wang size = map->size - addr + map->start; 11219ad9c49cSJason Wang pa = map->addr + addr - map->start; 11229ad9c49cSJason Wang pfn = pa >> PAGE_SHIFT; 11239ad9c49cSJason Wang iov[ret].bv_page = pfn_to_page(pfn); 11249ad9c49cSJason Wang iov[ret].bv_len = min(len - s, size); 11259ad9c49cSJason Wang iov[ret].bv_offset = pa & (PAGE_SIZE - 1); 11269ad9c49cSJason Wang s += size; 11279ad9c49cSJason Wang addr += size; 11289ad9c49cSJason Wang ++ret; 11299ad9c49cSJason Wang } 11309ad9c49cSJason Wang 1131f53d9910SStefano Garzarella spin_unlock(vrh->iotlb_lock); 1132f53d9910SStefano Garzarella 11339ad9c49cSJason Wang return ret; 11349ad9c49cSJason Wang } 11359ad9c49cSJason Wang 11369ad9c49cSJason Wang static inline int copy_from_iotlb(const struct vringh *vrh, void *dst, 11379ad9c49cSJason Wang void *src, size_t len) 11389ad9c49cSJason Wang { 11399ad9c49cSJason Wang struct iov_iter iter; 11409ad9c49cSJason Wang struct bio_vec iov[16]; 11419ad9c49cSJason Wang int ret; 11429ad9c49cSJason Wang 11439ad9c49cSJason Wang ret = iotlb_translate(vrh, (u64)(uintptr_t)src, 11449ad9c49cSJason Wang len, iov, 16, VHOST_MAP_RO); 11459ad9c49cSJason Wang if (ret < 0) 11469ad9c49cSJason Wang return ret; 11479ad9c49cSJason Wang 11489ad9c49cSJason Wang iov_iter_bvec(&iter, WRITE, iov, ret, len); /* the bvec array is the source of copy_from_iter() */ 11499ad9c49cSJason Wang 11509ad9c49cSJason Wang ret = copy_from_iter(dst, len, &iter); 11519ad9c49cSJason Wang 11529ad9c49cSJason Wang return ret; 11539ad9c49cSJason Wang } 11549ad9c49cSJason Wang 11559ad9c49cSJason Wang static inline int copy_to_iotlb(const struct vringh *vrh, void *dst, 11569ad9c49cSJason Wang void *src, size_t len) 11579ad9c49cSJason Wang { 11589ad9c49cSJason Wang struct iov_iter iter; 11599ad9c49cSJason Wang struct bio_vec iov[16]; 11609ad9c49cSJason Wang int ret; 11619ad9c49cSJason Wang 11629ad9c49cSJason Wang ret = iotlb_translate(vrh, (u64)(uintptr_t)dst, 11639ad9c49cSJason Wang len, iov, 16, VHOST_MAP_WO); 11649ad9c49cSJason Wang if (ret < 0) 11659ad9c49cSJason Wang return ret; 11669ad9c49cSJason Wang 11679ad9c49cSJason Wang iov_iter_bvec(&iter, READ, iov, ret, len); /* the bvec array is the destination of copy_to_iter() */ 11689ad9c49cSJason Wang 11699ad9c49cSJason Wang return copy_to_iter(src, len, &iter); 11709ad9c49cSJason Wang } 11719ad9c49cSJason Wang 11729ad9c49cSJason Wang static inline int getu16_iotlb(const struct vringh *vrh,
11739ad9c49cSJason Wang u16 *val, const __virtio16 *p) 11749ad9c49cSJason Wang { 11759ad9c49cSJason Wang struct bio_vec iov; 11769ad9c49cSJason Wang void *kaddr, *from; 11779ad9c49cSJason Wang int ret; 11789ad9c49cSJason Wang 11799ad9c49cSJason Wang /* Atomic read is needed for getu16 */ 11809ad9c49cSJason Wang ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p), 11819ad9c49cSJason Wang &iov, 1, VHOST_MAP_RO); 11829ad9c49cSJason Wang if (ret < 0) 11839ad9c49cSJason Wang return ret; 11849ad9c49cSJason Wang 11859ad9c49cSJason Wang kaddr = kmap_atomic(iov.bv_page); 11869ad9c49cSJason Wang from = kaddr + iov.bv_offset; 11879ad9c49cSJason Wang *val = vringh16_to_cpu(vrh, READ_ONCE(*(__virtio16 *)from)); 11889ad9c49cSJason Wang kunmap_atomic(kaddr); 11899ad9c49cSJason Wang 11909ad9c49cSJason Wang return 0; 11919ad9c49cSJason Wang } 11929ad9c49cSJason Wang 11939ad9c49cSJason Wang static inline int putu16_iotlb(const struct vringh *vrh, 11949ad9c49cSJason Wang __virtio16 *p, u16 val) 11959ad9c49cSJason Wang { 11969ad9c49cSJason Wang struct bio_vec iov; 11979ad9c49cSJason Wang void *kaddr, *to; 11989ad9c49cSJason Wang int ret; 11999ad9c49cSJason Wang 12009ad9c49cSJason Wang /* Atomic write is needed for putu16 */ 12019ad9c49cSJason Wang ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p), 12029ad9c49cSJason Wang &iov, 1, VHOST_MAP_WO); 12039ad9c49cSJason Wang if (ret < 0) 12049ad9c49cSJason Wang return ret; 12059ad9c49cSJason Wang 12069ad9c49cSJason Wang kaddr = kmap_atomic(iov.bv_page); 12079ad9c49cSJason Wang to = kaddr + iov.bv_offset; 12089ad9c49cSJason Wang WRITE_ONCE(*(__virtio16 *)to, cpu_to_vringh16(vrh, val)); 12099ad9c49cSJason Wang kunmap_atomic(kaddr); 12109ad9c49cSJason Wang 12119ad9c49cSJason Wang return 0; 12129ad9c49cSJason Wang } 12139ad9c49cSJason Wang 12149ad9c49cSJason Wang static inline int copydesc_iotlb(const struct vringh *vrh, 12159ad9c49cSJason Wang void *dst, const void *src, size_t len) 12169ad9c49cSJason Wang { 12179ad9c49cSJason Wang int ret; 12189ad9c49cSJason Wang 12199ad9c49cSJason Wang ret = copy_from_iotlb(vrh, dst, (void *)src, len); 12209ad9c49cSJason Wang if (ret != len) 12219ad9c49cSJason Wang return -EFAULT; 12229ad9c49cSJason Wang 12239ad9c49cSJason Wang return 0; 12249ad9c49cSJason Wang } 12259ad9c49cSJason Wang 12269ad9c49cSJason Wang static inline int xfer_from_iotlb(const struct vringh *vrh, void *src, 12279ad9c49cSJason Wang void *dst, size_t len) 12289ad9c49cSJason Wang { 12299ad9c49cSJason Wang int ret; 12309ad9c49cSJason Wang 12319ad9c49cSJason Wang ret = copy_from_iotlb(vrh, dst, src, len); 12329ad9c49cSJason Wang if (ret != len) 12339ad9c49cSJason Wang return -EFAULT; 12349ad9c49cSJason Wang 12359ad9c49cSJason Wang return 0; 12369ad9c49cSJason Wang } 12379ad9c49cSJason Wang 12389ad9c49cSJason Wang static inline int xfer_to_iotlb(const struct vringh *vrh, 12399ad9c49cSJason Wang void *dst, void *src, size_t len) 12409ad9c49cSJason Wang { 12419ad9c49cSJason Wang int ret; 12429ad9c49cSJason Wang 12439ad9c49cSJason Wang ret = copy_to_iotlb(vrh, dst, src, len); 12449ad9c49cSJason Wang if (ret != len) 12459ad9c49cSJason Wang return -EFAULT; 12469ad9c49cSJason Wang 12479ad9c49cSJason Wang return 0; 12489ad9c49cSJason Wang } 12499ad9c49cSJason Wang 12509ad9c49cSJason Wang static inline int putused_iotlb(const struct vringh *vrh, 12519ad9c49cSJason Wang struct vring_used_elem *dst, 12529ad9c49cSJason Wang const struct vring_used_elem *src, 12539ad9c49cSJason Wang unsigned int num) 12549ad9c49cSJason Wang { 12559ad9c49cSJason Wang int size = num * 
sizeof(*dst); 12569ad9c49cSJason Wang int ret; 12579ad9c49cSJason Wang 12589ad9c49cSJason Wang ret = copy_to_iotlb(vrh, dst, (void *)src, size); 12599ad9c49cSJason Wang if (ret != size) 12609ad9c49cSJason Wang return -EFAULT; 12619ad9c49cSJason Wang 12629ad9c49cSJason Wang return 0; 12639ad9c49cSJason Wang } 12649ad9c49cSJason Wang 12659ad9c49cSJason Wang /** 12669ad9c49cSJason Wang * vringh_init_iotlb - initialize a vringh for a ring with IOTLB. 12679ad9c49cSJason Wang * @vrh: the vringh to initialize. 12689ad9c49cSJason Wang * @features: the feature bits for this ring. 12699ad9c49cSJason Wang * @num: the number of elements. 12709ad9c49cSJason Wang * @weak_barriers: true if we only need memory barriers, not I/O. 12719ad9c49cSJason Wang * @desc: the userspace descriptor pointer. 12729ad9c49cSJason Wang * @avail: the userspace avail pointer. 12739ad9c49cSJason Wang * @used: the userspace used pointer. 12749ad9c49cSJason Wang * 12759ad9c49cSJason Wang * Returns an error if num is invalid. 12769ad9c49cSJason Wang */ 12779ad9c49cSJason Wang int vringh_init_iotlb(struct vringh *vrh, u64 features, 12789ad9c49cSJason Wang unsigned int num, bool weak_barriers, 12799ad9c49cSJason Wang struct vring_desc *desc, 12809ad9c49cSJason Wang struct vring_avail *avail, 12819ad9c49cSJason Wang struct vring_used *used) 12829ad9c49cSJason Wang { 12839ad9c49cSJason Wang return vringh_init_kern(vrh, features, num, weak_barriers, 12849ad9c49cSJason Wang desc, avail, used); 12859ad9c49cSJason Wang } 12869ad9c49cSJason Wang EXPORT_SYMBOL(vringh_init_iotlb); 12879ad9c49cSJason Wang 12889ad9c49cSJason Wang /** 12899ad9c49cSJason Wang * vringh_set_iotlb - associate an IOTLB (and its lock) with a vringh. 12909ad9c49cSJason Wang * @vrh: the vring 12919ad9c49cSJason Wang * @iotlb: iotlb associated with this vring 1292f53d9910SStefano Garzarella * @iotlb_lock: spinlock to synchronize the iotlb accesses 12939ad9c49cSJason Wang */ 1294f53d9910SStefano Garzarella void vringh_set_iotlb(struct vringh *vrh, struct vhost_iotlb *iotlb, 1295f53d9910SStefano Garzarella spinlock_t *iotlb_lock) 12969ad9c49cSJason Wang { 12979ad9c49cSJason Wang vrh->iotlb = iotlb; 1298f53d9910SStefano Garzarella vrh->iotlb_lock = iotlb_lock; 12999ad9c49cSJason Wang } 13009ad9c49cSJason Wang EXPORT_SYMBOL(vringh_set_iotlb); 13019ad9c49cSJason Wang 13029ad9c49cSJason Wang /** 13039ad9c49cSJason Wang * vringh_getdesc_iotlb - get next available descriptor from ring with 13049ad9c49cSJason Wang * IOTLB. 13059ad9c49cSJason Wang * @vrh: the kernelspace vring. 13069ad9c49cSJason Wang * @riov: where to put the readable descriptors (or NULL) 13079ad9c49cSJason Wang * @wiov: where to put the writable descriptors (or NULL) 13089ad9c49cSJason Wang * @head: head index we received, for passing to vringh_complete_iotlb(). 13099ad9c49cSJason Wang * @gfp: flags for allocating larger riov/wiov. 13109ad9c49cSJason Wang * 13119ad9c49cSJason Wang * Returns 0 if there was no descriptor, 1 if there was, or -errno. 13129ad9c49cSJason Wang * 13139ad9c49cSJason Wang * Note that on error return, you can tell the difference between an 13149ad9c49cSJason Wang * invalid ring and a single invalid descriptor: in the former case, 13159ad9c49cSJason Wang * *head will be vrh->vring.num. You may be able to ignore an invalid 13169ad9c49cSJason Wang * descriptor, but there's not much you can do with an invalid ring. 13179ad9c49cSJason Wang * 131869c13c58SStefano Garzarella * Note that you can reuse riov and wiov with subsequent calls.
Content is 131969c13c58SStefano Garzarella * overwritten and memory reallocated if more space is needed. 132069c13c58SStefano Garzarella * When you don't have to use riov and wiov anymore, you should clean them up 132169c13c58SStefano Garzarella * by calling vringh_kiov_cleanup() to release the memory, even on error! (An illustrative usage sketch is appended at the end of this file.) 13229ad9c49cSJason Wang */ 13239ad9c49cSJason Wang int vringh_getdesc_iotlb(struct vringh *vrh, 13249ad9c49cSJason Wang struct vringh_kiov *riov, 13259ad9c49cSJason Wang struct vringh_kiov *wiov, 13269ad9c49cSJason Wang u16 *head, 13279ad9c49cSJason Wang gfp_t gfp) 13289ad9c49cSJason Wang { 13299ad9c49cSJason Wang int err; 13309ad9c49cSJason Wang 13319ad9c49cSJason Wang err = __vringh_get_head(vrh, getu16_iotlb, &vrh->last_avail_idx); 13329ad9c49cSJason Wang if (err < 0) 13339ad9c49cSJason Wang return err; 13349ad9c49cSJason Wang 13359ad9c49cSJason Wang /* Empty... */ 13369ad9c49cSJason Wang if (err == vrh->vring.num) 13379ad9c49cSJason Wang return 0; 13389ad9c49cSJason Wang 13399ad9c49cSJason Wang *head = err; 13409ad9c49cSJason Wang err = __vringh_iov(vrh, *head, riov, wiov, no_range_check, NULL, 13419ad9c49cSJason Wang gfp, copydesc_iotlb); 13429ad9c49cSJason Wang if (err) 13439ad9c49cSJason Wang return err; 13449ad9c49cSJason Wang 13459ad9c49cSJason Wang return 1; 13469ad9c49cSJason Wang } 13479ad9c49cSJason Wang EXPORT_SYMBOL(vringh_getdesc_iotlb); 13489ad9c49cSJason Wang 13499ad9c49cSJason Wang /** 13509ad9c49cSJason Wang * vringh_iov_pull_iotlb - copy bytes from vring_iov. 13519ad9c49cSJason Wang * @vrh: the vring. 13529ad9c49cSJason Wang * @riov: the riov as passed to vringh_getdesc_iotlb() (updated as we consume) 13539ad9c49cSJason Wang * @dst: the place to copy. 13549ad9c49cSJason Wang * @len: the maximum length to copy. 13559ad9c49cSJason Wang * 13569ad9c49cSJason Wang * Returns the bytes copied <= len or a negative errno. 13579ad9c49cSJason Wang */ 13589ad9c49cSJason Wang ssize_t vringh_iov_pull_iotlb(struct vringh *vrh, 13599ad9c49cSJason Wang struct vringh_kiov *riov, 13609ad9c49cSJason Wang void *dst, size_t len) 13619ad9c49cSJason Wang { 13629ad9c49cSJason Wang return vringh_iov_xfer(vrh, riov, dst, len, xfer_from_iotlb); 13639ad9c49cSJason Wang } 13649ad9c49cSJason Wang EXPORT_SYMBOL(vringh_iov_pull_iotlb); 13659ad9c49cSJason Wang 13669ad9c49cSJason Wang /** 13679ad9c49cSJason Wang * vringh_iov_push_iotlb - copy bytes into vring_iov. 13689ad9c49cSJason Wang * @vrh: the vring. 13699ad9c49cSJason Wang * @wiov: the wiov as passed to vringh_getdesc_iotlb() (updated as we consume) 13708009b0f4SStefano Garzarella * @src: the place to copy from. 13719ad9c49cSJason Wang * @len: the maximum length to copy. 13729ad9c49cSJason Wang * 13739ad9c49cSJason Wang * Returns the bytes copied <= len or a negative errno. 13749ad9c49cSJason Wang */ 13759ad9c49cSJason Wang ssize_t vringh_iov_push_iotlb(struct vringh *vrh, 13769ad9c49cSJason Wang struct vringh_kiov *wiov, 13779ad9c49cSJason Wang const void *src, size_t len) 13789ad9c49cSJason Wang { 13799ad9c49cSJason Wang return vringh_iov_xfer(vrh, wiov, (void *)src, len, xfer_to_iotlb); 13809ad9c49cSJason Wang } 13819ad9c49cSJason Wang EXPORT_SYMBOL(vringh_iov_push_iotlb); 13829ad9c49cSJason Wang 13839ad9c49cSJason Wang /** 13849ad9c49cSJason Wang * vringh_abandon_iotlb - we've decided not to handle the descriptor(s). 13859ad9c49cSJason Wang * @vrh: the vring. 13869ad9c49cSJason Wang * @num: the number of descriptors to put back (ie. num 13879ad9c49cSJason Wang * vringh_get_iotlb() to undo).
13889ad9c49cSJason Wang * 13899ad9c49cSJason Wang * The next vringh_get_iotlb() will return the old descriptor(s) again. 13909ad9c49cSJason Wang */ 13919ad9c49cSJason Wang void vringh_abandon_iotlb(struct vringh *vrh, unsigned int num) 13929ad9c49cSJason Wang { 13939ad9c49cSJason Wang /* We only update vring_avail_event(vr) when we want to be notified, 13949ad9c49cSJason Wang * so we haven't changed that yet. 13959ad9c49cSJason Wang */ 13969ad9c49cSJason Wang vrh->last_avail_idx -= num; 13979ad9c49cSJason Wang } 13989ad9c49cSJason Wang EXPORT_SYMBOL(vringh_abandon_iotlb); 13999ad9c49cSJason Wang 14009ad9c49cSJason Wang /** 14019ad9c49cSJason Wang * vringh_complete_iotlb - we've finished with descriptor, publish it. 14029ad9c49cSJason Wang * @vrh: the vring. 14039ad9c49cSJason Wang * @head: the head as filled in by vringh_getdesc_iotlb. 14049ad9c49cSJason Wang * @len: the length of data we have written. 14059ad9c49cSJason Wang * 14069ad9c49cSJason Wang * You should check vringh_need_notify_iotlb() after one or more calls 14079ad9c49cSJason Wang * to this function. 14089ad9c49cSJason Wang */ 14099ad9c49cSJason Wang int vringh_complete_iotlb(struct vringh *vrh, u16 head, u32 len) 14109ad9c49cSJason Wang { 14119ad9c49cSJason Wang struct vring_used_elem used; 14129ad9c49cSJason Wang 14139ad9c49cSJason Wang used.id = cpu_to_vringh32(vrh, head); 14149ad9c49cSJason Wang used.len = cpu_to_vringh32(vrh, len); 14159ad9c49cSJason Wang 14169ad9c49cSJason Wang return __vringh_complete(vrh, &used, 1, putu16_iotlb, putused_iotlb); 14179ad9c49cSJason Wang } 14189ad9c49cSJason Wang EXPORT_SYMBOL(vringh_complete_iotlb); 14199ad9c49cSJason Wang 14209ad9c49cSJason Wang /** 14219ad9c49cSJason Wang * vringh_notify_enable_iotlb - we want to know if something changes. 14229ad9c49cSJason Wang * @vrh: the vring. 14239ad9c49cSJason Wang * 14249ad9c49cSJason Wang * This always enables notifications, but returns false if there are 14259ad9c49cSJason Wang * now more buffers available in the vring. 14269ad9c49cSJason Wang */ 14279ad9c49cSJason Wang bool vringh_notify_enable_iotlb(struct vringh *vrh) 14289ad9c49cSJason Wang { 14299ad9c49cSJason Wang return __vringh_notify_enable(vrh, getu16_iotlb, putu16_iotlb); 14309ad9c49cSJason Wang } 14319ad9c49cSJason Wang EXPORT_SYMBOL(vringh_notify_enable_iotlb); 14329ad9c49cSJason Wang 14339ad9c49cSJason Wang /** 14349ad9c49cSJason Wang * vringh_notify_disable_iotlb - don't tell us if something changes. 14359ad9c49cSJason Wang * @vrh: the vring. 14369ad9c49cSJason Wang * 14379ad9c49cSJason Wang * This is our normal running state: we disable and then only enable when 14389ad9c49cSJason Wang * we're going to sleep. 14399ad9c49cSJason Wang */ 14409ad9c49cSJason Wang void vringh_notify_disable_iotlb(struct vringh *vrh) 14419ad9c49cSJason Wang { 14429ad9c49cSJason Wang __vringh_notify_disable(vrh, putu16_iotlb); 14439ad9c49cSJason Wang } 14449ad9c49cSJason Wang EXPORT_SYMBOL(vringh_notify_disable_iotlb); 14459ad9c49cSJason Wang 14469ad9c49cSJason Wang /** 14479ad9c49cSJason Wang * vringh_need_notify_iotlb - must we tell the other side about used buffers? 14489ad9c49cSJason Wang * @vrh: the vring we've called vringh_complete_iotlb() on. 14499ad9c49cSJason Wang * 14509ad9c49cSJason Wang * Returns -errno or 0 if we don't need to tell the other side, 1 if we do. 
14519ad9c49cSJason Wang */ 14529ad9c49cSJason Wang int vringh_need_notify_iotlb(struct vringh *vrh) 14539ad9c49cSJason Wang { 14549ad9c49cSJason Wang return __vringh_need_notify(vrh, getu16_iotlb); 14559ad9c49cSJason Wang } 14569ad9c49cSJason Wang EXPORT_SYMBOL(vringh_need_notify_iotlb); 14579ad9c49cSJason Wang 14583302363aSMichael S. Tsirkin #endif 14599ad9c49cSJason Wang 1460f558a845SDave Jones MODULE_LICENSE("GPL"); 1461
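
/*
 * Illustrative sketch, not part of the vringh API: one plausible way a
 * host-side worker could drain a kernel-space vring with the *_kern
 * helpers above.  It assumes the ring was already set up with
 * vringh_init_kern(), that each descriptor chain carries a readable
 * request followed by a writable status word, and uses the kiov helpers
 * declared in include/linux/vringh.h.  example_service_kern() and its
 * buffer sizes are hypothetical.
 */
static int __maybe_unused example_service_kern(struct vringh *vrh)
{
	struct vringh_kiov riov, wiov;
	struct kvec rkvec[8], wkvec[8];
	u8 req[64];
	__virtio32 status;
	u16 head;
	int err = 0;

	vringh_kiov_init(&riov, rkvec, ARRAY_SIZE(rkvec));
	vringh_kiov_init(&wiov, wkvec, ARRAY_SIZE(wkvec));

	for (;;) {
		ssize_t got, put;

		err = vringh_getdesc_kern(vrh, &riov, &wiov, &head, GFP_KERNEL);
		if (err <= 0)		/* 0: ring empty, < 0: broken ring */
			break;

		/* Gather up to sizeof(req) bytes of the readable payload. */
		got = vringh_iov_pull_kern(&riov, req, sizeof(req));
		if (got < 0) {
			vringh_abandon_kern(vrh, 1);	/* put the chain back */
			err = got;
			break;
		}

		/* ... process @got bytes of @req here ... */

		/* Write a trivial status into the writable part. */
		status = cpu_to_vringh32(vrh, 0);
		put = vringh_iov_push_kern(&wiov, &status, sizeof(status));
		if (put < 0) {
			vringh_abandon_kern(vrh, 1);
			err = put;
			break;
		}

		/* Publish the chain as used; len is what we wrote. */
		err = vringh_complete_kern(vrh, head, put);
		if (err)
			break;
	}

	/* Frees only what the helpers kmalloc'ed; stack kvecs are left alone. */
	vringh_kiov_cleanup(&riov);
	vringh_kiov_cleanup(&wiov);

	/* After completing buffers, ask whether the other side wants a kick. */
	if (err >= 0 && vringh_need_notify_kern(vrh) > 0) {
		/* Kick the driver side here (transport specific). */
	}

	return err < 0 ? err : 0;
}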
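
/*
 * Illustrative sketch for the IOTLB variant (again, only an example under
 * stated assumptions).  The main difference from the kernel-space flow
 * above is that every ring and buffer address is translated through the
 * attached IOTLB.  example_setup_iotlb() and example_drain_iotlb() are
 * hypothetical names; the caller is assumed to own the vring memory,
 * @iotlb and @iotlb_lock, and weak barriers are assumed to be sufficient.
 */
#if IS_REACHABLE(CONFIG_VHOST_IOTLB)
static int __maybe_unused example_setup_iotlb(struct vringh *vrh, u64 features,
					      unsigned int num,
					      struct vring_desc *desc,
					      struct vring_avail *avail,
					      struct vring_used *used,
					      struct vhost_iotlb *iotlb,
					      spinlock_t *iotlb_lock)
{
	int err;

	/* Same layout rules as the kernel-space variant... */
	err = vringh_init_iotlb(vrh, features, num, true, desc, avail, used);
	if (err)
		return err;

	/* ...then attach the IOTLB used to translate all ring addresses. */
	vringh_set_iotlb(vrh, iotlb, iotlb_lock);
	return 0;
}

static int __maybe_unused example_drain_iotlb(struct vringh *vrh)
{
	struct vringh_kiov riov;
	u8 buf[64];
	u16 head;
	int err;

	/* Start empty: vringh_getdesc_iotlb() allocates what it needs. */
	vringh_kiov_init(&riov, NULL, 0);

	while ((err = vringh_getdesc_iotlb(vrh, &riov, NULL, &head,
					   GFP_KERNEL)) > 0) {
		ssize_t got = vringh_iov_pull_iotlb(vrh, &riov, buf, sizeof(buf));

		if (got < 0) {
			vringh_abandon_iotlb(vrh, 1);
			err = got;
			break;
		}

		/* ... consume @got bytes of @buf here ... */

		err = vringh_complete_iotlb(vrh, head, 0);
		if (err)
			break;
	}

	vringh_kiov_cleanup(&riov);

	if (err >= 0 && vringh_need_notify_iotlb(vrh) > 0) {
		/* Kick the other side here (transport specific). */
	}

	return err < 0 ? err : 0;
}
#endif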