// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2016 Red Hat, Inc.
 * Author: Michael S. Tsirkin <mst@redhat.com>
 *
 * Partial implementation of virtio 0.9. Event index is used for signalling,
 * unconditionally. Design roughly follows the Linux kernel implementation in
 * order to be able to judge its performance.
 */
#define _GNU_SOURCE
#include "main.h"
#include <stdlib.h>
#include <stdio.h>
#include <assert.h>
#include <string.h>
#include <linux/virtio_ring.h>

struct data {
	void *data;
} *data;

struct vring ring;

/* enabling the below activates experimental ring polling code
 * (which skips index reads on the consumer in favor of looking at
 * high bits of ring id ^ 0x8000).
 */
/* #ifdef RING_POLL */
/* enabling the below activates experimental in-order code
 * (which skips ring updates and reads and writes len in descriptor).
 */
/* #ifdef INORDER */

#if defined(RING_POLL) && defined(INORDER)
#error "RING_POLL and INORDER are mutually exclusive"
#endif
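
/* With RING_POLL, each ring entry carries in its high bits the lap counter
 * (the high bits of the free-running 16-bit ring index) of the pass on which
 * it was written, XORed with 0x8000.  The consumer compares those bits
 * against its own free-running index instead of reading the published index:
 * they match exactly when the slot was filled on the lap the consumer is
 * currently scanning, and the XOR with 0x8000 keeps the zero-initialized
 * ring from matching on the very first lap.
 */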

/* how much padding is needed to avoid false cache sharing */
#define HOST_GUEST_PADDING 0x80

struct guest {
	unsigned short avail_idx;
	unsigned short last_used_idx;
	unsigned short num_free;
	unsigned short kicked_avail_idx;
#ifndef INORDER
	unsigned short free_head;
#else
	unsigned short reserved_free_head;
#endif
	unsigned char reserved[HOST_GUEST_PADDING - 10];
} guest;

struct host {
	/* we do not need to track last avail index
	 * unless we have more than one in flight.
	 */
	unsigned short used_idx;
	unsigned short called_used_idx;
	unsigned char reserved[HOST_GUEST_PADDING - 4];
} host;

/* implemented by ring */
void alloc_ring(void)
{
	int ret;
	int i;
	void *p;

	ret = posix_memalign(&p, 0x1000, vring_size(ring_size, 0x1000));
	if (ret) {
		perror("Unable to allocate ring buffer.\n");
		exit(3);
	}
	memset(p, 0, vring_size(ring_size, 0x1000));
	vring_init(&ring, ring_size, p, 0x1000);

	guest.avail_idx = 0;
	guest.kicked_avail_idx = -1;
	guest.last_used_idx = 0;
#ifndef INORDER
	/* Put everything in free lists. */
	guest.free_head = 0;
#endif
	for (i = 0; i < ring_size - 1; i++)
		ring.desc[i].next = i + 1;
	host.used_idx = 0;
	host.called_used_idx = -1;
	guest.num_free = ring_size;
	data = malloc(ring_size * sizeof *data);
	if (!data) {
		perror("Unable to allocate data buffer.\n");
		exit(3);
	}
	memset(data, 0, ring_size * sizeof *data);
}

/* guest side */
int add_inbuf(unsigned len, void *buf, void *datap)
{
	unsigned head;
#ifndef INORDER
	unsigned avail;
#endif
	struct vring_desc *desc;

	if (!guest.num_free)
		return -1;

#ifdef INORDER
	head = (ring_size - 1) & (guest.avail_idx++);
#else
	head = guest.free_head;
#endif
	guest.num_free--;

	desc = ring.desc;
	desc[head].flags = VRING_DESC_F_NEXT;
	desc[head].addr = (unsigned long)(void *)buf;
	desc[head].len = len;
	/* We do it like this to simulate the way
	 * we'd have to flip it if we had multiple
	 * descriptors.
	 */
	desc[head].flags &= ~VRING_DESC_F_NEXT;
#ifndef INORDER
	guest.free_head = desc[head].next;
#endif

	data[head].data = datap;

#ifdef RING_POLL
	/* Barrier A (for pairing) */
	smp_release();
	avail = guest.avail_idx++;
	ring.avail->ring[avail & (ring_size - 1)] =
		(head | (avail & ~(ring_size - 1))) ^ 0x8000;
#else
#ifndef INORDER
	/* Barrier A (for pairing) */
	smp_release();
	avail = (ring_size - 1) & (guest.avail_idx++);
	ring.avail->ring[avail] = head;
#endif
	/* Barrier A (for pairing) */
	smp_release();
#endif
	ring.avail->idx = guest.avail_idx;
	return 0;
}

void *get_buf(unsigned *lenp, void **bufp)
{
	unsigned head;
	unsigned index;
	void *datap;

#ifdef RING_POLL
	head = (ring_size - 1) & guest.last_used_idx;
	index = ring.used->ring[head].id;
	if ((index ^ guest.last_used_idx ^ 0x8000) & ~(ring_size - 1))
		return NULL;
	/* Barrier B (for pairing) */
	smp_acquire();
	index &= ring_size - 1;
#else
	if (ring.used->idx == guest.last_used_idx)
		return NULL;
	/* Barrier B (for pairing) */
	smp_acquire();
#ifdef INORDER
	head = (ring_size - 1) & guest.last_used_idx;
	index = head;
#else
	head = (ring_size - 1) & guest.last_used_idx;
	index = ring.used->ring[head].id;
#endif

#endif
#ifdef INORDER
	*lenp = ring.desc[index].len;
#else
	*lenp = ring.used->ring[head].len;
#endif
	datap = data[index].data;
	*bufp = (void *)(unsigned long)ring.desc[index].addr;
	data[index].data = NULL;
#ifndef INORDER
	ring.desc[index].next = guest.free_head;
	guest.free_head = index;
#endif
	guest.num_free++;
	guest.last_used_idx++;
	return datap;
}

bool used_empty()
{
	unsigned short last_used_idx = guest.last_used_idx;
#ifdef RING_POLL
	unsigned short head = last_used_idx & (ring_size - 1);
	unsigned index = ring.used->ring[head].id;

	return (index ^ last_used_idx ^ 0x8000) & ~(ring_size - 1);
#else
	return ring.used->idx == last_used_idx;
#endif
}

void disable_call()
{
	/* Doing nothing to disable calls might cause
	 * extra interrupts, but reduces the number of cache misses.
	 */
}

bool enable_call()
{
	vring_used_event(&ring) = guest.last_used_idx;
	/* Flush call index write */
	/* Barrier D (for pairing) */
	smp_mb();
	return used_empty();
}

void kick_available(void)
{
	bool need;

	/* Flush in previous flags write */
	/* Barrier C (for pairing) */
	smp_mb();
	need = vring_need_event(vring_avail_event(&ring),
				guest.avail_idx,
				guest.kicked_avail_idx);

	guest.kicked_avail_idx = guest.avail_idx;
	if (need)
		kick();
}

/* host side */
void disable_kick()
{
	/* Doing nothing to disable kicks might cause
	 * extra interrupts, but reduces the number of cache misses.
	 */
}

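/* As with enable_call() on the guest side, enable_kick() below publishes the
 * event index it wants to be notified at and only then re-checks the avail
 * ring, so the caller can still notice a buffer that the guest made available
 * before it could observe the new event index (and therefore did not kick for).
 */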
bool enable_kick()
{
	vring_avail_event(&ring) = host.used_idx;
	/* Barrier C (for pairing) */
	smp_mb();
	return avail_empty();
}

bool avail_empty()
{
	unsigned head = host.used_idx;
#ifdef RING_POLL
	unsigned index = ring.avail->ring[head & (ring_size - 1)];

	return ((index ^ head ^ 0x8000) & ~(ring_size - 1));
#else
	return head == ring.avail->idx;
#endif
}

bool use_buf(unsigned *lenp, void **bufp)
{
	unsigned used_idx = host.used_idx;
	struct vring_desc *desc;
	unsigned head;

#ifdef RING_POLL
	head = ring.avail->ring[used_idx & (ring_size - 1)];
	if ((used_idx ^ head ^ 0x8000) & ~(ring_size - 1))
		return false;
	/* Barrier A (for pairing) */
	smp_acquire();

	used_idx &= ring_size - 1;
	desc = &ring.desc[head & (ring_size - 1)];
#else
	if (used_idx == ring.avail->idx)
		return false;

	/* Barrier A (for pairing) */
	smp_acquire();

	used_idx &= ring_size - 1;
#ifdef INORDER
	head = used_idx;
#else
	head = ring.avail->ring[used_idx];
#endif
	desc = &ring.desc[head];
#endif

	*lenp = desc->len;
	*bufp = (void *)(unsigned long)desc->addr;

#ifdef INORDER
	desc->len = desc->len - 1;
#else
	/* now update used ring */
	ring.used->ring[used_idx].id = head;
	ring.used->ring[used_idx].len = desc->len - 1;
#endif
	/* Barrier B (for pairing) */
	smp_release();
	host.used_idx++;
	ring.used->idx = host.used_idx;

	return true;
}

void call_used(void)
{
	bool need;

	/* Flush in previous flags write */
	/* Barrier D (for pairing) */
	smp_mb();
	need = vring_need_event(vring_used_event(&ring),
				host.used_idx,
				host.called_used_idx);

	host.called_used_idx = host.used_idx;
	if (need)
		call();
}
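
/*
 * Worked example of the event index check above (illustrative numbers only):
 * vring_need_event(event_idx, new_idx, old_idx) from <linux/virtio_ring.h>
 * boils down to
 *
 *	(__u16)(new_idx - event_idx - 1) < (__u16)(new_idx - old_idx)
 *
 * i.e. notify only if the other side's event index falls among the entries
 * published since the last notification.  With kicked_avail_idx (old) = 5,
 * avail_idx (new) = 8 and an avail event of 6, (8 - 6 - 1) = 1 < (8 - 5) = 3,
 * so kick_available() calls kick(); with an avail event of 8 the left-hand
 * side wraps to 0xffff and no kick is issued.  call_used() applies the same
 * test to the used index and used event.
 */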