/*
 * Copyright (C) 2016 Red Hat, Inc.
 * Author: Michael S. Tsirkin <mst@redhat.com>
 * This work is licensed under the terms of the GNU GPL, version 2.
 *
 * Simple descriptor-based ring. virtio 0.9 compatible event index is used for
 * signalling, unconditionally.
 */
#define _GNU_SOURCE
#include "main.h"
#include <stdlib.h>
#include <stdio.h>
#include <string.h>

/* Next - Where next entry will be written.
 * Prev - "Next" value when event triggered previously.
 * Event - Peer requested event after writing this entry.
 */
static inline bool need_event(unsigned short event,
			      unsigned short next,
			      unsigned short prev)
{
	return (unsigned short)(next - event - 1) < (unsigned short)(next - prev);
}

/* Design:
 * Guest adds descriptors with unique index values and DESC_HW in flags.
 * Host overwrites used descriptors with correct len, index, and DESC_HW clear.
 * Flags are always set last.
 */
#define DESC_HW 0x1

struct desc {
	unsigned short flags;
	unsigned short index;
	unsigned len;
	unsigned long long addr;
};

/* how much padding is needed to avoid false cache sharing */
#define HOST_GUEST_PADDING 0x80

/* Mostly read */
struct event {
	unsigned short kick_index;
	unsigned char reserved0[HOST_GUEST_PADDING - 2];
	unsigned short call_index;
	unsigned char reserved1[HOST_GUEST_PADDING - 2];
};

struct data {
	void *buf; /* descriptor is writeable, we can't get buf from there */
	void *data;
} *data;

struct desc *ring;
struct event *event;

struct guest {
	unsigned avail_idx;
	unsigned last_used_idx;
	unsigned num_free;
	unsigned kicked_avail_idx;
	unsigned char reserved[HOST_GUEST_PADDING - 12];
} guest;

struct host {
	/* we do not need to track last avail index
	 * unless we have more than one in flight.
	 */
	unsigned used_idx;
	unsigned called_used_idx;
	unsigned char reserved[HOST_GUEST_PADDING - 4];
} host;

/* implemented by ring */
void alloc_ring(void)
{
	int ret;
	int i;

	ret = posix_memalign((void **)&ring, 0x1000, ring_size * sizeof *ring);
	if (ret) {
		perror("Unable to allocate ring buffer.\n");
		exit(3);
	}
	event = malloc(sizeof *event);
	if (!event) {
		perror("Unable to allocate event buffer.\n");
		exit(3);
	}
	memset(event, 0, sizeof *event);
	guest.avail_idx = 0;
	guest.kicked_avail_idx = -1;
	guest.last_used_idx = 0;
	host.used_idx = 0;
	host.called_used_idx = -1;
	for (i = 0; i < ring_size; ++i) {
		struct desc desc = {
			.index = i,
		};
		ring[i] = desc;
	}
	guest.num_free = ring_size;
	data = malloc(ring_size * sizeof *data);
	if (!data) {
		perror("Unable to allocate data buffer.\n");
		exit(3);
	}
	memset(data, 0, ring_size * sizeof *data);
}

/* guest side */
int add_inbuf(unsigned len, void *buf, void *datap)
{
	unsigned head, index;

	if (!guest.num_free)
		return -1;

	guest.num_free--;
	head = (ring_size - 1) & (guest.avail_idx++);

	/* Start with a write. On MESI architectures this helps
	 * avoid a shared state with consumer that is polling this descriptor.
	 */
	ring[head].addr = (unsigned long)(void *)buf;
	ring[head].len = len;
	/* read below might bypass write above. That is OK because it's just an
	 * optimization. If this happens, we will get the cache line in a
	 * shared state which is unfortunate, but probably not worth it to
	 * add an explicit full barrier to avoid this.
	 */
	barrier();
	index = ring[head].index;
	data[index].buf = buf;
	data[index].data = datap;
	/* Barrier A (for pairing) */
	smp_release();
	ring[head].flags = DESC_HW;

	return 0;
}

void *get_buf(unsigned *lenp, void **bufp)
{
	unsigned head = (ring_size - 1) & guest.last_used_idx;
	unsigned index;
	void *datap;

	if (ring[head].flags & DESC_HW)
		return NULL;
	/* Barrier B (for pairing) */
	smp_acquire();
	*lenp = ring[head].len;
	index = ring[head].index & (ring_size - 1);
	datap = data[index].data;
	*bufp = data[index].buf;
	data[index].buf = NULL;
	data[index].data = NULL;
	guest.num_free++;
	guest.last_used_idx++;
	return datap;
}

void poll_used(void)
{
	unsigned head = (ring_size - 1) & guest.last_used_idx;

	while (ring[head].flags & DESC_HW)
		busy_wait();
}

void disable_call()
{
	/* Doing nothing to disable calls might cause
	 * extra interrupts, but reduces the number of cache misses.
	 */
}

bool enable_call()
{
	unsigned head = (ring_size - 1) & guest.last_used_idx;

	event->call_index = guest.last_used_idx;
	/* Flush call index write */
	/* Barrier D (for pairing) */
	smp_mb();
	return ring[head].flags & DESC_HW;
}

void kick_available(void)
{
	/* Flush in previous flags write */
	/* Barrier C (for pairing) */
	smp_mb();
	if (!need_event(event->kick_index,
			guest.avail_idx,
			guest.kicked_avail_idx))
		return;

	guest.kicked_avail_idx = guest.avail_idx;
	kick();
}

/* host side */
void disable_kick()
{
	/* Doing nothing to disable kicks might cause
	 * extra interrupts, but reduces the number of cache misses.
	 */
}

bool enable_kick()
{
	unsigned head = (ring_size - 1) & host.used_idx;

	event->kick_index = host.used_idx;
	/* Barrier C (for pairing) */
	smp_mb();
	return !(ring[head].flags & DESC_HW);
}

void poll_avail(void)
{
	unsigned head = (ring_size - 1) & host.used_idx;

	while (!(ring[head].flags & DESC_HW))
		busy_wait();
}

bool use_buf(unsigned *lenp, void **bufp)
{
	unsigned head = (ring_size - 1) & host.used_idx;

	if (!(ring[head].flags & DESC_HW))
		return false;

	/* make sure length read below is not speculated */
	/* Barrier A (for pairing) */
	smp_acquire();

	/* simple in-order completion: we don't need
	 * to touch index at all. This also means we
	 * can just modify the descriptor in-place.
	 */
	ring[head].len--;
	/* Make sure len is valid before flags.
	 * Note: alternative is to write len and flags in one access -
	 * possible on 64 bit architectures but wmb is free on Intel anyway
	 * so I have no way to test whether it's a gain.
	 */
	/* Barrier B (for pairing) */
	smp_release();
	ring[head].flags = 0;
	host.used_idx++;
	return true;
}

void call_used(void)
{
	/* Flush in previous flags write */
	/* Barrier D (for pairing) */
	smp_mb();
	if (!need_event(event->call_index,
			host.used_idx,
			host.called_used_idx))
		return;

	host.called_used_idx = host.used_idx;
	call();
}
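
/*
 * A minimal, hypothetical self-test of need_event() above; it is not part
 * of the original ring implementation.  The NEED_EVENT_SELFTEST macro and
 * the function name below are illustrative only.  Build with
 * -DNEED_EVENT_SELFTEST and call need_event_selftest() from a test harness
 * to exercise the event-index comparison, including 16-bit wrap-around.
 */
#ifdef NEED_EVENT_SELFTEST
#include <assert.h>

static void need_event_selftest(void)
{
	/* Peer asked for an event after entry 3 was written; entries 3 and 4
	 * have been written since the last notification (prev = 2, next = 5),
	 * so a notification is due.
	 */
	assert(need_event(3, 5, 2));
	/* Entry 7 has not been written yet (next = 5): no notification. */
	assert(!need_event(7, 5, 2));
	/* The unsigned short arithmetic keeps the comparison correct when
	 * the 16-bit indices wrap around.
	 */
	assert(need_event(0xfffe, 0x0001, 0xfffd));
}
#endif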