1 /* SPDX-License-Identifier: GPL-2.0 */ 2 /* XDP user-space ring structure 3 * Copyright(c) 2018 Intel Corporation. 4 */ 5 6 #ifndef _LINUX_XSK_QUEUE_H 7 #define _LINUX_XSK_QUEUE_H 8 9 #include <linux/types.h> 10 #include <linux/if_xdp.h> 11 #include <net/xdp_sock.h> 12 13 struct xdp_ring { 14 u32 producer ____cacheline_aligned_in_smp; 15 u32 consumer ____cacheline_aligned_in_smp; 16 u32 flags; 17 }; 18 19 /* Used for the RX and TX queues for packets */ 20 struct xdp_rxtx_ring { 21 struct xdp_ring ptrs; 22 struct xdp_desc desc[0] ____cacheline_aligned_in_smp; 23 }; 24 25 /* Used for the fill and completion queues for buffers */ 26 struct xdp_umem_ring { 27 struct xdp_ring ptrs; 28 u64 desc[0] ____cacheline_aligned_in_smp; 29 }; 30 31 struct xsk_queue { 32 u64 chunk_mask; 33 u64 size; 34 u32 ring_mask; 35 u32 nentries; 36 u32 cached_prod; 37 u32 cached_cons; 38 struct xdp_ring *ring; 39 u64 invalid_descs; 40 }; 41 42 /* The structure of the shared state of the rings are the same as the 43 * ring buffer in kernel/events/ring_buffer.c. For the Rx and completion 44 * ring, the kernel is the producer and user space is the consumer. For 45 * the Tx and fill rings, the kernel is the consumer and user space is 46 * the producer. 47 * 48 * producer consumer 49 * 50 * if (LOAD ->consumer) { LOAD ->producer 51 * (A) smp_rmb() (C) 52 * STORE $data LOAD $data 53 * smp_wmb() (B) smp_mb() (D) 54 * STORE ->producer STORE ->consumer 55 * } 56 * 57 * (A) pairs with (D), and (B) pairs with (C). 58 * 59 * Starting with (B), it protects the data from being written after 60 * the producer pointer. If this barrier was missing, the consumer 61 * could observe the producer pointer being set and thus load the data 62 * before the producer has written the new data. The consumer would in 63 * this case load the old data. 64 * 65 * (C) protects the consumer from speculatively loading the data before 66 * the producer pointer actually has been read. If we do not have this 67 * barrier, some architectures could load old data as speculative loads 68 * are not discarded as the CPU does not know there is a dependency 69 * between ->producer and data. 70 * 71 * (A) is a control dependency that separates the load of ->consumer 72 * from the stores of $data. In case ->consumer indicates there is no 73 * room in the buffer to store $data we do not. So no barrier is needed. 74 * 75 * (D) protects the load of the data to be observed to happen after the 76 * store of the consumer pointer. If we did not have this memory 77 * barrier, the producer could observe the consumer pointer being set 78 * and overwrite the data with a new value before the consumer got the 79 * chance to read the old value. The consumer would thus miss reading 80 * the old entry and very likely read the new entry twice, once right 81 * now and again after circling through the ring. 82 */ 83 84 /* The operations on the rings are the following: 85 * 86 * producer consumer 87 * 88 * RESERVE entries PEEK in the ring for entries 89 * WRITE data into the ring READ data from the ring 90 * SUBMIT entries RELEASE entries 91 * 92 * The producer reserves one or more entries in the ring. It can then 93 * fill in these entries and finally submit them so that they can be 94 * seen and read by the consumer. 95 * 96 * The consumer peeks into the ring to see if the producer has written 97 * any new entries. If so, the producer can then read these entries 98 * and when it is done reading them release them back to the producer 99 * so that the producer can use these slots to fill in new entries. 100 * 101 * The function names below reflect these operations. 102 */ 103 104 /* Functions that read and validate content from consumer rings. */ 105 106 static inline bool xskq_cons_crosses_non_contig_pg(struct xdp_umem *umem, 107 u64 addr, 108 u64 length) 109 { 110 bool cross_pg = (addr & (PAGE_SIZE - 1)) + length > PAGE_SIZE; 111 bool next_pg_contig = 112 (unsigned long)umem->pages[(addr >> PAGE_SHIFT)].addr & 113 XSK_NEXT_PG_CONTIG_MASK; 114 115 return cross_pg && !next_pg_contig; 116 } 117 118 static inline bool xskq_cons_is_valid_unaligned(struct xsk_queue *q, 119 u64 addr, 120 u64 length, 121 struct xdp_umem *umem) 122 { 123 u64 base_addr = xsk_umem_extract_addr(addr); 124 125 addr = xsk_umem_add_offset_to_addr(addr); 126 if (base_addr >= q->size || addr >= q->size || 127 xskq_cons_crosses_non_contig_pg(umem, addr, length)) { 128 q->invalid_descs++; 129 return false; 130 } 131 132 return true; 133 } 134 135 static inline bool xskq_cons_is_valid_addr(struct xsk_queue *q, u64 addr) 136 { 137 if (addr >= q->size) { 138 q->invalid_descs++; 139 return false; 140 } 141 142 return true; 143 } 144 145 static inline bool xskq_cons_read_addr(struct xsk_queue *q, u64 *addr, 146 struct xdp_umem *umem) 147 { 148 struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring; 149 150 while (q->cached_cons != q->cached_prod) { 151 u32 idx = q->cached_cons & q->ring_mask; 152 153 *addr = ring->desc[idx] & q->chunk_mask; 154 155 if (umem->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG) { 156 if (xskq_cons_is_valid_unaligned(q, *addr, 157 umem->chunk_size_nohr, 158 umem)) 159 return true; 160 goto out; 161 } 162 163 if (xskq_cons_is_valid_addr(q, *addr)) 164 return true; 165 166 out: 167 q->cached_cons++; 168 } 169 170 return false; 171 } 172 173 static inline bool xskq_cons_is_valid_desc(struct xsk_queue *q, 174 struct xdp_desc *d, 175 struct xdp_umem *umem) 176 { 177 if (umem->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG) { 178 if (!xskq_cons_is_valid_unaligned(q, d->addr, d->len, umem)) 179 return false; 180 181 if (d->len > umem->chunk_size_nohr || d->options) { 182 q->invalid_descs++; 183 return false; 184 } 185 186 return true; 187 } 188 189 if (!xskq_cons_is_valid_addr(q, d->addr)) 190 return false; 191 192 if (((d->addr + d->len) & q->chunk_mask) != (d->addr & q->chunk_mask) || 193 d->options) { 194 q->invalid_descs++; 195 return false; 196 } 197 198 return true; 199 } 200 201 static inline bool xskq_cons_read_desc(struct xsk_queue *q, 202 struct xdp_desc *desc, 203 struct xdp_umem *umem) 204 { 205 while (q->cached_cons != q->cached_prod) { 206 struct xdp_rxtx_ring *ring = (struct xdp_rxtx_ring *)q->ring; 207 u32 idx = q->cached_cons & q->ring_mask; 208 209 *desc = ring->desc[idx]; 210 if (xskq_cons_is_valid_desc(q, desc, umem)) 211 return true; 212 213 q->cached_cons++; 214 } 215 216 return false; 217 } 218 219 /* Functions for consumers */ 220 221 static inline void __xskq_cons_release(struct xsk_queue *q) 222 { 223 smp_mb(); /* D, matches A */ 224 WRITE_ONCE(q->ring->consumer, q->cached_cons); 225 } 226 227 static inline void __xskq_cons_peek(struct xsk_queue *q) 228 { 229 /* Refresh the local pointer */ 230 q->cached_prod = READ_ONCE(q->ring->producer); 231 smp_rmb(); /* C, matches B */ 232 } 233 234 static inline void xskq_cons_get_entries(struct xsk_queue *q) 235 { 236 __xskq_cons_release(q); 237 __xskq_cons_peek(q); 238 } 239 240 static inline bool xskq_cons_has_entries(struct xsk_queue *q, u32 cnt) 241 { 242 u32 entries = q->cached_prod - q->cached_cons; 243 244 if (entries >= cnt) 245 return true; 246 247 __xskq_cons_peek(q); 248 entries = q->cached_prod - q->cached_cons; 249 250 return entries >= cnt; 251 } 252 253 static inline bool xskq_cons_peek_addr(struct xsk_queue *q, u64 *addr, 254 struct xdp_umem *umem) 255 { 256 if (q->cached_prod == q->cached_cons) 257 xskq_cons_get_entries(q); 258 return xskq_cons_read_addr(q, addr, umem); 259 } 260 261 static inline bool xskq_cons_peek_desc(struct xsk_queue *q, 262 struct xdp_desc *desc, 263 struct xdp_umem *umem) 264 { 265 if (q->cached_prod == q->cached_cons) 266 xskq_cons_get_entries(q); 267 return xskq_cons_read_desc(q, desc, umem); 268 } 269 270 static inline void xskq_cons_release(struct xsk_queue *q) 271 { 272 /* To improve performance, only update local state here. 273 * Reflect this to global state when we get new entries 274 * from the ring in xskq_cons_get_entries() and whenever 275 * Rx or Tx processing are completed in the NAPI loop. 276 */ 277 q->cached_cons++; 278 } 279 280 static inline bool xskq_cons_is_full(struct xsk_queue *q) 281 { 282 /* No barriers needed since data is not accessed */ 283 return READ_ONCE(q->ring->producer) - READ_ONCE(q->ring->consumer) == 284 q->nentries; 285 } 286 287 /* Functions for producers */ 288 289 static inline bool xskq_prod_is_full(struct xsk_queue *q) 290 { 291 u32 free_entries = q->nentries - (q->cached_prod - q->cached_cons); 292 293 if (free_entries) 294 return false; 295 296 /* Refresh the local tail pointer */ 297 q->cached_cons = READ_ONCE(q->ring->consumer); 298 free_entries = q->nentries - (q->cached_prod - q->cached_cons); 299 300 return !free_entries; 301 } 302 303 static inline int xskq_prod_reserve(struct xsk_queue *q) 304 { 305 if (xskq_prod_is_full(q)) 306 return -ENOSPC; 307 308 /* A, matches D */ 309 q->cached_prod++; 310 return 0; 311 } 312 313 static inline int xskq_prod_reserve_addr(struct xsk_queue *q, u64 addr) 314 { 315 struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring; 316 317 if (xskq_prod_is_full(q)) 318 return -ENOSPC; 319 320 /* A, matches D */ 321 ring->desc[q->cached_prod++ & q->ring_mask] = addr; 322 return 0; 323 } 324 325 static inline int xskq_prod_reserve_desc(struct xsk_queue *q, 326 u64 addr, u32 len) 327 { 328 struct xdp_rxtx_ring *ring = (struct xdp_rxtx_ring *)q->ring; 329 u32 idx; 330 331 if (xskq_prod_is_full(q)) 332 return -ENOSPC; 333 334 /* A, matches D */ 335 idx = q->cached_prod++ & q->ring_mask; 336 ring->desc[idx].addr = addr; 337 ring->desc[idx].len = len; 338 339 return 0; 340 } 341 342 static inline void __xskq_prod_submit(struct xsk_queue *q, u32 idx) 343 { 344 smp_wmb(); /* B, matches C */ 345 346 WRITE_ONCE(q->ring->producer, idx); 347 } 348 349 static inline void xskq_prod_submit(struct xsk_queue *q) 350 { 351 __xskq_prod_submit(q, q->cached_prod); 352 } 353 354 static inline void xskq_prod_submit_addr(struct xsk_queue *q, u64 addr) 355 { 356 struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring; 357 u32 idx = q->ring->producer; 358 359 ring->desc[idx++ & q->ring_mask] = addr; 360 361 __xskq_prod_submit(q, idx); 362 } 363 364 static inline void xskq_prod_submit_n(struct xsk_queue *q, u32 nb_entries) 365 { 366 __xskq_prod_submit(q, q->ring->producer + nb_entries); 367 } 368 369 static inline bool xskq_prod_is_empty(struct xsk_queue *q) 370 { 371 /* No barriers needed since data is not accessed */ 372 return READ_ONCE(q->ring->consumer) == READ_ONCE(q->ring->producer); 373 } 374 375 /* For both producers and consumers */ 376 377 static inline u64 xskq_nb_invalid_descs(struct xsk_queue *q) 378 { 379 return q ? q->invalid_descs : 0; 380 } 381 382 void xskq_set_umem(struct xsk_queue *q, u64 size, u64 chunk_mask); 383 struct xsk_queue *xskq_create(u32 nentries, bool umem_queue); 384 void xskq_destroy(struct xsk_queue *q_ops); 385 386 /* Executed by the core when the entire UMEM gets freed */ 387 void xsk_reuseq_destroy(struct xdp_umem *umem); 388 389 #endif /* _LINUX_XSK_QUEUE_H */ 390