/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Definitions for the 'struct ptr_ring' data structure.
 *
 * Author:
 *	Michael S. Tsirkin <mst@redhat.com>
 *
 * Copyright (C) 2016 Red Hat, Inc.
 *
 * This is a limited-size FIFO maintaining pointers in FIFO order, with
 * one CPU producing entries and another consuming entries.
 *
 * This implementation tries to minimize cache contention when there is a
 * single producer and a single consumer CPU.
 */

#ifndef _LINUX_PTR_RING_H
#define _LINUX_PTR_RING_H 1

#ifdef __KERNEL__
#include <linux/spinlock.h>
#include <linux/cache.h>
#include <linux/types.h>
#include <linux/compiler.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <asm/errno.h>
#endif

struct ptr_ring {
	int producer ____cacheline_aligned_in_smp;
	spinlock_t producer_lock;
	int consumer_head ____cacheline_aligned_in_smp; /* next valid entry */
	int consumer_tail; /* next entry to invalidate */
	spinlock_t consumer_lock;
	/* Shared consumer/producer data */
	/* Read-only by both the producer and the consumer */
	int size ____cacheline_aligned_in_smp; /* max entries in queue */
	int batch; /* number of entries to consume in a batch */
	void **queue;
};

/* Note: callers invoking this in a loop must use a compiler barrier,
 * for example cpu_relax().
 *
 * NB: this is unlike __ptr_ring_empty in that callers must hold producer_lock:
 * see e.g. ptr_ring_full.
 */
static inline bool __ptr_ring_full(struct ptr_ring *r)
{
	return r->queue[r->producer];
}

static inline bool ptr_ring_full(struct ptr_ring *r)
{
	bool ret;

	spin_lock(&r->producer_lock);
	ret = __ptr_ring_full(r);
	spin_unlock(&r->producer_lock);

	return ret;
}

static inline bool ptr_ring_full_irq(struct ptr_ring *r)
{
	bool ret;

	spin_lock_irq(&r->producer_lock);
	ret = __ptr_ring_full(r);
	spin_unlock_irq(&r->producer_lock);

	return ret;
}

static inline bool ptr_ring_full_any(struct ptr_ring *r)
{
	unsigned long flags;
	bool ret;

	spin_lock_irqsave(&r->producer_lock, flags);
	ret = __ptr_ring_full(r);
	spin_unlock_irqrestore(&r->producer_lock, flags);

	return ret;
}

static inline bool ptr_ring_full_bh(struct ptr_ring *r)
{
	bool ret;

	spin_lock_bh(&r->producer_lock);
	ret = __ptr_ring_full(r);
	spin_unlock_bh(&r->producer_lock);

	return ret;
}

/* Note: callers invoking this in a loop must use a compiler barrier,
 * for example cpu_relax(). Callers must hold producer_lock.
 * Callers are responsible for making sure the pointer that is being queued
 * points to valid data.
 */
static inline int __ptr_ring_produce(struct ptr_ring *r, void *ptr)
{
	if (unlikely(!r->size) || r->queue[r->producer])
		return -ENOSPC;

	/* Make sure the pointer we are storing points to valid data. */
	/* Pairs with the dependency ordering in __ptr_ring_consume. */
	smp_wmb();

	WRITE_ONCE(r->queue[r->producer++], ptr);
	if (unlikely(r->producer >= r->size))
		r->producer = 0;
	return 0;
}
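
/* Example (illustrative sketch, not part of the original API): a producer
 * that already holds producer_lock and spins until the consumer frees up a
 * slot, with cpu_relax() as the compiler barrier requested above.  The names
 * my_produce_spin, my_ring and my_entry are placeholders; a real caller
 * would normally bound the wait rather than spin indefinitely.
 *
 *	static void my_produce_spin(struct ptr_ring *my_ring, void *my_entry)
 *	{
 *		spin_lock(&my_ring->producer_lock);
 *		while (__ptr_ring_produce(my_ring, my_entry) == -ENOSPC)
 *			cpu_relax();
 *		spin_unlock(&my_ring->producer_lock);
 *	}
 */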

/*
 * Note: resize (below) nests producer lock within consumer lock, so if you
 * consume in interrupt or BH context, you must disable interrupts/BH when
 * calling this.
 */
static inline int ptr_ring_produce(struct ptr_ring *r, void *ptr)
{
	int ret;

	spin_lock(&r->producer_lock);
	ret = __ptr_ring_produce(r, ptr);
	spin_unlock(&r->producer_lock);

	return ret;
}

static inline int ptr_ring_produce_irq(struct ptr_ring *r, void *ptr)
{
	int ret;

	spin_lock_irq(&r->producer_lock);
	ret = __ptr_ring_produce(r, ptr);
	spin_unlock_irq(&r->producer_lock);

	return ret;
}

static inline int ptr_ring_produce_any(struct ptr_ring *r, void *ptr)
{
	unsigned long flags;
	int ret;

	spin_lock_irqsave(&r->producer_lock, flags);
	ret = __ptr_ring_produce(r, ptr);
	spin_unlock_irqrestore(&r->producer_lock, flags);

	return ret;
}

static inline int ptr_ring_produce_bh(struct ptr_ring *r, void *ptr)
{
	int ret;

	spin_lock_bh(&r->producer_lock);
	ret = __ptr_ring_produce(r, ptr);
	spin_unlock_bh(&r->producer_lock);

	return ret;
}

static inline void *__ptr_ring_peek(struct ptr_ring *r)
{
	if (likely(r->size))
		return READ_ONCE(r->queue[r->consumer_head]);
	return NULL;
}

/*
 * Test ring empty status without taking any locks.
 *
 * NB: This is only safe to call if the ring is never resized.
 *
 * However, if some other CPU consumes ring entries at the same time, the value
 * returned is not guaranteed to be correct.
 *
 * In this case - to avoid incorrectly detecting the ring
 * as empty - the CPU consuming the ring entries is responsible
 * for either consuming all ring entries until the ring is empty,
 * or synchronizing with some other CPU and causing it to
 * re-test __ptr_ring_empty and/or consume the ring entries
 * after the synchronization point.
 *
 * Note: callers invoking this in a loop must use a compiler barrier,
 * for example cpu_relax().
 */
static inline bool __ptr_ring_empty(struct ptr_ring *r)
{
	if (likely(r->size))
		return !r->queue[READ_ONCE(r->consumer_head)];
	return true;
}
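
/* Example (illustrative sketch): a consumer-side busy wait built on the
 * lockless test above.  It assumes the ring is never resized, that this CPU
 * is the only consumer, and that cpu_relax() provides the compiler barrier
 * the note above asks for.  my_ring is a placeholder name.
 *
 *	while (__ptr_ring_empty(my_ring))
 *		cpu_relax();
 */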

static inline bool ptr_ring_empty(struct ptr_ring *r)
{
	bool ret;

	spin_lock(&r->consumer_lock);
	ret = __ptr_ring_empty(r);
	spin_unlock(&r->consumer_lock);

	return ret;
}

static inline bool ptr_ring_empty_irq(struct ptr_ring *r)
{
	bool ret;

	spin_lock_irq(&r->consumer_lock);
	ret = __ptr_ring_empty(r);
	spin_unlock_irq(&r->consumer_lock);

	return ret;
}

static inline bool ptr_ring_empty_any(struct ptr_ring *r)
{
	unsigned long flags;
	bool ret;

	spin_lock_irqsave(&r->consumer_lock, flags);
	ret = __ptr_ring_empty(r);
	spin_unlock_irqrestore(&r->consumer_lock, flags);

	return ret;
}

static inline bool ptr_ring_empty_bh(struct ptr_ring *r)
{
	bool ret;

	spin_lock_bh(&r->consumer_lock);
	ret = __ptr_ring_empty(r);
	spin_unlock_bh(&r->consumer_lock);

	return ret;
}

/* Must only be called after __ptr_ring_peek returned !NULL */
static inline void __ptr_ring_discard_one(struct ptr_ring *r)
{
	/* Fundamentally, what we want to do is update the consumer
	 * index and zero out the entry so the producer can reuse it.
	 * Doing it naively at each consume would be as simple as:
	 *	consumer = r->consumer;
	 *	r->queue[consumer++] = NULL;
	 *	if (unlikely(consumer >= r->size))
	 *		consumer = 0;
	 *	r->consumer = consumer;
	 * but that is suboptimal when the ring is full as the producer is
	 * writing out new entries in the same cache line. Defer these updates
	 * until a batch of entries has been consumed.
	 */
	/* Note: we must keep consumer_head valid at all times for __ptr_ring_empty
	 * to work correctly.
	 */
	int consumer_head = r->consumer_head;
	int head = consumer_head++;

	/* Once we have processed enough entries, invalidate them in
	 * the ring all at once so the producer can reuse their space in the ring.
	 * We also do this when we reach the end of the ring - not mandatory
	 * but it helps keep the implementation simple.
	 */
	if (unlikely(consumer_head - r->consumer_tail >= r->batch ||
		     consumer_head >= r->size)) {
		/* Zero out entries in reverse order: this way we touch the
		 * cache line that the producer might currently be reading last;
		 * the producer won't make progress and touch other cache lines
		 * besides the first one until we write out all entries.
		 */
		while (likely(head >= r->consumer_tail))
			r->queue[head--] = NULL;
		r->consumer_tail = consumer_head;
	}
	if (unlikely(consumer_head >= r->size)) {
		consumer_head = 0;
		r->consumer_tail = 0;
	}
	/* matching READ_ONCE in __ptr_ring_empty for lockless tests */
	WRITE_ONCE(r->consumer_head, consumer_head);
}

static inline void *__ptr_ring_consume(struct ptr_ring *r)
{
	void *ptr;

	/* The READ_ONCE in __ptr_ring_peek guarantees that anyone
	 * accessing data through the pointer is up to date. Pairs
	 * with smp_wmb in __ptr_ring_produce.
	 */
	ptr = __ptr_ring_peek(r);
	if (ptr)
		__ptr_ring_discard_one(r);

	return ptr;
}

static inline int __ptr_ring_consume_batched(struct ptr_ring *r,
					     void **array, int n)
{
	void *ptr;
	int i;

	for (i = 0; i < n; i++) {
		ptr = __ptr_ring_consume(r);
		if (!ptr)
			break;
		array[i] = ptr;
	}

	return i;
}

/*
 * Note: resize (below) nests producer lock within consumer lock, so if you
 * call this in interrupt or BH context, you must disable interrupts/BH when
 * producing.
 */
static inline void *ptr_ring_consume(struct ptr_ring *r)
{
	void *ptr;

	spin_lock(&r->consumer_lock);
	ptr = __ptr_ring_consume(r);
	spin_unlock(&r->consumer_lock);

	return ptr;
}

static inline void *ptr_ring_consume_irq(struct ptr_ring *r)
{
	void *ptr;

	spin_lock_irq(&r->consumer_lock);
	ptr = __ptr_ring_consume(r);
	spin_unlock_irq(&r->consumer_lock);

	return ptr;
}

static inline void *ptr_ring_consume_any(struct ptr_ring *r)
{
	unsigned long flags;
	void *ptr;

	spin_lock_irqsave(&r->consumer_lock, flags);
	ptr = __ptr_ring_consume(r);
	spin_unlock_irqrestore(&r->consumer_lock, flags);

	return ptr;
}

static inline void *ptr_ring_consume_bh(struct ptr_ring *r)
{
	void *ptr;

	spin_lock_bh(&r->consumer_lock);
	ptr = __ptr_ring_consume(r);
	spin_unlock_bh(&r->consumer_lock);

	return ptr;
}

static inline int ptr_ring_consume_batched(struct ptr_ring *r,
					   void **array, int n)
{
	int ret;

	spin_lock(&r->consumer_lock);
	ret = __ptr_ring_consume_batched(r, array, n);
	spin_unlock(&r->consumer_lock);

	return ret;
}

static inline int ptr_ring_consume_batched_irq(struct ptr_ring *r,
					       void **array, int n)
{
	int ret;

	spin_lock_irq(&r->consumer_lock);
	ret = __ptr_ring_consume_batched(r, array, n);
	spin_unlock_irq(&r->consumer_lock);

	return ret;
}

static inline int ptr_ring_consume_batched_any(struct ptr_ring *r,
					       void **array, int n)
{
	unsigned long flags;
	int ret;

	spin_lock_irqsave(&r->consumer_lock, flags);
	ret = __ptr_ring_consume_batched(r, array, n);
	spin_unlock_irqrestore(&r->consumer_lock, flags);

	return ret;
}

static inline int ptr_ring_consume_batched_bh(struct ptr_ring *r,
					      void **array, int n)
{
	int ret;

	spin_lock_bh(&r->consumer_lock);
	ret = __ptr_ring_consume_batched(r, array, n);
	spin_unlock_bh(&r->consumer_lock);

	return ret;
}
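
/* Example (illustrative sketch): draining up to a fixed batch of entries in
 * process context.  The batch size of 16 and the my_handle_entry() callback
 * are hypothetical.
 *
 *	static void my_drain_batch(struct ptr_ring *my_ring)
 *	{
 *		void *batch[16];
 *		int n, i;
 *
 *		n = ptr_ring_consume_batched(my_ring, batch, ARRAY_SIZE(batch));
 *		for (i = 0; i < n; i++)
 *			my_handle_entry(batch[i]);
 *	}
 */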

/* Cast to structure type and call a function without discarding from FIFO.
 * Function must return a value.
 * Callers must take consumer_lock.
 */
#define __PTR_RING_PEEK_CALL(r, f) ((f)(__ptr_ring_peek(r)))

#define PTR_RING_PEEK_CALL(r, f) ({ \
	typeof((f)(NULL)) __PTR_RING_PEEK_CALL_v; \
	\
	spin_lock(&(r)->consumer_lock); \
	__PTR_RING_PEEK_CALL_v = __PTR_RING_PEEK_CALL(r, f); \
	spin_unlock(&(r)->consumer_lock); \
	__PTR_RING_PEEK_CALL_v; \
})

#define PTR_RING_PEEK_CALL_IRQ(r, f) ({ \
	typeof((f)(NULL)) __PTR_RING_PEEK_CALL_v; \
	\
	spin_lock_irq(&(r)->consumer_lock); \
	__PTR_RING_PEEK_CALL_v = __PTR_RING_PEEK_CALL(r, f); \
	spin_unlock_irq(&(r)->consumer_lock); \
	__PTR_RING_PEEK_CALL_v; \
})

#define PTR_RING_PEEK_CALL_BH(r, f) ({ \
	typeof((f)(NULL)) __PTR_RING_PEEK_CALL_v; \
	\
	spin_lock_bh(&(r)->consumer_lock); \
	__PTR_RING_PEEK_CALL_v = __PTR_RING_PEEK_CALL(r, f); \
	spin_unlock_bh(&(r)->consumer_lock); \
	__PTR_RING_PEEK_CALL_v; \
})

#define PTR_RING_PEEK_CALL_ANY(r, f) ({ \
	typeof((f)(NULL)) __PTR_RING_PEEK_CALL_v; \
	unsigned long __PTR_RING_PEEK_CALL_f; \
	\
	spin_lock_irqsave(&(r)->consumer_lock, __PTR_RING_PEEK_CALL_f); \
	__PTR_RING_PEEK_CALL_v = __PTR_RING_PEEK_CALL(r, f); \
	spin_unlock_irqrestore(&(r)->consumer_lock, __PTR_RING_PEEK_CALL_f); \
	__PTR_RING_PEEK_CALL_v; \
})
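
/* Example (illustrative sketch): calling a function on the head entry without
 * dequeuing it.  struct my_entry and my_entry_len() are hypothetical; note
 * that the callback must tolerate NULL, since __ptr_ring_peek() returns NULL
 * when the ring is empty.
 *
 *	static int my_entry_len(void *entry)
 *	{
 *		return entry ? ((struct my_entry *)entry)->len : 0;
 *	}
 *
 *	int len = PTR_RING_PEEK_CALL(&my_ring, my_entry_len);
 */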

/* Not all gfp_t flags (besides GFP_KERNEL) are allowed. See
 * documentation for vmalloc for which of them are legal.
 */
static inline void **__ptr_ring_init_queue_alloc(unsigned int size, gfp_t gfp)
{
	if (size > KMALLOC_MAX_SIZE / sizeof(void *))
		return NULL;
	return kvmalloc_array(size, sizeof(void *), gfp | __GFP_ZERO);
}

static inline void __ptr_ring_set_size(struct ptr_ring *r, int size)
{
	r->size = size;
	r->batch = SMP_CACHE_BYTES * 2 / sizeof(*(r->queue));
	/* We need to set batch at least to 1 to make logic
	 * in __ptr_ring_discard_one work correctly.
	 * Batching too much (because ring is small) would cause a lot of
	 * burstiness. Needs tuning, for now disable batching.
	 */
	if (r->batch > r->size / 2 || !r->batch)
		r->batch = 1;
}
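
/* Worked example of the batch computation above: on a typical 64-bit system
 * with SMP_CACHE_BYTES == 64 and 8-byte pointers, batch starts as
 * 64 * 2 / 8 == 16, so the consumer invalidates entries 16 at a time.  For a
 * ring with fewer than 32 entries the "r->batch > r->size / 2" check then
 * drops batch back to 1, effectively disabling batching for small rings.
 */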

static inline int ptr_ring_init(struct ptr_ring *r, int size, gfp_t gfp)
{
	r->queue = __ptr_ring_init_queue_alloc(size, gfp);
	if (!r->queue)
		return -ENOMEM;

	__ptr_ring_set_size(r, size);
	r->producer = r->consumer_head = r->consumer_tail = 0;
	spin_lock_init(&r->producer_lock);
	spin_lock_init(&r->consumer_lock);

	return 0;
}
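
/* Example (illustrative sketch): a typical life cycle with one producer and
 * one consumer.  The names my_ring, my_free and obj and the size 256 are
 * placeholders for this sketch.
 *
 *	struct ptr_ring my_ring;
 *
 *	if (ptr_ring_init(&my_ring, 256, GFP_KERNEL))
 *		return -ENOMEM;
 *
 *	if (ptr_ring_produce(&my_ring, obj))	(returns -ENOSPC when full)
 *		my_free(obj);
 *
 *	obj = ptr_ring_consume(&my_ring);	(NULL when the ring is empty)
 *
 *	ptr_ring_cleanup(&my_ring, my_free);	(frees any leftover entries)
 */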

/*
 * Return entries into ring. Destroy entries that don't fit.
 *
 * Note: this is expected to be a rare slow path operation.
 *
 * Note: producer lock is nested within consumer lock, so if you
 * resize you must make sure all uses nest correctly.
 * In particular if you consume ring in interrupt or BH context, you must
 * disable interrupts/BH when doing so.
 */
static inline void ptr_ring_unconsume(struct ptr_ring *r, void **batch, int n,
				      void (*destroy)(void *))
{
	unsigned long flags;
	int head;

	spin_lock_irqsave(&r->consumer_lock, flags);
	spin_lock(&r->producer_lock);

	if (!r->size)
		goto done;

	/*
	 * Clean out buffered entries (for simplicity). This way the following
	 * code can test entries for NULL and, if not NULL, assume they are valid.
	 */
	head = r->consumer_head - 1;
	while (likely(head >= r->consumer_tail))
		r->queue[head--] = NULL;
	r->consumer_tail = r->consumer_head;

	/*
	 * Go over entries in batch, start moving head back and copy entries.
	 * Stop when we run into previously unconsumed entries.
	 */
	while (n) {
		head = r->consumer_head - 1;
		if (head < 0)
			head = r->size - 1;
		if (r->queue[head]) {
			/* This batch entry will have to be destroyed. */
			goto done;
		}
		r->queue[head] = batch[--n];
		r->consumer_tail = head;
		/* matching READ_ONCE in __ptr_ring_empty for lockless tests */
		WRITE_ONCE(r->consumer_head, head);
	}

done:
	/* Destroy all entries left in the batch. */
	while (n)
		destroy(batch[--n]);
	spin_unlock(&r->producer_lock);
	spin_unlock_irqrestore(&r->consumer_lock, flags);
}
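
/* Example (illustrative sketch): returning entries that were taken with a
 * *_consume_batched() call but could not be processed, letting a hypothetical
 * my_free() destroy whatever no longer fits back into the ring.
 *
 *	ptr_ring_unconsume(&my_ring, batch, n, my_free);
 */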

static inline void **__ptr_ring_swap_queue(struct ptr_ring *r, void **queue,
					   int size, gfp_t gfp,
					   void (*destroy)(void *))
{
	int producer = 0;
	void **old;
	void *ptr;

	while ((ptr = __ptr_ring_consume(r)))
		if (producer < size)
			queue[producer++] = ptr;
		else if (destroy)
			destroy(ptr);

	if (producer >= size)
		producer = 0;
	__ptr_ring_set_size(r, size);
	r->producer = producer;
	r->consumer_head = 0;
	r->consumer_tail = 0;
	old = r->queue;
	r->queue = queue;

	return old;
}

/*
 * Note: producer lock is nested within consumer lock, so if you
 * resize you must make sure all uses nest correctly.
 * In particular if you consume ring in interrupt or BH context, you must
 * disable interrupts/BH when doing so.
 */
static inline int ptr_ring_resize(struct ptr_ring *r, int size, gfp_t gfp,
				  void (*destroy)(void *))
{
	unsigned long flags;
	void **queue = __ptr_ring_init_queue_alloc(size, gfp);
	void **old;

	if (!queue)
		return -ENOMEM;

	spin_lock_irqsave(&(r)->consumer_lock, flags);
	spin_lock(&(r)->producer_lock);

	old = __ptr_ring_swap_queue(r, queue, size, gfp, destroy);

	spin_unlock(&(r)->producer_lock);
	spin_unlock_irqrestore(&(r)->consumer_lock, flags);

	kvfree(old);

	return 0;
}
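
/* Example (illustrative sketch): growing a ring to 512 entries; entries that
 * do not fit are passed to a hypothetical my_free() callback.  On allocation
 * failure the old queue is left untouched.
 *
 *	err = ptr_ring_resize(&my_ring, 512, GFP_KERNEL, my_free);
 *	if (err)
 *		return err;
 */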

/*
 * Note: producer lock is nested within consumer lock, so if you
 * resize you must make sure all uses nest correctly.
 * In particular if you consume ring in interrupt or BH context, you must
 * disable interrupts/BH when doing so.
 */
static inline int ptr_ring_resize_multiple(struct ptr_ring **rings,
					   unsigned int nrings,
					   int size,
					   gfp_t gfp, void (*destroy)(void *))
{
	unsigned long flags;
	void ***queues;
	int i;

	queues = kmalloc_array(nrings, sizeof(*queues), gfp);
	if (!queues)
		goto noqueues;

	for (i = 0; i < nrings; ++i) {
		queues[i] = __ptr_ring_init_queue_alloc(size, gfp);
		if (!queues[i])
			goto nomem;
	}

	for (i = 0; i < nrings; ++i) {
		spin_lock_irqsave(&(rings[i])->consumer_lock, flags);
		spin_lock(&(rings[i])->producer_lock);
		queues[i] = __ptr_ring_swap_queue(rings[i], queues[i],
						  size, gfp, destroy);
		spin_unlock(&(rings[i])->producer_lock);
		spin_unlock_irqrestore(&(rings[i])->consumer_lock, flags);
	}

	for (i = 0; i < nrings; ++i)
		kvfree(queues[i]);

	kfree(queues);

	return 0;

nomem:
	while (--i >= 0)
		kvfree(queues[i]);

	kfree(queues);

noqueues:
	return -ENOMEM;
}

static inline void ptr_ring_cleanup(struct ptr_ring *r, void (*destroy)(void *))
{
	void *ptr;

	if (destroy)
		while ((ptr = ptr_ring_consume(r)))
			destroy(ptr);
	kvfree(r->queue);
}

#endif /* _LINUX_PTR_RING_H */