/* SPDX-License-Identifier: GPL-2.0
 *
 * page_pool.c
 *	Author:	Jesper Dangaard Brouer <netoptimizer@brouer.com>
 *	Copyright (C) 2016 Red Hat, Inc.
 */

#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/device.h>

#include <net/page_pool/helpers.h>
#include <net/xdp.h>

#include <linux/dma-direction.h>
#include <linux/dma-mapping.h>
#include <linux/page-flags.h>
#include <linux/mm.h> /* for put_page() */
#include <linux/poison.h>
#include <linux/ethtool.h>
#include <linux/netdevice.h>

#include <trace/events/page_pool.h>

#define DEFER_TIME (msecs_to_jiffies(1000))
#define DEFER_WARN_INTERVAL (60 * HZ)

#define BIAS_MAX	LONG_MAX

#ifdef CONFIG_PAGE_POOL_STATS
/* alloc_stat_inc is intended to be used in softirq context */
#define alloc_stat_inc(pool, __stat)	(pool->alloc_stats.__stat++)
/* recycle_stat_inc is safe to use when preemption is possible. */
#define recycle_stat_inc(pool, __stat)						\
	do {									\
		struct page_pool_recycle_stats __percpu *s = pool->recycle_stats; \
		this_cpu_inc(s->__stat);					\
	} while (0)

#define recycle_stat_add(pool, __stat, val)					\
	do {									\
		struct page_pool_recycle_stats __percpu *s = pool->recycle_stats; \
		this_cpu_add(s->__stat, val);					\
	} while (0)

static const char pp_stats[][ETH_GSTRING_LEN] = {
	"rx_pp_alloc_fast",
	"rx_pp_alloc_slow",
	"rx_pp_alloc_slow_ho",
	"rx_pp_alloc_empty",
	"rx_pp_alloc_refill",
	"rx_pp_alloc_waive",
	"rx_pp_recycle_cached",
	"rx_pp_recycle_cache_full",
	"rx_pp_recycle_ring",
	"rx_pp_recycle_ring_full",
	"rx_pp_recycle_released_ref",
};

/**
 * page_pool_get_stats() - fetch page pool stats
 * @pool: pool from which pages were allocated
 * @stats: struct page_pool_stats to fill in
 *
 * Retrieve statistics about the page_pool. This API is only available
 * if the kernel has been configured with ``CONFIG_PAGE_POOL_STATS=y``.
 * The caller passes in a pointer to a caller-allocated struct
 * page_pool_stats, which this function fills in. The caller can then
 * report those stats to the user (perhaps via ethtool, debugfs, etc.).
 */
bool page_pool_get_stats(struct page_pool *pool,
			 struct page_pool_stats *stats)
{
	int cpu = 0;

	if (!stats)
		return false;

	/* The caller is responsible for initializing stats. */
	stats->alloc_stats.fast += pool->alloc_stats.fast;
	stats->alloc_stats.slow += pool->alloc_stats.slow;
	stats->alloc_stats.slow_high_order += pool->alloc_stats.slow_high_order;
	stats->alloc_stats.empty += pool->alloc_stats.empty;
	stats->alloc_stats.refill += pool->alloc_stats.refill;
	stats->alloc_stats.waive += pool->alloc_stats.waive;

	for_each_possible_cpu(cpu) {
		const struct page_pool_recycle_stats *pcpu =
			per_cpu_ptr(pool->recycle_stats, cpu);

		stats->recycle_stats.cached += pcpu->cached;
		stats->recycle_stats.cache_full += pcpu->cache_full;
		stats->recycle_stats.ring += pcpu->ring;
		stats->recycle_stats.ring_full += pcpu->ring_full;
		stats->recycle_stats.released_refcnt += pcpu->released_refcnt;
	}

	return true;
}
EXPORT_SYMBOL(page_pool_get_stats);
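
/* A minimal usage sketch (not part of this file): since the function above
 * accumulates into @stats, a driver can sum over all of its pools in, e.g.,
 * its ethtool ->get_ethtool_stats() callback. The "priv"/"rx_ring" layout
 * below is hypothetical, for illustration only:
 *
 *	struct page_pool_stats stats = { };
 *	int i;
 *
 *	for (i = 0; i < priv->num_rx_rings; i++)
 *		page_pool_get_stats(priv->rx_ring[i].page_pool, &stats);
 *	// stats now holds the sums; report via ethtool/debugfs.
 */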

u8 *page_pool_ethtool_stats_get_strings(u8 *data)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(pp_stats); i++) {
		memcpy(data, pp_stats[i], ETH_GSTRING_LEN);
		data += ETH_GSTRING_LEN;
	}

	return data;
}
EXPORT_SYMBOL(page_pool_ethtool_stats_get_strings);

int page_pool_ethtool_stats_get_count(void)
{
	return ARRAY_SIZE(pp_stats);
}
EXPORT_SYMBOL(page_pool_ethtool_stats_get_count);

u64 *page_pool_ethtool_stats_get(u64 *data, void *stats)
{
	struct page_pool_stats *pool_stats = stats;

	*data++ = pool_stats->alloc_stats.fast;
	*data++ = pool_stats->alloc_stats.slow;
	*data++ = pool_stats->alloc_stats.slow_high_order;
	*data++ = pool_stats->alloc_stats.empty;
	*data++ = pool_stats->alloc_stats.refill;
	*data++ = pool_stats->alloc_stats.waive;
	*data++ = pool_stats->recycle_stats.cached;
	*data++ = pool_stats->recycle_stats.cache_full;
	*data++ = pool_stats->recycle_stats.ring;
	*data++ = pool_stats->recycle_stats.ring_full;
	*data++ = pool_stats->recycle_stats.released_refcnt;

	return data;
}
EXPORT_SYMBOL(page_pool_ethtool_stats_get);
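
/* A hedged sketch of how the three ethtool helpers above compose in a
 * driver's ethtool_ops callbacks (the callback bodies shown are assumed,
 * for illustration):
 *
 *	// in ->get_sset_count(ETH_SS_STATS):
 *	return page_pool_ethtool_stats_get_count();
 *
 *	// in ->get_strings(ETH_SS_STATS, data):
 *	data = page_pool_ethtool_stats_get_strings(data);
 *
 *	// in ->get_ethtool_stats(), after filling pp_stats via
 *	// page_pool_get_stats():
 *	data = page_pool_ethtool_stats_get(data, &pp_stats);
 *
 * Each helper returns the advanced cursor, so driver-private counters can
 * be appended after the page_pool block.
 */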

#else
#define alloc_stat_inc(pool, __stat)
#define recycle_stat_inc(pool, __stat)
#define recycle_stat_add(pool, __stat, val)
#endif

static bool page_pool_producer_lock(struct page_pool *pool)
	__acquires(&pool->ring.producer_lock)
{
	bool in_softirq = in_softirq();

	if (in_softirq)
		spin_lock(&pool->ring.producer_lock);
	else
		spin_lock_bh(&pool->ring.producer_lock);

	return in_softirq;
}

static void page_pool_producer_unlock(struct page_pool *pool,
				      bool in_softirq)
	__releases(&pool->ring.producer_lock)
{
	if (in_softirq)
		spin_unlock(&pool->ring.producer_lock);
	else
		spin_unlock_bh(&pool->ring.producer_lock);
}

static int page_pool_init(struct page_pool *pool,
			  const struct page_pool_params *params)
{
	unsigned int ring_qsize = 1024; /* Default */

	memcpy(&pool->p, params, sizeof(pool->p));

	/* Validate only known flags were used */
	if (pool->p.flags & ~(PP_FLAG_ALL))
		return -EINVAL;

	if (pool->p.pool_size)
		ring_qsize = pool->p.pool_size;

	/* Sanity limit mem that can be pinned down */
	if (ring_qsize > 32768)
		return -E2BIG;

	/* DMA direction is either DMA_FROM_DEVICE or DMA_BIDIRECTIONAL.
	 * DMA_BIDIRECTIONAL additionally allows the page to be used for
	 * DMA sending, which is the XDP_TX use-case.
	 */
	if (pool->p.flags & PP_FLAG_DMA_MAP) {
		if ((pool->p.dma_dir != DMA_FROM_DEVICE) &&
		    (pool->p.dma_dir != DMA_BIDIRECTIONAL))
			return -EINVAL;
	}

	if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV) {
		/* In order to request DMA-sync-for-device the page
		 * needs to be mapped
		 */
		if (!(pool->p.flags & PP_FLAG_DMA_MAP))
			return -EINVAL;

		if (!pool->p.max_len)
			return -EINVAL;

		/* pool->p.offset has to be set according to the address
		 * offset used by the DMA engine to start copying rx data
		 */
	}

	if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT &&
	    pool->p.flags & PP_FLAG_PAGE_FRAG)
		return -EINVAL;

#ifdef CONFIG_PAGE_POOL_STATS
	pool->recycle_stats = alloc_percpu(struct page_pool_recycle_stats);
	if (!pool->recycle_stats)
		return -ENOMEM;
#endif

	if (ptr_ring_init(&pool->ring, ring_qsize, GFP_KERNEL) < 0) {
#ifdef CONFIG_PAGE_POOL_STATS
		free_percpu(pool->recycle_stats);
#endif
		return -ENOMEM;
	}

	atomic_set(&pool->pages_state_release_cnt, 0);

	/* A driver calling page_pool_create() must also call page_pool_destroy() */
	refcount_set(&pool->user_cnt, 1);

	if (pool->p.flags & PP_FLAG_DMA_MAP)
		get_device(pool->p.dev);

	return 0;
}

/**
 * page_pool_create() - create a page pool.
 * @params: parameters, see struct page_pool_params
 */
struct page_pool *page_pool_create(const struct page_pool_params *params)
{
	struct page_pool *pool;
	int err;

	pool = kzalloc_node(sizeof(*pool), GFP_KERNEL, params->nid);
	if (!pool)
		return ERR_PTR(-ENOMEM);

	err = page_pool_init(pool, params);
	if (err < 0) {
		pr_warn("%s() gave up with errno %d\n", __func__, err);
		kfree(pool);
		return ERR_PTR(err);
	}

	return pool;
}
EXPORT_SYMBOL(page_pool_create);
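
/* A minimal creation sketch, assuming a hypothetical Ethernet driver with
 * "dev" (its DMA-capable struct device) in scope; the ring size of 256 is
 * an arbitrary example value:
 *
 *	struct page_pool_params pp_params = {
 *		.flags		= PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV,
 *		.order		= 0,
 *		.pool_size	= 256,
 *		.nid		= NUMA_NO_NODE,
 *		.dev		= dev,
 *		.dma_dir	= DMA_FROM_DEVICE,
 *		.max_len	= PAGE_SIZE,
 *		.offset		= 0,
 *	};
 *	struct page_pool *pool = page_pool_create(&pp_params);
 *
 *	if (IS_ERR(pool))
 *		return PTR_ERR(pool);
 */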

static void page_pool_return_page(struct page_pool *pool, struct page *page);

noinline
static struct page *page_pool_refill_alloc_cache(struct page_pool *pool)
{
	struct ptr_ring *r = &pool->ring;
	struct page *page;
	int pref_nid; /* preferred NUMA node */

	/* Quicker fallback, avoid locks when ring is empty */
	if (__ptr_ring_empty(r)) {
		alloc_stat_inc(pool, empty);
		return NULL;
	}

	/* Softirq guarantees the CPU, and thus the NUMA node, is stable.
	 * This assumes the CPU refilling the driver RX-ring also runs
	 * RX-NAPI.
	 */
#ifdef CONFIG_NUMA
	pref_nid = (pool->p.nid == NUMA_NO_NODE) ? numa_mem_id() : pool->p.nid;
#else
	/* Ignore pool->p.nid setting if !CONFIG_NUMA, helps compiler */
	pref_nid = numa_mem_id(); /* will be zero like page_to_nid() */
#endif

	/* Refill alloc array, but only if NUMA match */
	do {
		page = __ptr_ring_consume(r);
		if (unlikely(!page))
			break;

		if (likely(page_to_nid(page) == pref_nid)) {
			pool->alloc.cache[pool->alloc.count++] = page;
		} else {
			/* NUMA mismatch;
			 * (1) release 1 page to page-allocator and
			 * (2) break out to fallthrough to alloc_pages_node.
			 * This limits stress on the page buddy allocator.
			 */
			page_pool_return_page(pool, page);
			alloc_stat_inc(pool, waive);
			page = NULL;
			break;
		}
	} while (pool->alloc.count < PP_ALLOC_CACHE_REFILL);

	/* Return last page */
	if (likely(pool->alloc.count > 0)) {
		page = pool->alloc.cache[--pool->alloc.count];
		alloc_stat_inc(pool, refill);
	}

	return page;
}

/* fast path */
static struct page *__page_pool_get_cached(struct page_pool *pool)
{
	struct page *page;

	/* Caller MUST guarantee safe non-concurrent access, e.g. softirq */
	if (likely(pool->alloc.count)) {
		/* Fast-path */
		page = pool->alloc.cache[--pool->alloc.count];
		alloc_stat_inc(pool, fast);
	} else {
		page = page_pool_refill_alloc_cache(pool);
	}

	return page;
}

static void page_pool_dma_sync_for_device(struct page_pool *pool,
					  struct page *page,
					  unsigned int dma_sync_size)
{
	dma_addr_t dma_addr = page_pool_get_dma_addr(page);

	dma_sync_size = min(dma_sync_size, pool->p.max_len);
	dma_sync_single_range_for_device(pool->p.dev, dma_addr,
					 pool->p.offset, dma_sync_size,
					 pool->p.dma_dir);
}

static bool page_pool_dma_map(struct page_pool *pool, struct page *page)
{
	dma_addr_t dma;

	/* Setup DMA mapping: use 'struct page' area for storing DMA-addr
	 * since dma_addr_t can be either 32 or 64 bits and does not always
	 * fit into page private data (e.g. a 32-bit CPU with 64-bit DMA caps).
	 * This mapping is kept for the lifetime of the page, until it
	 * leaves the pool.
	 */
	dma = dma_map_page_attrs(pool->p.dev, page, 0,
				 (PAGE_SIZE << pool->p.order),
				 pool->p.dma_dir, DMA_ATTR_SKIP_CPU_SYNC |
						  DMA_ATTR_WEAK_ORDERING);
	if (dma_mapping_error(pool->p.dev, dma))
		return false;

	page_pool_set_dma_addr(page, dma);

	if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV)
		page_pool_dma_sync_for_device(pool, page, pool->p.max_len);

	return true;
}

static void page_pool_set_pp_info(struct page_pool *pool,
				  struct page *page)
{
	page->pp = pool;
	page->pp_magic |= PP_SIGNATURE;
	if (pool->p.init_callback)
		pool->p.init_callback(page, pool->p.init_arg);
}

static void page_pool_clear_pp_info(struct page *page)
{
	page->pp_magic = 0;
	page->pp = NULL;
}

static struct page *__page_pool_alloc_page_order(struct page_pool *pool,
						 gfp_t gfp)
{
	struct page *page;

	gfp |= __GFP_COMP;
	page = alloc_pages_node(pool->p.nid, gfp, pool->p.order);
	if (unlikely(!page))
		return NULL;

	if ((pool->p.flags & PP_FLAG_DMA_MAP) &&
	    unlikely(!page_pool_dma_map(pool, page))) {
		put_page(page);
		return NULL;
	}

	alloc_stat_inc(pool, slow_high_order);
	page_pool_set_pp_info(pool, page);

	/* Track how many pages are held 'in-flight' */
	pool->pages_state_hold_cnt++;
	trace_page_pool_state_hold(pool, page, pool->pages_state_hold_cnt);
	return page;
}

/* slow path */
noinline
static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool,
						 gfp_t gfp)
{
	const int bulk = PP_ALLOC_CACHE_REFILL;
	unsigned int pp_flags = pool->p.flags;
	unsigned int pp_order = pool->p.order;
	struct page *page;
	int i, nr_pages;

	/* Don't support bulk alloc for high-order pages */
	if (unlikely(pp_order))
		return __page_pool_alloc_page_order(pool, gfp);

	/* Unnecessary as alloc cache is empty, but guarantees zero count */
	if (unlikely(pool->alloc.count > 0))
		return pool->alloc.cache[--pool->alloc.count];

	/* Mark empty alloc.cache slots "empty" for alloc_pages_bulk_array */
	memset(&pool->alloc.cache, 0, sizeof(void *) * bulk);

	nr_pages = alloc_pages_bulk_array_node(gfp, pool->p.nid, bulk,
					       pool->alloc.cache);
	if (unlikely(!nr_pages))
		return NULL;

	/* Pages have been filled into the alloc.cache array, but the count
	 * is zero and the pages have not (possibly) been DMA mapped yet.
	 */
	for (i = 0; i < nr_pages; i++) {
		page = pool->alloc.cache[i];
		if ((pp_flags & PP_FLAG_DMA_MAP) &&
		    unlikely(!page_pool_dma_map(pool, page))) {
			put_page(page);
			continue;
		}

		page_pool_set_pp_info(pool, page);
		pool->alloc.cache[pool->alloc.count++] = page;
		/* Track how many pages are held 'in-flight' */
		pool->pages_state_hold_cnt++;
		trace_page_pool_state_hold(pool, page,
					   pool->pages_state_hold_cnt);
	}

	/* Return last page */
	if (likely(pool->alloc.count > 0)) {
		page = pool->alloc.cache[--pool->alloc.count];
		alloc_stat_inc(pool, slow);
	} else {
		page = NULL;
	}

	/* A page that was just allocated should/must have refcnt 1. */
	return page;
}

/* For using page_pool to replace alloc_pages() API calls, while providing
 * a synchronization guarantee for the allocation side.
 */
struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp)
{
	struct page *page;

	/* Fast-path: Get a page from cache */
	page = __page_pool_get_cached(pool);
	if (page)
		return page;

	/* Slow-path: cache empty, do real allocation */
	page = __page_pool_alloc_pages_slow(pool, gfp);
	return page;
}
EXPORT_SYMBOL(page_pool_alloc_pages);
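
/* A hedged RX-refill sketch: inside NAPI, each RX descriptor is typically
 * backed by a pool page. The "rxq" structure and descriptor write below
 * are hypothetical:
 *
 *	struct page *page = page_pool_alloc_pages(rxq->page_pool, GFP_ATOMIC);
 *
 *	if (!page)
 *		return -ENOMEM;
 *	rxq->desc[i].addr = page_pool_get_dma_addr(page) + rxq->headroom;
 */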

/* Calculate distance between two u32 values, valid if distance is below 2^(31)
 * https://en.wikipedia.org/wiki/Serial_number_arithmetic#General_Solution
 */
#define _distance(a, b)	(s32)((a) - (b))
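
/* Worked example of the serial-number arithmetic above: with
 * hold_cnt = 3 after the u32 hold counter wrapped, and
 * release_cnt = 0xfffffffe still behind the wrap point,
 * _distance(3, 0xfffffffe) = (s32)(3 - 0xfffffffe) = 5, the correct
 * in-flight count despite the wraparound.
 */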

static s32 page_pool_inflight(struct page_pool *pool)
{
	u32 release_cnt = atomic_read(&pool->pages_state_release_cnt);
	u32 hold_cnt = READ_ONCE(pool->pages_state_hold_cnt);
	s32 inflight;

	inflight = _distance(hold_cnt, release_cnt);

	trace_page_pool_release(pool, inflight, hold_cnt, release_cnt);
	WARN(inflight < 0, "Negative(%d) inflight packet-pages", inflight);

	return inflight;
}

/* Disconnects a page (from a page_pool). API users can have a need
 * to disconnect a page, to allow it to be used as a regular page
 * (that will eventually be returned to the normal page-allocator via
 * put_page).
 */
static void page_pool_return_page(struct page_pool *pool, struct page *page)
{
	dma_addr_t dma;
	int count;

	if (!(pool->p.flags & PP_FLAG_DMA_MAP))
		/* Always account for inflight pages, even if we didn't
		 * map them
		 */
		goto skip_dma_unmap;

	dma = page_pool_get_dma_addr(page);

	/* When page is unmapped, it cannot be returned to our pool */
	dma_unmap_page_attrs(pool->p.dev, dma,
			     PAGE_SIZE << pool->p.order, pool->p.dma_dir,
			     DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
	page_pool_set_dma_addr(page, 0);
skip_dma_unmap:
	page_pool_clear_pp_info(page);

	/* This may be the last page returned, releasing the pool, so
	 * it is not safe to reference pool afterwards.
	 */
	count = atomic_inc_return_relaxed(&pool->pages_state_release_cnt);
	trace_page_pool_state_release(pool, page, count);

	put_page(page);
	/* An optimization would be to call __free_pages(page, pool->p.order)
	 * knowing page is not part of page-cache (thus avoiding a
	 * __page_cache_release() call).
	 */
}

static bool page_pool_recycle_in_ring(struct page_pool *pool, struct page *page)
{
	int ret;
	/* BH protection not needed if current is softirq */
	if (in_softirq())
		ret = ptr_ring_produce(&pool->ring, page);
	else
		ret = ptr_ring_produce_bh(&pool->ring, page);

	if (!ret) {
		recycle_stat_inc(pool, ring);
		return true;
	}

	return false;
}

/* Only allow direct recycling in special circumstances, into the
 * alloc side cache. E.g. during RX-NAPI processing for XDP_DROP use-case.
 *
 * Caller must provide appropriate safe context.
 */
static bool page_pool_recycle_in_cache(struct page *page,
				       struct page_pool *pool)
{
	if (unlikely(pool->alloc.count == PP_ALLOC_CACHE_SIZE)) {
		recycle_stat_inc(pool, cache_full);
		return false;
	}

	/* Caller MUST have verified/know (page_ref_count(page) == 1) */
	pool->alloc.cache[pool->alloc.count++] = page;
	recycle_stat_inc(pool, cached);
	return true;
}

/* If the page refcnt == 1, this will try to recycle the page.
 * If PP_FLAG_DMA_SYNC_DEV is set, we'll try to sync the DMA area for
 * the configured size min(dma_sync_size, pool->max_len).
 * If the page refcnt != 1, then the page will be returned to the memory
 * subsystem.
 */
static __always_inline struct page *
__page_pool_put_page(struct page_pool *pool, struct page *page,
		     unsigned int dma_sync_size, bool allow_direct)
{
	lockdep_assert_no_hardirq();

	/* This allocator is optimized for the XDP mode that uses
	 * one-frame-per-page, but has fallbacks that act like the
	 * regular page allocator APIs.
	 *
	 * refcnt == 1 means page_pool owns page, and can recycle it.
	 *
	 * A page is NOT reusable when allocated while the system is
	 * under memory pressure. (page_is_pfmemalloc)
	 */
	if (likely(page_ref_count(page) == 1 && !page_is_pfmemalloc(page))) {
		/* Read barrier done in page_ref_count / READ_ONCE */

		if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV)
			page_pool_dma_sync_for_device(pool, page,
						      dma_sync_size);

		if (allow_direct && in_softirq() &&
		    page_pool_recycle_in_cache(page, pool))
			return NULL;

		/* Page found as candidate for recycling */
		return page;
	}
	/* Fallback/non-XDP mode: API user has an elevated refcnt.
	 *
	 * Many drivers split up the page into fragments, and some
	 * want to keep doing this to save memory and do refcnt based
	 * recycling. Support this use case too, to ease drivers
	 * switching between XDP/non-XDP.
	 *
	 * In case the page_pool maintains the DMA mapping, the API user must
	 * call page_pool_put_page once. In this elevated refcnt
	 * case, the DMA is unmapped/released, as the driver is likely
	 * doing refcnt based recycle tricks, meaning another process
	 * will be invoking put_page.
	 */
	recycle_stat_inc(pool, released_refcnt);
	page_pool_return_page(pool, page);

	return NULL;
}


void page_pool_put_defragged_page(struct page_pool *pool, struct page *page,
				  unsigned int dma_sync_size, bool allow_direct)
{
	page = __page_pool_put_page(pool, page, dma_sync_size, allow_direct);
	if (page && !page_pool_recycle_in_ring(pool, page)) {
		/* Cache full, fallback to free pages */
		recycle_stat_inc(pool, ring_full);
		page_pool_return_page(pool, page);
	}
}
EXPORT_SYMBOL(page_pool_put_defragged_page);
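
/* Most drivers recycle through the page_pool_put_page() /
 * page_pool_put_full_page() wrappers in <net/page_pool/helpers.h>, which
 * handle the frag refcount before falling through to the function above.
 * A hedged sketch, e.g. on an XDP_DROP verdict inside NAPI:
 *
 *	page_pool_put_full_page(pool, page, true);	// allow_direct
 */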

/**
 * page_pool_put_page_bulk() - release references on multiple pages
 * @pool: pool from which pages were allocated
 * @data: array holding page pointers
 * @count: number of pages in @data
 *
 * Tries to refill a number of pages into the ptr_ring cache while holding
 * the ptr_ring producer lock. If the ptr_ring is full,
 * page_pool_put_page_bulk() will release the leftover pages to the page
 * allocator.
 * page_pool_put_page_bulk() is suitable to be run inside the driver NAPI tx
 * completion loop for the XDP_REDIRECT use case.
 *
 * Please note the caller must not use the data area after running
 * page_pool_put_page_bulk(), as this function overwrites it.
 */
void page_pool_put_page_bulk(struct page_pool *pool, void **data,
			     int count)
{
	int i, bulk_len = 0;
	bool in_softirq;

	for (i = 0; i < count; i++) {
		struct page *page = virt_to_head_page(data[i]);

		/* It is not the last user for the page frag case */
		if (!page_pool_is_last_frag(pool, page))
			continue;

		page = __page_pool_put_page(pool, page, -1, false);
		/* Approved for bulk recycling in ptr_ring cache */
		if (page)
			data[bulk_len++] = page;
	}

	if (unlikely(!bulk_len))
		return;

	/* Bulk producer into ptr_ring page_pool cache */
	in_softirq = page_pool_producer_lock(pool);
	for (i = 0; i < bulk_len; i++) {
		if (__ptr_ring_produce(&pool->ring, data[i])) {
			/* ring full */
			recycle_stat_inc(pool, ring_full);
			break;
		}
	}
	recycle_stat_add(pool, ring, i);
	page_pool_producer_unlock(pool, in_softirq);

	/* Hopefully all pages were returned into the ptr_ring */
	if (likely(i == bulk_len))
		return;

	/* ptr_ring cache full, free remaining pages outside producer lock
	 * since put_page() with refcnt == 1 can be an expensive operation
	 */
	for (; i < bulk_len; i++)
		page_pool_return_page(pool, data[i]);
}
EXPORT_SYMBOL(page_pool_put_page_bulk);
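
/* A hedged XDP_REDIRECT completion sketch: xdp_return_frame_bulk() (from
 * <net/xdp.h>) batches frames into a struct xdp_frame_bulk and flushes
 * through page_pool_put_page_bulk() internally. The "tx_ring" iteration
 * shape is hypothetical:
 *
 *	struct xdp_frame_bulk bq;
 *
 *	xdp_frame_bulk_init(&bq);
 *	for_each_completed_frame(tx_ring, xdpf)	// hypothetical iterator
 *		xdp_return_frame_bulk(xdpf, &bq);
 *	xdp_flush_frame_bulk(&bq);
 */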

static struct page *page_pool_drain_frag(struct page_pool *pool,
					 struct page *page)
{
	long drain_count = BIAS_MAX - pool->frag_users;

	/* Some user is still using the page frag */
	if (likely(page_pool_defrag_page(page, drain_count)))
		return NULL;

	if (page_ref_count(page) == 1 && !page_is_pfmemalloc(page)) {
		if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV)
			page_pool_dma_sync_for_device(pool, page, -1);

		return page;
	}

	page_pool_return_page(pool, page);
	return NULL;
}

static void page_pool_free_frag(struct page_pool *pool)
{
	long drain_count = BIAS_MAX - pool->frag_users;
	struct page *page = pool->frag_page;

	pool->frag_page = NULL;

	if (!page || page_pool_defrag_page(page, drain_count))
		return;

	page_pool_return_page(pool, page);
}

struct page *page_pool_alloc_frag(struct page_pool *pool,
				  unsigned int *offset,
				  unsigned int size, gfp_t gfp)
{
	unsigned int max_size = PAGE_SIZE << pool->p.order;
	struct page *page = pool->frag_page;

	if (WARN_ON(!(pool->p.flags & PP_FLAG_PAGE_FRAG) ||
		    size > max_size))
		return NULL;

	size = ALIGN(size, dma_get_cache_alignment());
	*offset = pool->frag_offset;

	if (page && *offset + size > max_size) {
		page = page_pool_drain_frag(pool, page);
		if (page) {
			alloc_stat_inc(pool, fast);
			goto frag_reset;
		}
	}

	if (!page) {
		page = page_pool_alloc_pages(pool, gfp);
		if (unlikely(!page)) {
			pool->frag_page = NULL;
			return NULL;
		}

		pool->frag_page = page;

frag_reset:
		pool->frag_users = 1;
		*offset = 0;
		pool->frag_offset = size;
		page_pool_fragment_page(page, BIAS_MAX);
		return page;
	}

	pool->frag_users++;
	pool->frag_offset = *offset + size;
	alloc_stat_inc(pool, fast);
	return page;
}
EXPORT_SYMBOL(page_pool_alloc_frag);
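
/* A hedged frag-allocation sketch: several small buffers can share one pool
 * page, each caller receiving an offset into it, and each later dropping its
 * reference via page_pool_put_page(). The 2048-byte size is an arbitrary
 * example; the pool must have been created with PP_FLAG_PAGE_FRAG:
 *
 *	unsigned int offset;
 *	struct page *page;
 *	dma_addr_t dma;
 *
 *	page = page_pool_alloc_frag(pool, &offset, 2048, GFP_ATOMIC);
 *	if (!page)
 *		return -ENOMEM;
 *	dma = page_pool_get_dma_addr(page) + offset;
 */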

static void page_pool_empty_ring(struct page_pool *pool)
{
	struct page *page;

	/* Empty recycle ring */
	while ((page = ptr_ring_consume_bh(&pool->ring))) {
		/* Verify the refcnt invariant of cached pages */
		if (!(page_ref_count(page) == 1))
			pr_crit("%s() page_pool refcnt %d violation\n",
				__func__, page_ref_count(page));

		page_pool_return_page(pool, page);
	}
}

static void page_pool_free(struct page_pool *pool)
{
	if (pool->disconnect)
		pool->disconnect(pool);

	ptr_ring_cleanup(&pool->ring, NULL);

	if (pool->p.flags & PP_FLAG_DMA_MAP)
		put_device(pool->p.dev);

#ifdef CONFIG_PAGE_POOL_STATS
	free_percpu(pool->recycle_stats);
#endif
	kfree(pool);
}

static void page_pool_empty_alloc_cache_once(struct page_pool *pool)
{
	struct page *page;

	if (pool->destroy_cnt)
		return;

	/* Empty alloc cache, assume caller made sure this is
	 * no longer in use, and page_pool_alloc_pages() cannot be
	 * called concurrently.
	 */
	while (pool->alloc.count) {
		page = pool->alloc.cache[--pool->alloc.count];
		page_pool_return_page(pool, page);
	}
}

static void page_pool_scrub(struct page_pool *pool)
{
	page_pool_empty_alloc_cache_once(pool);
	pool->destroy_cnt++;

	/* No more consumers should exist, but producers could still
	 * be in-flight.
	 */
	page_pool_empty_ring(pool);
}

static int page_pool_release(struct page_pool *pool)
{
	int inflight;

	page_pool_scrub(pool);
	inflight = page_pool_inflight(pool);
	if (!inflight)
		page_pool_free(pool);

	return inflight;
}

static void page_pool_release_retry(struct work_struct *wq)
{
	struct delayed_work *dwq = to_delayed_work(wq);
	struct page_pool *pool = container_of(dwq, typeof(*pool), release_dw);
	int inflight;

	inflight = page_pool_release(pool);
	if (!inflight)
		return;

	/* Periodic warning */
	if (time_after_eq(jiffies, pool->defer_warn)) {
		int sec = (s32)((u32)jiffies - (u32)pool->defer_start) / HZ;

		pr_warn("%s() stalled pool shutdown %d inflight %d sec\n",
			__func__, inflight, sec);
		pool->defer_warn = jiffies + DEFER_WARN_INTERVAL;
	}

	/* Still not ready to be disconnected, retry later */
	schedule_delayed_work(&pool->release_dw, DEFER_TIME);
}

void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *),
			   struct xdp_mem_info *mem)
{
	refcount_inc(&pool->user_cnt);
	pool->disconnect = disconnect;
	pool->xdp_mem_id = mem->id;
}

void page_pool_unlink_napi(struct page_pool *pool)
{
	if (!pool->p.napi)
		return;

	/* To avoid races with recycling and additional barriers make sure
	 * pool and NAPI are unlinked when NAPI is disabled.
	 */
	WARN_ON(!test_bit(NAPI_STATE_SCHED, &pool->p.napi->state) ||
		READ_ONCE(pool->p.napi->list_owner) != -1);

	WRITE_ONCE(pool->p.napi, NULL);
}
EXPORT_SYMBOL(page_pool_unlink_napi);

void page_pool_destroy(struct page_pool *pool)
{
	if (!pool)
		return;

	if (!page_pool_put(pool))
		return;

	page_pool_unlink_napi(pool);
	page_pool_free_frag(pool);

	if (!page_pool_release(pool))
		return;

	pool->defer_start = jiffies;
	pool->defer_warn  = jiffies + DEFER_WARN_INTERVAL;

	INIT_DELAYED_WORK(&pool->release_dw, page_pool_release_retry);
	schedule_delayed_work(&pool->release_dw, DEFER_TIME);
}
EXPORT_SYMBOL(page_pool_destroy);

/* Caller must provide appropriate safe context, e.g. NAPI. */
void page_pool_update_nid(struct page_pool *pool, int new_nid)
{
	struct page *page;

	trace_page_pool_update_nid(pool, new_nid);
	pool->p.nid = new_nid;

	/* Flush pool alloc cache, as refill will check NUMA node */
	while (pool->alloc.count) {
		page = pool->alloc.cache[--pool->alloc.count];
		page_pool_return_page(pool, page);
	}
}
EXPORT_SYMBOL(page_pool_update_nid);