xref: /openbmc/linux/net/rds/page.c (revision be6e4d66)
/*
 * Copyright (c) 2006 Oracle.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */
337875e18eSAndy Grover #include <linux/highmem.h>
345a0e3ad6STejun Heo #include <linux/gfp.h>
3580f1ff97SAmerigo Wang #include <linux/cpu.h>
36bc3b2d7fSPaul Gortmaker #include <linux/export.h>
377875e18eSAndy Grover 
387875e18eSAndy Grover #include "rds.h"
397875e18eSAndy Grover 
/*
 * A cached, partially handed-out page.  One instance exists per cpu and is
 * used by rds_page_remainder_alloc() to serve sub-page requests.
 */
struct rds_page_remainder {
	/* page fragments are currently cut from; NULL when nothing is cached */
	struct page	*r_page;
	/* byte offset inside r_page where the next fragment will start */
	unsigned long	r_offset;
};
447875e18eSAndy Grover 
4525528213SPeter Zijlstra static
4625528213SPeter Zijlstra DEFINE_PER_CPU_SHARED_ALIGNED(struct rds_page_remainder, rds_page_remainders);
477875e18eSAndy Grover 
482c53040fSBen Hutchings /**
492c53040fSBen Hutchings  * rds_page_remainder_alloc - build up regions of a message.
507875e18eSAndy Grover  *
512c53040fSBen Hutchings  * @scat: Scatter list for message
522c53040fSBen Hutchings  * @bytes: the number of bytes needed.
532c53040fSBen Hutchings  * @gfp: the waiting behaviour of the allocation
547875e18eSAndy Grover  *
557875e18eSAndy Grover  * @gfp is always ored with __GFP_HIGHMEM.  Callers must be prepared to
567875e18eSAndy Grover  * kmap the pages, etc.
577875e18eSAndy Grover  *
587875e18eSAndy Grover  * If @bytes is at least a full page then this just returns a page from
597875e18eSAndy Grover  * alloc_page().
607875e18eSAndy Grover  *
617875e18eSAndy Grover  * If @bytes is a partial page then this stores the unused region of the
627875e18eSAndy Grover  * page in a per-cpu structure.  Future partial-page allocations may be
637875e18eSAndy Grover  * satisfied from that cached region.  This lets us waste less memory on
647875e18eSAndy Grover  * small allocations with minimal complexity.  It works because the transmit
657875e18eSAndy Grover  * path passes read-only page regions down to devices.  They hold a page
667875e18eSAndy Grover  * reference until they are done with the region.
677875e18eSAndy Grover  */
rds_page_remainder_alloc(struct scatterlist * scat,unsigned long bytes,gfp_t gfp)687875e18eSAndy Grover int rds_page_remainder_alloc(struct scatterlist *scat, unsigned long bytes,
697875e18eSAndy Grover 			     gfp_t gfp)
707875e18eSAndy Grover {
717875e18eSAndy Grover 	struct rds_page_remainder *rem;
727875e18eSAndy Grover 	unsigned long flags;
737875e18eSAndy Grover 	struct page *page;
747875e18eSAndy Grover 	int ret;
757875e18eSAndy Grover 
767875e18eSAndy Grover 	gfp |= __GFP_HIGHMEM;
777875e18eSAndy Grover 
787875e18eSAndy Grover 	/* jump straight to allocation if we're trying for a huge page */
797875e18eSAndy Grover 	if (bytes >= PAGE_SIZE) {
807875e18eSAndy Grover 		page = alloc_page(gfp);
818690bfa1SAndy Grover 		if (!page) {
827875e18eSAndy Grover 			ret = -ENOMEM;
837875e18eSAndy Grover 		} else {
847875e18eSAndy Grover 			sg_set_page(scat, page, PAGE_SIZE, 0);
857875e18eSAndy Grover 			ret = 0;
867875e18eSAndy Grover 		}
877875e18eSAndy Grover 		goto out;
887875e18eSAndy Grover 	}
897875e18eSAndy Grover 
907875e18eSAndy Grover 	rem = &per_cpu(rds_page_remainders, get_cpu());
917875e18eSAndy Grover 	local_irq_save(flags);
927875e18eSAndy Grover 
937875e18eSAndy Grover 	while (1) {
947875e18eSAndy Grover 		/* avoid a tiny region getting stuck by tossing it */
957875e18eSAndy Grover 		if (rem->r_page && bytes > (PAGE_SIZE - rem->r_offset)) {
967875e18eSAndy Grover 			rds_stats_inc(s_page_remainder_miss);
977875e18eSAndy Grover 			__free_page(rem->r_page);
987875e18eSAndy Grover 			rem->r_page = NULL;
997875e18eSAndy Grover 		}
1007875e18eSAndy Grover 
1017875e18eSAndy Grover 		/* hand out a fragment from the cached page */
1027875e18eSAndy Grover 		if (rem->r_page && bytes <= (PAGE_SIZE - rem->r_offset)) {
1037875e18eSAndy Grover 			sg_set_page(scat, rem->r_page, bytes, rem->r_offset);
1047875e18eSAndy Grover 			get_page(sg_page(scat));
1057875e18eSAndy Grover 
1067875e18eSAndy Grover 			if (rem->r_offset != 0)
1077875e18eSAndy Grover 				rds_stats_inc(s_page_remainder_hit);
1087875e18eSAndy Grover 
109e98499acSshamir rabinovitch 			rem->r_offset += ALIGN(bytes, 8);
110e98499acSshamir rabinovitch 			if (rem->r_offset >= PAGE_SIZE) {
1117875e18eSAndy Grover 				__free_page(rem->r_page);
1127875e18eSAndy Grover 				rem->r_page = NULL;
1137875e18eSAndy Grover 			}
1147875e18eSAndy Grover 			ret = 0;
1157875e18eSAndy Grover 			break;
1167875e18eSAndy Grover 		}
1177875e18eSAndy Grover 
1187875e18eSAndy Grover 		/* alloc if there is nothing for us to use */
1197875e18eSAndy Grover 		local_irq_restore(flags);
1207875e18eSAndy Grover 		put_cpu();
1217875e18eSAndy Grover 
1227875e18eSAndy Grover 		page = alloc_page(gfp);
1237875e18eSAndy Grover 
1247875e18eSAndy Grover 		rem = &per_cpu(rds_page_remainders, get_cpu());
1257875e18eSAndy Grover 		local_irq_save(flags);
1267875e18eSAndy Grover 
1278690bfa1SAndy Grover 		if (!page) {
1287875e18eSAndy Grover 			ret = -ENOMEM;
1297875e18eSAndy Grover 			break;
1307875e18eSAndy Grover 		}
1317875e18eSAndy Grover 
1327875e18eSAndy Grover 		/* did someone race to fill the remainder before us? */
1337875e18eSAndy Grover 		if (rem->r_page) {
1347875e18eSAndy Grover 			__free_page(page);
1357875e18eSAndy Grover 			continue;
1367875e18eSAndy Grover 		}
1377875e18eSAndy Grover 
1387875e18eSAndy Grover 		/* otherwise install our page and loop around to alloc */
1397875e18eSAndy Grover 		rem->r_page = page;
1407875e18eSAndy Grover 		rem->r_offset = 0;
1417875e18eSAndy Grover 	}
1427875e18eSAndy Grover 
1437875e18eSAndy Grover 	local_irq_restore(flags);
1447875e18eSAndy Grover 	put_cpu();
1457875e18eSAndy Grover out:
1467875e18eSAndy Grover 	rdsdebug("bytes %lu ret %d %p %u %u\n", bytes, ret,
1477875e18eSAndy Grover 		 ret ? NULL : sg_page(scat), ret ? 0 : scat->offset,
1487875e18eSAndy Grover 		 ret ? 0 : scat->length);
1497875e18eSAndy Grover 	return ret;
1507875e18eSAndy Grover }
1510b088e00SAndy Grover EXPORT_SYMBOL_GPL(rds_page_remainder_alloc);
1527875e18eSAndy Grover 
rds_page_exit(void)153f2830d09SSebastian Andrzej Siewior void rds_page_exit(void)
1547875e18eSAndy Grover {
155f2830d09SSebastian Andrzej Siewior 	unsigned int cpu;
156f2830d09SSebastian Andrzej Siewior 
157f2830d09SSebastian Andrzej Siewior 	for_each_possible_cpu(cpu) {
1587875e18eSAndy Grover 		struct rds_page_remainder *rem;
1597875e18eSAndy Grover 
1607875e18eSAndy Grover 		rem = &per_cpu(rds_page_remainders, cpu);
161f2830d09SSebastian Andrzej Siewior 		rdsdebug("cpu %u\n", cpu);
1627875e18eSAndy Grover 
1637875e18eSAndy Grover 		if (rem->r_page)
1647875e18eSAndy Grover 			__free_page(rem->r_page);
1657875e18eSAndy Grover 		rem->r_page = NULL;
1667875e18eSAndy Grover 	}
1677875e18eSAndy Grover }
168