/*
 * Copyright (c) 2006 Oracle. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */
337875e18eSAndy Grover #include <linux/highmem.h>
345a0e3ad6STejun Heo #include <linux/gfp.h>
3580f1ff97SAmerigo Wang #include <linux/cpu.h>
36bc3b2d7fSPaul Gortmaker #include <linux/export.h>
377875e18eSAndy Grover
387875e18eSAndy Grover #include "rds.h"
397875e18eSAndy Grover
407875e18eSAndy Grover struct rds_page_remainder {
417875e18eSAndy Grover struct page *r_page;
427875e18eSAndy Grover unsigned long r_offset;
437875e18eSAndy Grover };
447875e18eSAndy Grover
4525528213SPeter Zijlstra static
4625528213SPeter Zijlstra DEFINE_PER_CPU_SHARED_ALIGNED(struct rds_page_remainder, rds_page_remainders);
477875e18eSAndy Grover
482c53040fSBen Hutchings /**
492c53040fSBen Hutchings * rds_page_remainder_alloc - build up regions of a message.
507875e18eSAndy Grover *
512c53040fSBen Hutchings * @scat: Scatter list for message
522c53040fSBen Hutchings * @bytes: the number of bytes needed.
532c53040fSBen Hutchings * @gfp: the waiting behaviour of the allocation
547875e18eSAndy Grover *
557875e18eSAndy Grover * @gfp is always ored with __GFP_HIGHMEM. Callers must be prepared to
567875e18eSAndy Grover * kmap the pages, etc.
577875e18eSAndy Grover *
587875e18eSAndy Grover * If @bytes is at least a full page then this just returns a page from
597875e18eSAndy Grover * alloc_page().
607875e18eSAndy Grover *
617875e18eSAndy Grover * If @bytes is a partial page then this stores the unused region of the
627875e18eSAndy Grover * page in a per-cpu structure. Future partial-page allocations may be
637875e18eSAndy Grover * satisfied from that cached region. This lets us waste less memory on
647875e18eSAndy Grover * small allocations with minimal complexity. It works because the transmit
657875e18eSAndy Grover * path passes read-only page regions down to devices. They hold a page
667875e18eSAndy Grover * reference until they are done with the region.
677875e18eSAndy Grover */
rds_page_remainder_alloc(struct scatterlist * scat,unsigned long bytes,gfp_t gfp)687875e18eSAndy Grover int rds_page_remainder_alloc(struct scatterlist *scat, unsigned long bytes,
697875e18eSAndy Grover gfp_t gfp)
707875e18eSAndy Grover {
717875e18eSAndy Grover struct rds_page_remainder *rem;
727875e18eSAndy Grover unsigned long flags;
737875e18eSAndy Grover struct page *page;
747875e18eSAndy Grover int ret;
757875e18eSAndy Grover
767875e18eSAndy Grover gfp |= __GFP_HIGHMEM;
777875e18eSAndy Grover
787875e18eSAndy Grover /* jump straight to allocation if we're trying for a huge page */
797875e18eSAndy Grover if (bytes >= PAGE_SIZE) {
807875e18eSAndy Grover page = alloc_page(gfp);
818690bfa1SAndy Grover if (!page) {
827875e18eSAndy Grover ret = -ENOMEM;
837875e18eSAndy Grover } else {
847875e18eSAndy Grover sg_set_page(scat, page, PAGE_SIZE, 0);
857875e18eSAndy Grover ret = 0;
867875e18eSAndy Grover }
877875e18eSAndy Grover goto out;
887875e18eSAndy Grover }
897875e18eSAndy Grover
907875e18eSAndy Grover rem = &per_cpu(rds_page_remainders, get_cpu());
917875e18eSAndy Grover local_irq_save(flags);
927875e18eSAndy Grover
937875e18eSAndy Grover while (1) {
947875e18eSAndy Grover /* avoid a tiny region getting stuck by tossing it */
957875e18eSAndy Grover if (rem->r_page && bytes > (PAGE_SIZE - rem->r_offset)) {
967875e18eSAndy Grover rds_stats_inc(s_page_remainder_miss);
977875e18eSAndy Grover __free_page(rem->r_page);
987875e18eSAndy Grover rem->r_page = NULL;
997875e18eSAndy Grover }
1007875e18eSAndy Grover
1017875e18eSAndy Grover /* hand out a fragment from the cached page */
1027875e18eSAndy Grover if (rem->r_page && bytes <= (PAGE_SIZE - rem->r_offset)) {
1037875e18eSAndy Grover sg_set_page(scat, rem->r_page, bytes, rem->r_offset);
1047875e18eSAndy Grover get_page(sg_page(scat));
1057875e18eSAndy Grover
1067875e18eSAndy Grover if (rem->r_offset != 0)
1077875e18eSAndy Grover rds_stats_inc(s_page_remainder_hit);
1087875e18eSAndy Grover
109*e98499acSshamir rabinovitch rem->r_offset += ALIGN(bytes, 8);
110*e98499acSshamir rabinovitch if (rem->r_offset >= PAGE_SIZE) {
1117875e18eSAndy Grover __free_page(rem->r_page);
1127875e18eSAndy Grover rem->r_page = NULL;
1137875e18eSAndy Grover }
1147875e18eSAndy Grover ret = 0;
1157875e18eSAndy Grover break;
1167875e18eSAndy Grover }
1177875e18eSAndy Grover
1187875e18eSAndy Grover /* alloc if there is nothing for us to use */
1197875e18eSAndy Grover local_irq_restore(flags);
1207875e18eSAndy Grover put_cpu();
1217875e18eSAndy Grover
1227875e18eSAndy Grover page = alloc_page(gfp);
1237875e18eSAndy Grover
1247875e18eSAndy Grover rem = &per_cpu(rds_page_remainders, get_cpu());
1257875e18eSAndy Grover local_irq_save(flags);
1267875e18eSAndy Grover
1278690bfa1SAndy Grover if (!page) {
1287875e18eSAndy Grover ret = -ENOMEM;
1297875e18eSAndy Grover break;
1307875e18eSAndy Grover }
1317875e18eSAndy Grover
1327875e18eSAndy Grover /* did someone race to fill the remainder before us? */
1337875e18eSAndy Grover if (rem->r_page) {
1347875e18eSAndy Grover __free_page(page);
1357875e18eSAndy Grover continue;
1367875e18eSAndy Grover }
1377875e18eSAndy Grover
1387875e18eSAndy Grover /* otherwise install our page and loop around to alloc */
1397875e18eSAndy Grover rem->r_page = page;
1407875e18eSAndy Grover rem->r_offset = 0;
1417875e18eSAndy Grover }
1427875e18eSAndy Grover
1437875e18eSAndy Grover local_irq_restore(flags);
1447875e18eSAndy Grover put_cpu();
1457875e18eSAndy Grover out:
1467875e18eSAndy Grover rdsdebug("bytes %lu ret %d %p %u %u\n", bytes, ret,
1477875e18eSAndy Grover ret ? NULL : sg_page(scat), ret ? 0 : scat->offset,
1487875e18eSAndy Grover ret ? 0 : scat->length);
1497875e18eSAndy Grover return ret;
1507875e18eSAndy Grover }
1510b088e00SAndy Grover EXPORT_SYMBOL_GPL(rds_page_remainder_alloc);
1527875e18eSAndy Grover
rds_page_exit(void)153f2830d09SSebastian Andrzej Siewior void rds_page_exit(void)
1547875e18eSAndy Grover {
155f2830d09SSebastian Andrzej Siewior unsigned int cpu;
156f2830d09SSebastian Andrzej Siewior
157f2830d09SSebastian Andrzej Siewior for_each_possible_cpu(cpu) {
1587875e18eSAndy Grover struct rds_page_remainder *rem;
1597875e18eSAndy Grover
1607875e18eSAndy Grover rem = &per_cpu(rds_page_remainders, cpu);
161f2830d09SSebastian Andrzej Siewior rdsdebug("cpu %u\n", cpu);
1627875e18eSAndy Grover
1637875e18eSAndy Grover if (rem->r_page)
1647875e18eSAndy Grover __free_page(rem->r_page);
1657875e18eSAndy Grover rem->r_page = NULL;
1667875e18eSAndy Grover }
1677875e18eSAndy Grover }
168