xref: /openbmc/linux/mm/page_reporting.c (revision c1e01cdbe0312d95b8c1542abd67fe786b534f57)
// SPDX-License-Identifier: GPL-2.0
236e66c55SAlexander Duyck #include <linux/mm.h>
336e66c55SAlexander Duyck #include <linux/mmzone.h>
436e66c55SAlexander Duyck #include <linux/page_reporting.h>
536e66c55SAlexander Duyck #include <linux/gfp.h>
636e66c55SAlexander Duyck #include <linux/export.h>
7f58780a8SGavin Shan #include <linux/module.h>
836e66c55SAlexander Duyck #include <linux/delay.h>
936e66c55SAlexander Duyck #include <linux/scatterlist.h>
1036e66c55SAlexander Duyck 
1136e66c55SAlexander Duyck #include "page_reporting.h"
1236e66c55SAlexander Duyck #include "internal.h"
1336e66c55SAlexander Duyck 
/*
 * Minimum buddy order that is considered for reporting.
 *
 * Initialized to -1, an unsupported value (UINT_MAX once converted to
 * unsigned), to mean "not configured yet". page_reporting_register()
 * replaces the sentinel with a usable order when a device registers.
 */
unsigned int page_reporting_order = -1;
16aebb02ceSShradha Gupta 
page_order_update_notify(const char * val,const struct kernel_param * kp)17aebb02ceSShradha Gupta static int page_order_update_notify(const char *val, const struct kernel_param *kp)
18aebb02ceSShradha Gupta {
19aebb02ceSShradha Gupta 	/*
20aebb02ceSShradha Gupta 	 * If param is set beyond this limit, order is set to default
21aebb02ceSShradha Gupta 	 * pageblock_order value
22aebb02ceSShradha Gupta 	 */
2323baf831SKirill A. Shutemov 	return  param_set_uint_minmax(val, kp, 0, MAX_ORDER);
24aebb02ceSShradha Gupta }
25aebb02ceSShradha Gupta 
26aebb02ceSShradha Gupta static const struct kernel_param_ops page_reporting_param_ops = {
27aebb02ceSShradha Gupta 	.set = &page_order_update_notify,
28aebb02ceSShradha Gupta 	/*
29aebb02ceSShradha Gupta 	 * For the get op, use param_get_int instead of param_get_uint.
30aebb02ceSShradha Gupta 	 * This is to make sure that when unset the initialized value of
31aebb02ceSShradha Gupta 	 * -1 is shown correctly
32aebb02ceSShradha Gupta 	 */
33aebb02ceSShradha Gupta 	.get = &param_get_int,
34aebb02ceSShradha Gupta };
35aebb02ceSShradha Gupta 
36aebb02ceSShradha Gupta module_param_cb(page_reporting_order, &page_reporting_param_ops,
37aebb02ceSShradha Gupta 			&page_reporting_order, 0644);
38f58780a8SGavin Shan MODULE_PARM_DESC(page_reporting_order, "Set page reporting order");
39f58780a8SGavin Shan 
/*
 * This symbol is also a kernel parameter. Export the page_reporting_order
 * symbol so that other drivers can access it to control order values without
 * having to introduce another configurable parameter. Only one driver can
 * register with the page_reporting driver for the service, so a single
 * control parameter (accessible from both drivers) is enough for this use
 * case.
 */
EXPORT_SYMBOL_GPL(page_reporting_order);
49aebb02ceSShradha Gupta 
5036e66c55SAlexander Duyck #define PAGE_REPORTING_DELAY	(2 * HZ)
5136e66c55SAlexander Duyck static struct page_reporting_dev_info __rcu *pr_dev_info __read_mostly;
5236e66c55SAlexander Duyck 
/*
 * Values stored in page_reporting_dev_info::state:
 *
 * IDLE      - no pass is queued or running
 * REQUESTED - a pass has been asked for
 * ACTIVE    - the worker is running a pass and nobody has asked for
 *             another one since it started
 */
enum {
	PAGE_REPORTING_IDLE = 0,
	PAGE_REPORTING_REQUESTED,
	PAGE_REPORTING_ACTIVE
};
5836e66c55SAlexander Duyck 
5936e66c55SAlexander Duyck /* request page reporting */
6036e66c55SAlexander Duyck static void
__page_reporting_request(struct page_reporting_dev_info * prdev)6136e66c55SAlexander Duyck __page_reporting_request(struct page_reporting_dev_info *prdev)
6236e66c55SAlexander Duyck {
6336e66c55SAlexander Duyck 	unsigned int state;
6436e66c55SAlexander Duyck 
6536e66c55SAlexander Duyck 	/* Check to see if we are in desired state */
6636e66c55SAlexander Duyck 	state = atomic_read(&prdev->state);
6736e66c55SAlexander Duyck 	if (state == PAGE_REPORTING_REQUESTED)
6836e66c55SAlexander Duyck 		return;
6936e66c55SAlexander Duyck 
7036e66c55SAlexander Duyck 	/*
7136e66c55SAlexander Duyck 	 * If reporting is already active there is nothing we need to do.
7236e66c55SAlexander Duyck 	 * Test against 0 as that represents PAGE_REPORTING_IDLE.
7336e66c55SAlexander Duyck 	 */
7436e66c55SAlexander Duyck 	state = atomic_xchg(&prdev->state, PAGE_REPORTING_REQUESTED);
7536e66c55SAlexander Duyck 	if (state != PAGE_REPORTING_IDLE)
7636e66c55SAlexander Duyck 		return;
7736e66c55SAlexander Duyck 
7836e66c55SAlexander Duyck 	/*
7936e66c55SAlexander Duyck 	 * Delay the start of work to allow a sizable queue to build. For
8036e66c55SAlexander Duyck 	 * now we are limiting this to running no more than once every
8136e66c55SAlexander Duyck 	 * couple of seconds.
8236e66c55SAlexander Duyck 	 */
8336e66c55SAlexander Duyck 	schedule_delayed_work(&prdev->work, PAGE_REPORTING_DELAY);
8436e66c55SAlexander Duyck }
8536e66c55SAlexander Duyck 
8636e66c55SAlexander Duyck /* notify prdev of free page reporting request */
__page_reporting_notify(void)8736e66c55SAlexander Duyck void __page_reporting_notify(void)
8836e66c55SAlexander Duyck {
8936e66c55SAlexander Duyck 	struct page_reporting_dev_info *prdev;
9036e66c55SAlexander Duyck 
9136e66c55SAlexander Duyck 	/*
9236e66c55SAlexander Duyck 	 * We use RCU to protect the pr_dev_info pointer. In almost all
9336e66c55SAlexander Duyck 	 * cases this should be present, however in the unlikely case of
9436e66c55SAlexander Duyck 	 * a shutdown this will be NULL and we should exit.
9536e66c55SAlexander Duyck 	 */
9636e66c55SAlexander Duyck 	rcu_read_lock();
9736e66c55SAlexander Duyck 	prdev = rcu_dereference(pr_dev_info);
9836e66c55SAlexander Duyck 	if (likely(prdev))
9936e66c55SAlexander Duyck 		__page_reporting_request(prdev);
10036e66c55SAlexander Duyck 
10136e66c55SAlexander Duyck 	rcu_read_unlock();
10236e66c55SAlexander Duyck }
10336e66c55SAlexander Duyck 
10436e66c55SAlexander Duyck static void
page_reporting_drain(struct page_reporting_dev_info * prdev,struct scatterlist * sgl,unsigned int nents,bool reported)10536e66c55SAlexander Duyck page_reporting_drain(struct page_reporting_dev_info *prdev,
10636e66c55SAlexander Duyck 		     struct scatterlist *sgl, unsigned int nents, bool reported)
10736e66c55SAlexander Duyck {
10836e66c55SAlexander Duyck 	struct scatterlist *sg = sgl;
10936e66c55SAlexander Duyck 
11036e66c55SAlexander Duyck 	/*
11136e66c55SAlexander Duyck 	 * Drain the now reported pages back into their respective
11236e66c55SAlexander Duyck 	 * free lists/areas. We assume at least one page is populated.
11336e66c55SAlexander Duyck 	 */
11436e66c55SAlexander Duyck 	do {
11536e66c55SAlexander Duyck 		struct page *page = sg_page(sg);
11636e66c55SAlexander Duyck 		int mt = get_pageblock_migratetype(page);
11736e66c55SAlexander Duyck 		unsigned int order = get_order(sg->length);
11836e66c55SAlexander Duyck 
11936e66c55SAlexander Duyck 		__putback_isolated_page(page, order, mt);
12036e66c55SAlexander Duyck 
12136e66c55SAlexander Duyck 		/* If the pages were not reported due to error skip flagging */
12236e66c55SAlexander Duyck 		if (!reported)
12336e66c55SAlexander Duyck 			continue;
12436e66c55SAlexander Duyck 
12536e66c55SAlexander Duyck 		/*
12636e66c55SAlexander Duyck 		 * If page was not comingled with another page we can
12736e66c55SAlexander Duyck 		 * consider the result to be "reported" since the page
12836e66c55SAlexander Duyck 		 * hasn't been modified, otherwise we will need to
12936e66c55SAlexander Duyck 		 * report on the new larger page when we make our way
13036e66c55SAlexander Duyck 		 * up to that higher order.
13136e66c55SAlexander Duyck 		 */
132ab130f91SMatthew Wilcox (Oracle) 		if (PageBuddy(page) && buddy_order(page) == order)
13336e66c55SAlexander Duyck 			__SetPageReported(page);
13436e66c55SAlexander Duyck 	} while ((sg = sg_next(sg)));
13536e66c55SAlexander Duyck 
13636e66c55SAlexander Duyck 	/* reinitialize scatterlist now that it is empty */
13736e66c55SAlexander Duyck 	sg_init_table(sgl, nents);
13836e66c55SAlexander Duyck }
13936e66c55SAlexander Duyck 
14036e66c55SAlexander Duyck /*
14136e66c55SAlexander Duyck  * The page reporting cycle consists of 4 stages, fill, report, drain, and
14236e66c55SAlexander Duyck  * idle. We will cycle through the first 3 stages until we cannot obtain a
14336e66c55SAlexander Duyck  * full scatterlist of pages, in that case we will switch to idle.
14436e66c55SAlexander Duyck  */
14536e66c55SAlexander Duyck static int
page_reporting_cycle(struct page_reporting_dev_info * prdev,struct zone * zone,unsigned int order,unsigned int mt,struct scatterlist * sgl,unsigned int * offset)14636e66c55SAlexander Duyck page_reporting_cycle(struct page_reporting_dev_info *prdev, struct zone *zone,
14736e66c55SAlexander Duyck 		     unsigned int order, unsigned int mt,
14836e66c55SAlexander Duyck 		     struct scatterlist *sgl, unsigned int *offset)
14936e66c55SAlexander Duyck {
15036e66c55SAlexander Duyck 	struct free_area *area = &zone->free_area[order];
15136e66c55SAlexander Duyck 	struct list_head *list = &area->free_list[mt];
15236e66c55SAlexander Duyck 	unsigned int page_len = PAGE_SIZE << order;
15336e66c55SAlexander Duyck 	struct page *page, *next;
15443b76f29SAlexander Duyck 	long budget;
15536e66c55SAlexander Duyck 	int err = 0;
15636e66c55SAlexander Duyck 
15736e66c55SAlexander Duyck 	/*
15836e66c55SAlexander Duyck 	 * Perform early check, if free area is empty there is
15936e66c55SAlexander Duyck 	 * nothing to process so we can skip this free_list.
16036e66c55SAlexander Duyck 	 */
16136e66c55SAlexander Duyck 	if (list_empty(list))
16236e66c55SAlexander Duyck 		return err;
16336e66c55SAlexander Duyck 
16436e66c55SAlexander Duyck 	spin_lock_irq(&zone->lock);
16536e66c55SAlexander Duyck 
16643b76f29SAlexander Duyck 	/*
16743b76f29SAlexander Duyck 	 * Limit how many calls we will be making to the page reporting
16843b76f29SAlexander Duyck 	 * device for this list. By doing this we avoid processing any
16943b76f29SAlexander Duyck 	 * given list for too long.
17043b76f29SAlexander Duyck 	 *
17143b76f29SAlexander Duyck 	 * The current value used allows us enough calls to process over a
17243b76f29SAlexander Duyck 	 * sixteenth of the current list plus one additional call to handle
17343b76f29SAlexander Duyck 	 * any pages that may have already been present from the previous
17443b76f29SAlexander Duyck 	 * list processed. This should result in us reporting all pages on
17543b76f29SAlexander Duyck 	 * an idle system in about 30 seconds.
17643b76f29SAlexander Duyck 	 *
17743b76f29SAlexander Duyck 	 * The division here should be cheap since PAGE_REPORTING_CAPACITY
17843b76f29SAlexander Duyck 	 * should always be a power of 2.
17943b76f29SAlexander Duyck 	 */
18043b76f29SAlexander Duyck 	budget = DIV_ROUND_UP(area->nr_free, PAGE_REPORTING_CAPACITY * 16);
18143b76f29SAlexander Duyck 
18236e66c55SAlexander Duyck 	/* loop through free list adding unreported pages to sg list */
18336e66c55SAlexander Duyck 	list_for_each_entry_safe(page, next, list, lru) {
18436e66c55SAlexander Duyck 		/* We are going to skip over the reported pages. */
18536e66c55SAlexander Duyck 		if (PageReported(page))
18636e66c55SAlexander Duyck 			continue;
18736e66c55SAlexander Duyck 
18843b76f29SAlexander Duyck 		/*
18943b76f29SAlexander Duyck 		 * If we fully consumed our budget then update our
19043b76f29SAlexander Duyck 		 * state to indicate that we are requesting additional
19143b76f29SAlexander Duyck 		 * processing and exit this list.
19243b76f29SAlexander Duyck 		 */
19343b76f29SAlexander Duyck 		if (budget < 0) {
19443b76f29SAlexander Duyck 			atomic_set(&prdev->state, PAGE_REPORTING_REQUESTED);
19543b76f29SAlexander Duyck 			next = page;
19643b76f29SAlexander Duyck 			break;
19743b76f29SAlexander Duyck 		}
19843b76f29SAlexander Duyck 
19902cf8719SAlexander Duyck 		/* Attempt to pull page from list and place in scatterlist */
20002cf8719SAlexander Duyck 		if (*offset) {
20102cf8719SAlexander Duyck 			if (!__isolate_free_page(page, order)) {
20202cf8719SAlexander Duyck 				next = page;
20336e66c55SAlexander Duyck 				break;
20402cf8719SAlexander Duyck 			}
20536e66c55SAlexander Duyck 
20636e66c55SAlexander Duyck 			/* Add page to scatter list */
20736e66c55SAlexander Duyck 			--(*offset);
20836e66c55SAlexander Duyck 			sg_set_page(&sgl[*offset], page, page_len, 0);
20936e66c55SAlexander Duyck 
21036e66c55SAlexander Duyck 			continue;
21102cf8719SAlexander Duyck 		}
21202cf8719SAlexander Duyck 
21302cf8719SAlexander Duyck 		/*
21443b76f29SAlexander Duyck 		 * Make the first non-reported page in the free list
21502cf8719SAlexander Duyck 		 * the new head of the free list before we release the
21602cf8719SAlexander Duyck 		 * zone lock.
21702cf8719SAlexander Duyck 		 */
21858f6f034SWei Yang 		if (!list_is_first(&page->lru, list))
21902cf8719SAlexander Duyck 			list_rotate_to_front(&page->lru, list);
22036e66c55SAlexander Duyck 
22136e66c55SAlexander Duyck 		/* release lock before waiting on report processing */
22236e66c55SAlexander Duyck 		spin_unlock_irq(&zone->lock);
22336e66c55SAlexander Duyck 
22436e66c55SAlexander Duyck 		/* begin processing pages in local list */
22536e66c55SAlexander Duyck 		err = prdev->report(prdev, sgl, PAGE_REPORTING_CAPACITY);
22636e66c55SAlexander Duyck 
22736e66c55SAlexander Duyck 		/* reset offset since the full list was reported */
22836e66c55SAlexander Duyck 		*offset = PAGE_REPORTING_CAPACITY;
22936e66c55SAlexander Duyck 
23043b76f29SAlexander Duyck 		/* update budget to reflect call to report function */
23143b76f29SAlexander Duyck 		budget--;
23243b76f29SAlexander Duyck 
23336e66c55SAlexander Duyck 		/* reacquire zone lock and resume processing */
23436e66c55SAlexander Duyck 		spin_lock_irq(&zone->lock);
23536e66c55SAlexander Duyck 
23636e66c55SAlexander Duyck 		/* flush reported pages from the sg list */
23736e66c55SAlexander Duyck 		page_reporting_drain(prdev, sgl, PAGE_REPORTING_CAPACITY, !err);
23836e66c55SAlexander Duyck 
23936e66c55SAlexander Duyck 		/*
24036e66c55SAlexander Duyck 		 * Reset next to first entry, the old next isn't valid
24136e66c55SAlexander Duyck 		 * since we dropped the lock to report the pages
24236e66c55SAlexander Duyck 		 */
24336e66c55SAlexander Duyck 		next = list_first_entry(list, struct page, lru);
24436e66c55SAlexander Duyck 
24536e66c55SAlexander Duyck 		/* exit on error */
24636e66c55SAlexander Duyck 		if (err)
24736e66c55SAlexander Duyck 			break;
24836e66c55SAlexander Duyck 	}
24936e66c55SAlexander Duyck 
25002cf8719SAlexander Duyck 	/* Rotate any leftover pages to the head of the freelist */
2515df6d792Ssh_def@163.com 	if (!list_entry_is_head(next, list, lru) && !list_is_first(&next->lru, list))
25202cf8719SAlexander Duyck 		list_rotate_to_front(&next->lru, list);
25302cf8719SAlexander Duyck 
25436e66c55SAlexander Duyck 	spin_unlock_irq(&zone->lock);
25536e66c55SAlexander Duyck 
25636e66c55SAlexander Duyck 	return err;
25736e66c55SAlexander Duyck }
25836e66c55SAlexander Duyck 
25936e66c55SAlexander Duyck static int
page_reporting_process_zone(struct page_reporting_dev_info * prdev,struct scatterlist * sgl,struct zone * zone)26036e66c55SAlexander Duyck page_reporting_process_zone(struct page_reporting_dev_info *prdev,
26136e66c55SAlexander Duyck 			    struct scatterlist *sgl, struct zone *zone)
26236e66c55SAlexander Duyck {
26336e66c55SAlexander Duyck 	unsigned int order, mt, leftover, offset = PAGE_REPORTING_CAPACITY;
26436e66c55SAlexander Duyck 	unsigned long watermark;
26536e66c55SAlexander Duyck 	int err = 0;
26636e66c55SAlexander Duyck 
26736e66c55SAlexander Duyck 	/* Generate minimum watermark to be able to guarantee progress */
26836e66c55SAlexander Duyck 	watermark = low_wmark_pages(zone) +
269f58780a8SGavin Shan 		    (PAGE_REPORTING_CAPACITY << page_reporting_order);
27036e66c55SAlexander Duyck 
27136e66c55SAlexander Duyck 	/*
27236e66c55SAlexander Duyck 	 * Cancel request if insufficient free memory or if we failed
27336e66c55SAlexander Duyck 	 * to allocate page reporting statistics for the zone.
27436e66c55SAlexander Duyck 	 */
27536e66c55SAlexander Duyck 	if (!zone_watermark_ok(zone, 0, watermark, 0, ALLOC_CMA))
27636e66c55SAlexander Duyck 		return err;
27736e66c55SAlexander Duyck 
27836e66c55SAlexander Duyck 	/* Process each free list starting from lowest order/mt */
279*ded1ffeaSKirill A. Shutemov 	for (order = page_reporting_order; order < NR_PAGE_ORDERS; order++) {
28036e66c55SAlexander Duyck 		for (mt = 0; mt < MIGRATE_TYPES; mt++) {
28136e66c55SAlexander Duyck 			/* We do not pull pages from the isolate free list */
28236e66c55SAlexander Duyck 			if (is_migrate_isolate(mt))
28336e66c55SAlexander Duyck 				continue;
28436e66c55SAlexander Duyck 
28536e66c55SAlexander Duyck 			err = page_reporting_cycle(prdev, zone, order, mt,
28636e66c55SAlexander Duyck 						   sgl, &offset);
28736e66c55SAlexander Duyck 			if (err)
28836e66c55SAlexander Duyck 				return err;
28936e66c55SAlexander Duyck 		}
29036e66c55SAlexander Duyck 	}
29136e66c55SAlexander Duyck 
29236e66c55SAlexander Duyck 	/* report the leftover pages before going idle */
29336e66c55SAlexander Duyck 	leftover = PAGE_REPORTING_CAPACITY - offset;
29436e66c55SAlexander Duyck 	if (leftover) {
29536e66c55SAlexander Duyck 		sgl = &sgl[offset];
29636e66c55SAlexander Duyck 		err = prdev->report(prdev, sgl, leftover);
29736e66c55SAlexander Duyck 
29836e66c55SAlexander Duyck 		/* flush any remaining pages out from the last report */
29936e66c55SAlexander Duyck 		spin_lock_irq(&zone->lock);
30036e66c55SAlexander Duyck 		page_reporting_drain(prdev, sgl, leftover, !err);
30136e66c55SAlexander Duyck 		spin_unlock_irq(&zone->lock);
30236e66c55SAlexander Duyck 	}
30336e66c55SAlexander Duyck 
30436e66c55SAlexander Duyck 	return err;
30536e66c55SAlexander Duyck }
30636e66c55SAlexander Duyck 
page_reporting_process(struct work_struct * work)30736e66c55SAlexander Duyck static void page_reporting_process(struct work_struct *work)
30836e66c55SAlexander Duyck {
30936e66c55SAlexander Duyck 	struct delayed_work *d_work = to_delayed_work(work);
31036e66c55SAlexander Duyck 	struct page_reporting_dev_info *prdev =
31136e66c55SAlexander Duyck 		container_of(d_work, struct page_reporting_dev_info, work);
31236e66c55SAlexander Duyck 	int err = 0, state = PAGE_REPORTING_ACTIVE;
31336e66c55SAlexander Duyck 	struct scatterlist *sgl;
31436e66c55SAlexander Duyck 	struct zone *zone;
31536e66c55SAlexander Duyck 
31636e66c55SAlexander Duyck 	/*
31736e66c55SAlexander Duyck 	 * Change the state to "Active" so that we can track if there is
31836e66c55SAlexander Duyck 	 * anyone requests page reporting after we complete our pass. If
31936e66c55SAlexander Duyck 	 * the state is not altered by the end of the pass we will switch
32036e66c55SAlexander Duyck 	 * to idle and quit scheduling reporting runs.
32136e66c55SAlexander Duyck 	 */
32236e66c55SAlexander Duyck 	atomic_set(&prdev->state, state);
32336e66c55SAlexander Duyck 
32436e66c55SAlexander Duyck 	/* allocate scatterlist to store pages being reported on */
32536e66c55SAlexander Duyck 	sgl = kmalloc_array(PAGE_REPORTING_CAPACITY, sizeof(*sgl), GFP_KERNEL);
32636e66c55SAlexander Duyck 	if (!sgl)
32736e66c55SAlexander Duyck 		goto err_out;
32836e66c55SAlexander Duyck 
32936e66c55SAlexander Duyck 	sg_init_table(sgl, PAGE_REPORTING_CAPACITY);
33036e66c55SAlexander Duyck 
33136e66c55SAlexander Duyck 	for_each_zone(zone) {
33236e66c55SAlexander Duyck 		err = page_reporting_process_zone(prdev, sgl, zone);
33336e66c55SAlexander Duyck 		if (err)
33436e66c55SAlexander Duyck 			break;
33536e66c55SAlexander Duyck 	}
33636e66c55SAlexander Duyck 
33736e66c55SAlexander Duyck 	kfree(sgl);
33836e66c55SAlexander Duyck err_out:
33936e66c55SAlexander Duyck 	/*
34036e66c55SAlexander Duyck 	 * If the state has reverted back to requested then there may be
34136e66c55SAlexander Duyck 	 * additional pages to be processed. We will defer for 2s to allow
34236e66c55SAlexander Duyck 	 * more pages to accumulate.
34336e66c55SAlexander Duyck 	 */
34436e66c55SAlexander Duyck 	state = atomic_cmpxchg(&prdev->state, state, PAGE_REPORTING_IDLE);
34536e66c55SAlexander Duyck 	if (state == PAGE_REPORTING_REQUESTED)
34636e66c55SAlexander Duyck 		schedule_delayed_work(&prdev->work, PAGE_REPORTING_DELAY);
34736e66c55SAlexander Duyck }
34836e66c55SAlexander Duyck 
/* Held by register/unregister while they inspect and update pr_dev_info */
static DEFINE_MUTEX(page_reporting_mutex);
/* Static key, initially false; enabled on the first successful registration */
DEFINE_STATIC_KEY_FALSE(page_reporting_enabled);
35136e66c55SAlexander Duyck 
page_reporting_register(struct page_reporting_dev_info * prdev)35236e66c55SAlexander Duyck int page_reporting_register(struct page_reporting_dev_info *prdev)
35336e66c55SAlexander Duyck {
35436e66c55SAlexander Duyck 	int err = 0;
35536e66c55SAlexander Duyck 
35636e66c55SAlexander Duyck 	mutex_lock(&page_reporting_mutex);
35736e66c55SAlexander Duyck 
35836e66c55SAlexander Duyck 	/* nothing to do if already in use */
35901b5022fSSeongJae Park 	if (rcu_dereference_protected(pr_dev_info,
36001b5022fSSeongJae Park 				lockdep_is_held(&page_reporting_mutex))) {
36136e66c55SAlexander Duyck 		err = -EBUSY;
36236e66c55SAlexander Duyck 		goto err_out;
36336e66c55SAlexander Duyck 	}
36436e66c55SAlexander Duyck 
3659f849c6fSGavin Shan 	/*
366aebb02ceSShradha Gupta 	 * If the page_reporting_order value is not set, we check if
367aebb02ceSShradha Gupta 	 * an order is provided from the driver that is performing the
368aebb02ceSShradha Gupta 	 * registration. If that is not provided either, we default to
369aebb02ceSShradha Gupta 	 * pageblock_order.
3709f849c6fSGavin Shan 	 */
371aebb02ceSShradha Gupta 
372aebb02ceSShradha Gupta 	if (page_reporting_order == -1) {
37323baf831SKirill A. Shutemov 		if (prdev->order > 0 && prdev->order <= MAX_ORDER)
374aebb02ceSShradha Gupta 			page_reporting_order = prdev->order;
375aebb02ceSShradha Gupta 		else
376aebb02ceSShradha Gupta 			page_reporting_order = pageblock_order;
377aebb02ceSShradha Gupta 	}
3789f849c6fSGavin Shan 
37936e66c55SAlexander Duyck 	/* initialize state and work structures */
38036e66c55SAlexander Duyck 	atomic_set(&prdev->state, PAGE_REPORTING_IDLE);
38136e66c55SAlexander Duyck 	INIT_DELAYED_WORK(&prdev->work, &page_reporting_process);
38236e66c55SAlexander Duyck 
38336e66c55SAlexander Duyck 	/* Begin initial flush of zones */
38436e66c55SAlexander Duyck 	__page_reporting_request(prdev);
38536e66c55SAlexander Duyck 
38636e66c55SAlexander Duyck 	/* Assign device to allow notifications */
38736e66c55SAlexander Duyck 	rcu_assign_pointer(pr_dev_info, prdev);
38836e66c55SAlexander Duyck 
38936e66c55SAlexander Duyck 	/* enable page reporting notification */
39036e66c55SAlexander Duyck 	if (!static_key_enabled(&page_reporting_enabled)) {
39136e66c55SAlexander Duyck 		static_branch_enable(&page_reporting_enabled);
39236e66c55SAlexander Duyck 		pr_info("Free page reporting enabled\n");
39336e66c55SAlexander Duyck 	}
39436e66c55SAlexander Duyck err_out:
39536e66c55SAlexander Duyck 	mutex_unlock(&page_reporting_mutex);
39636e66c55SAlexander Duyck 
39736e66c55SAlexander Duyck 	return err;
39836e66c55SAlexander Duyck }
39936e66c55SAlexander Duyck EXPORT_SYMBOL_GPL(page_reporting_register);
40036e66c55SAlexander Duyck 
page_reporting_unregister(struct page_reporting_dev_info * prdev)40136e66c55SAlexander Duyck void page_reporting_unregister(struct page_reporting_dev_info *prdev)
40236e66c55SAlexander Duyck {
40336e66c55SAlexander Duyck 	mutex_lock(&page_reporting_mutex);
40436e66c55SAlexander Duyck 
40501b5022fSSeongJae Park 	if (prdev == rcu_dereference_protected(pr_dev_info,
40601b5022fSSeongJae Park 				lockdep_is_held(&page_reporting_mutex))) {
40736e66c55SAlexander Duyck 		/* Disable page reporting notification */
40836e66c55SAlexander Duyck 		RCU_INIT_POINTER(pr_dev_info, NULL);
40936e66c55SAlexander Duyck 		synchronize_rcu();
41036e66c55SAlexander Duyck 
41136e66c55SAlexander Duyck 		/* Flush any existing work, and lock it out */
41236e66c55SAlexander Duyck 		cancel_delayed_work_sync(&prdev->work);
41336e66c55SAlexander Duyck 	}
41436e66c55SAlexander Duyck 
41536e66c55SAlexander Duyck 	mutex_unlock(&page_reporting_mutex);
41636e66c55SAlexander Duyck }
41736e66c55SAlexander Duyck EXPORT_SYMBOL_GPL(page_reporting_unregister);
418