// SPDX-License-Identifier: GPL-2.0
/*
 * Lockless hierarchical page accounting & limiting
 *
 * Copyright (C) 2014 Red Hat, Inc., Johannes Weiner
 */

#include <linux/page_counter.h>
#include <linux/atomic.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/sched.h>
#include <linux/bug.h>
#include <asm/page.h>

/**
 * page_counter_cancel - take pages out of the local counter
 * @counter: counter
 * @nr_pages: number of pages to cancel
 */
void page_counter_cancel(struct page_counter *counter, unsigned long nr_pages)
{
	long new;

	new = atomic_long_sub_return(nr_pages, &counter->count);
	/* More uncharges than charges? */
	WARN_ON_ONCE(new < 0);
}
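
/*
 * Illustrative sketch, not part of the original file: unlike the
 * hierarchical helpers below, page_counter_cancel() touches only the
 * one counter it is handed.  The "parent"/"child" counters and this
 * example function are hypothetical; page_counter_init() comes from
 * <linux/page_counter.h>.
 */
static __maybe_unused void page_counter_cancel_example(void)
{
	struct page_counter parent, child;

	page_counter_init(&parent, NULL);
	page_counter_init(&child, &parent);

	page_counter_charge(&child, 8);		/* child = 8, parent = 8 */
	page_counter_cancel(&child, 8);		/* child = 0, parent still 8 */
	page_counter_cancel(&parent, 8);	/* both back to 0 */
}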

/**
 * page_counter_charge - hierarchically charge pages
 * @counter: counter
 * @nr_pages: number of pages to charge
 *
 * NOTE: This does not consider any configured counter limits.
 */
void page_counter_charge(struct page_counter *counter, unsigned long nr_pages)
{
	struct page_counter *c;

	for (c = counter; c; c = c->parent) {
		long new;

		new = atomic_long_add_return(nr_pages, &c->count);
		/*
		 * This is indeed racy, but we can live with some
		 * inaccuracy in the watermark.
		 */
		if (new > c->watermark)
			c->watermark = new;
	}
}
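
/*
 * Minimal usage sketch, not part of the original file:
 * page_counter_charge() is the "must succeed" variant for callers
 * that account unconditionally and enforce limits elsewhere (or not
 * at all).  The example function is hypothetical; page_counter_read()
 * is a helper from <linux/page_counter.h>.
 */
static __maybe_unused void page_counter_charge_example(struct page_counter *counter)
{
	page_counter_charge(counter, 1);
	/* The charge went through even if it pushed count past limit. */
	pr_debug("count=%lu watermark=%lu\n",
		 page_counter_read(counter), counter->watermark);
	page_counter_uncharge(counter, 1);
}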

/**
 * page_counter_try_charge - try to hierarchically charge pages
 * @counter: counter
 * @nr_pages: number of pages to charge
 * @fail: points to the first counter to hit its limit, if any
 *
 * Returns %true on success, or %false and @fail if the counter or one
 * of its ancestors has hit its configured limit.
 */
bool page_counter_try_charge(struct page_counter *counter,
			     unsigned long nr_pages,
			     struct page_counter **fail)
{
	struct page_counter *c;

	for (c = counter; c; c = c->parent) {
		long new;
		/*
		 * Charge speculatively to avoid an expensive CAS.  If
		 * a bigger charge fails, it might falsely lock out a
		 * racing smaller charge and send it into reclaim
		 * early, but the error is limited to the difference
		 * between the two sizes, which is less than 2M/4M in
		 * case of a THP locking out a regular page charge.
		 *
		 * The atomic_long_add_return() implies a full memory
		 * barrier between incrementing the count and reading
		 * the limit.  When racing with page_counter_limit(),
		 * we either see the new limit or the setter sees the
		 * counter has changed and retries.
		 */
		new = atomic_long_add_return(nr_pages, &c->count);
		if (new > c->limit) {
			atomic_long_sub(nr_pages, &c->count);
			/*
			 * This is racy, but we can live with some
			 * inaccuracy in the failcnt.
			 */
			c->failcnt++;
			*fail = c;
			goto failed;
		}
		/*
		 * Just like with failcnt, we can live with some
		 * inaccuracy in the watermark.
		 */
		if (new > c->watermark)
			c->watermark = new;
	}
	return true;

failed:
	for (c = counter; c != *fail; c = c->parent)
		page_counter_cancel(c, nr_pages);

	return false;
}
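
/*
 * Sketch of the typical caller pattern, not part of the original
 * file; the example function is hypothetical.  On failure, @fail
 * names the counter (possibly an ancestor) whose limit was hit; a
 * real user such as memcg would reclaim against that group and then
 * retry the charge.
 */
static __maybe_unused bool page_counter_try_charge_example(struct page_counter *counter,
							   unsigned long nr_pages)
{
	struct page_counter *fail;

	if (page_counter_try_charge(counter, nr_pages, &fail))
		return true;

	pr_debug("limit %lu hit, usage %lu\n",
		 fail->limit, page_counter_read(fail));
	return false;
}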

/**
 * page_counter_uncharge - hierarchically uncharge pages
 * @counter: counter
 * @nr_pages: number of pages to uncharge
 */
void page_counter_uncharge(struct page_counter *counter, unsigned long nr_pages)
{
	struct page_counter *c;

	for (c = counter; c; c = c->parent)
		page_counter_cancel(c, nr_pages);
}

/**
 * page_counter_limit - limit the number of pages allowed
 * @counter: counter
 * @limit: limit to set
 *
 * Returns 0 on success, -EBUSY if the current number of pages on the
 * counter already exceeds the specified limit.
 *
 * The caller must serialize invocations on the same counter.
 */
int page_counter_limit(struct page_counter *counter, unsigned long limit)
{
	for (;;) {
		unsigned long old;
		long count;

		/*
		 * Update the limit while making sure that it's not
		 * below the concurrently-changing counter value.
		 *
		 * The xchg implies two full memory barriers before
		 * and after, so the read-swap-read is ordered and
		 * ensures coherency with page_counter_try_charge():
		 * that function modifies the count before checking
		 * the limit, so if it sees the old limit, we see the
		 * modified counter and retry.
		 */
		count = atomic_long_read(&counter->count);

		if (count > limit)
			return -EBUSY;

		old = xchg(&counter->limit, limit);

		if (atomic_long_read(&counter->count) <= count)
			return 0;

		counter->limit = old;
		cond_resched();
	}
}
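
/*
 * Sketch, not part of the original file: shrinking a limit below the
 * current usage fails with -EBUSY, so a hypothetical caller like this
 * example function would reclaim and retry, or pass the error back to
 * userspace.
 */
static __maybe_unused int page_counter_limit_example(struct page_counter *counter,
						     unsigned long new_limit)
{
	int ret = page_counter_limit(counter, new_limit);

	if (ret == -EBUSY)
		pr_debug("limit %lu below usage %lu\n",
			 new_limit, page_counter_read(counter));
	return ret;
}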

/**
 * page_counter_memparse - memparse() for page counter limits
 * @buf: string to parse
 * @max: string meaning maximum possible value
 * @nr_pages: returns the result in number of pages
 *
 * Returns -EINVAL on a malformed string, or 0 on success with the
 * result stored in @nr_pages, capped at %PAGE_COUNTER_MAX.
 */
int page_counter_memparse(const char *buf, const char *max,
			  unsigned long *nr_pages)
{
	char *end;
	u64 bytes;

	if (!strcmp(buf, max)) {
		*nr_pages = PAGE_COUNTER_MAX;
		return 0;
	}

	bytes = memparse(buf, &end);
	if (*end != '\0')
		return -EINVAL;

	*nr_pages = min(bytes / PAGE_SIZE, (u64)PAGE_COUNTER_MAX);

	return 0;
}
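
/*
 * Sketch of the usual pairing in a cgroup write handler, not part of
 * the original file and with a hypothetical example function: parse a
 * userspace string such as "512M" or "max" into pages, then apply the
 * result as the limit.
 */
static __maybe_unused int page_counter_write_limit_example(struct page_counter *counter,
							   const char *buf)
{
	unsigned long nr_pages;
	int ret;

	ret = page_counter_memparse(buf, "max", &nr_pages);
	if (ret)
		return ret;

	return page_counter_limit(counter, nr_pages);
}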