// SPDX-License-Identifier: GPL-2.0
/*
 * Lockless hierarchical page accounting & limiting
 *
 * Copyright (C) 2014 Red Hat, Inc., Johannes Weiner
 */

#include <linux/page_counter.h>
#include <linux/atomic.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/sched.h>
#include <linux/bug.h>
#include <asm/page.h>

static void propagate_protected_usage(struct page_counter *c,
				      unsigned long usage)
{
	unsigned long protected, old_protected;
	long delta;

	if (!c->parent)
		return;

	if (c->min || atomic_long_read(&c->min_usage)) {
		if (usage <= c->min)
			protected = usage;
		else
			protected = 0;

		old_protected = atomic_long_xchg(&c->min_usage, protected);
		delta = protected - old_protected;
		if (delta)
			atomic_long_add(delta, &c->parent->children_min_usage);
	}

	if (c->low || atomic_long_read(&c->low_usage)) {
		if (usage <= c->low)
			protected = usage;
		else
			protected = 0;

		old_protected = atomic_long_xchg(&c->low_usage, protected);
		delta = protected - old_protected;
		if (delta)
			atomic_long_add(delta, &c->parent->children_low_usage);
	}
}

/**
 * page_counter_cancel - take pages out of the local counter
 * @counter: counter
 * @nr_pages: number of pages to cancel
 */
void page_counter_cancel(struct page_counter *counter, unsigned long nr_pages)
{
	long new;

	new = atomic_long_sub_return(nr_pages, &counter->usage);
	propagate_protected_usage(counter, new);
	/* More uncharges than charges? */
	WARN_ON_ONCE(new < 0);
}

/**
 * page_counter_charge - hierarchically charge pages
 * @counter: counter
 * @nr_pages: number of pages to charge
 *
 * NOTE: This does not consider any configured counter limits.
 */
void page_counter_charge(struct page_counter *counter, unsigned long nr_pages)
{
	struct page_counter *c;

	for (c = counter; c; c = c->parent) {
		long new;

		new = atomic_long_add_return(nr_pages, &c->usage);
		/*
		 * Propagate the protected usage of the level we just
		 * updated, not of the leaf counter.
		 */
		propagate_protected_usage(c, new);
		/*
		 * This is indeed racy, but we can live with some
		 * inaccuracy in the watermark.
		 */
		if (new > c->watermark)
			c->watermark = new;
	}
}
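/*
 * Illustrative sketch (not part of the kernel source and not compiled): how
 * a hypothetical caller might wire up a two-level hierarchy and use the
 * force-charge path above.  page_counter_charge_example() is a made-up name
 * for this sketch; page_counter_init() and page_counter_read() are the
 * inline helpers declared in <linux/page_counter.h>.
 */
#if 0
static void page_counter_charge_example(void)
{
	struct page_counter parent, child;

	page_counter_init(&parent, NULL);	/* root of the hierarchy */
	page_counter_init(&child, &parent);	/* charges propagate upward */

	/* Unconditionally charge 32 pages; both levels go up by 32. */
	page_counter_charge(&child, 32);

	WARN_ON(page_counter_read(&child) != 32);
	WARN_ON(page_counter_read(&parent) != 32);

	/* page_counter_cancel() only affects the local level. */
	page_counter_cancel(&child, 32);
	page_counter_cancel(&parent, 32);
}
#endif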
/**
 * page_counter_try_charge - try to hierarchically charge pages
 * @counter: counter
 * @nr_pages: number of pages to charge
 * @fail: points first counter to hit its limit, if any
 *
 * Returns %true on success, or %false and @fail if the counter or one
 * of its ancestors has hit its configured limit.
 */
bool page_counter_try_charge(struct page_counter *counter,
			     unsigned long nr_pages,
			     struct page_counter **fail)
{
	struct page_counter *c;

	for (c = counter; c; c = c->parent) {
		long new;
		/*
		 * Charge speculatively to avoid an expensive CAS.  If
		 * a bigger charge fails, it might falsely lock out a
		 * racing smaller charge and send it into reclaim
		 * early, but the error is limited to the difference
		 * between the two sizes, which is less than 2M/4M in
		 * case of a THP locking out a regular page charge.
		 *
		 * The atomic_long_add_return() implies a full memory
		 * barrier between incrementing the count and reading
		 * the limit.  When racing with page_counter_set_max(),
		 * we either see the new limit or the setter sees the
		 * counter has changed and retries.
		 */
		new = atomic_long_add_return(nr_pages, &c->usage);
		if (new > c->max) {
			atomic_long_sub(nr_pages, &c->usage);
			propagate_protected_usage(c, new);
			/*
			 * This is racy, but we can live with some
			 * inaccuracy in the failcnt.
			 */
			c->failcnt++;
			*fail = c;
			goto failed;
		}
		propagate_protected_usage(c, new);
		/*
		 * Just like with failcnt, we can live with some
		 * inaccuracy in the watermark.
		 */
		if (new > c->watermark)
			c->watermark = new;
	}
	return true;

failed:
	for (c = counter; c != *fail; c = c->parent)
		page_counter_cancel(c, nr_pages);

	return false;
}

/**
 * page_counter_uncharge - hierarchically uncharge pages
 * @counter: counter
 * @nr_pages: number of pages to uncharge
 */
void page_counter_uncharge(struct page_counter *counter, unsigned long nr_pages)
{
	struct page_counter *c;

	for (c = counter; c; c = c->parent)
		page_counter_cancel(c, nr_pages);
}

/**
 * page_counter_set_max - set the maximum number of pages allowed
 * @counter: counter
 * @nr_pages: limit to set
 *
 * Returns 0 on success, -EBUSY if the current number of pages on the
 * counter already exceeds the specified limit.
 *
 * The caller must serialize invocations on the same counter.
 */
int page_counter_set_max(struct page_counter *counter, unsigned long nr_pages)
{
	for (;;) {
		unsigned long old;
		long usage;

		/*
		 * Update the limit while making sure that it's not
		 * below the concurrently-changing counter value.
		 *
		 * The xchg implies two full memory barriers before
		 * and after, so the read-swap-read is ordered and
		 * ensures coherency with page_counter_try_charge():
		 * that function modifies the count before checking
		 * the limit, so if it sees the old limit, we see the
		 * modified counter and retry.
		 */
		usage = atomic_long_read(&counter->usage);

		if (usage > nr_pages)
			return -EBUSY;

		old = xchg(&counter->max, nr_pages);

		if (atomic_long_read(&counter->usage) <= usage)
			return 0;

		counter->max = old;
		cond_resched();
	}
}

/**
 * page_counter_set_min - set the amount of hard-protected memory
 * @counter: counter
 * @nr_pages: value to set
 *
 * The caller must serialize invocations on the same counter.
 */
void page_counter_set_min(struct page_counter *counter, unsigned long nr_pages)
{
	struct page_counter *c;

	counter->min = nr_pages;

	for (c = counter; c; c = c->parent)
		propagate_protected_usage(c, atomic_long_read(&c->usage));
}

/**
 * page_counter_set_low - set the amount of best-effort protected memory
 * @counter: counter
 * @nr_pages: value to set
 *
 * The caller must serialize invocations on the same counter.
 */
void page_counter_set_low(struct page_counter *counter, unsigned long nr_pages)
{
	struct page_counter *c;

	counter->low = nr_pages;

	for (c = counter; c; c = c->parent)
		propagate_protected_usage(c, atomic_long_read(&c->usage));
}
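/*
 * Illustrative sketch (not compiled): the limited-charge path.  A charge
 * that would push any level of the hierarchy above its configured maximum
 * fails as a whole, and @fail reports which ancestor ran out of room.
 * page_counter_try_charge_example() is a made-up name for this sketch.
 */
#if 0
static bool page_counter_try_charge_example(struct page_counter *child)
{
	struct page_counter *fail;

	if (!page_counter_try_charge(child, 1, &fail)) {
		/*
		 * @fail points at the counter that hit its limit; all
		 * speculative charges below it were already rolled back
		 * before page_counter_try_charge() returned.
		 */
		pr_debug("limit hit, failcnt=%lu\n", fail->failcnt);
		return false;
	}

	/* ... use the page, then release the charge on all levels ... */
	page_counter_uncharge(child, 1);
	return true;
}
#endif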
/**
 * page_counter_memparse - memparse() for page counter limits
 * @buf: string to parse
 * @max: string meaning maximum possible value
 * @nr_pages: returns the result in number of pages
 *
 * Returns -EINVAL, or 0 and @nr_pages on success.  @nr_pages will be
 * limited to %PAGE_COUNTER_MAX.
 */
int page_counter_memparse(const char *buf, const char *max,
			  unsigned long *nr_pages)
{
	char *end;
	u64 bytes;

	if (!strcmp(buf, max)) {
		*nr_pages = PAGE_COUNTER_MAX;
		return 0;
	}

	bytes = memparse(buf, &end);
	if (*end != '\0')
		return -EINVAL;

	*nr_pages = min(bytes / PAGE_SIZE, (u64)PAGE_COUNTER_MAX);

	return 0;
}
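/*
 * Illustrative sketch (not compiled): how a limit string, e.g. one written
 * to a cgroup control file, might be parsed and applied using the helpers
 * above.  "max" maps to PAGE_COUNTER_MAX, anything else goes through
 * memparse() and is converted to pages.  page_counter_set_limit_example()
 * is a made-up name; real cgroup writers typically retry with reclaim
 * instead of returning -EBUSY directly.
 */
#if 0
static int page_counter_set_limit_example(struct page_counter *counter,
					   const char *buf)
{
	unsigned long nr_pages;
	int err;

	err = page_counter_memparse(buf, "max", &nr_pages);
	if (err)
		return err;

	/* Fails with -EBUSY if current usage already exceeds the new limit. */
	return page_counter_set_max(counter, nr_pages);
}
#endif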