// SPDX-License-Identifier: GPL-2.0
/*
 * Lockless hierarchical page accounting & limiting
 *
 * Copyright (C) 2014 Red Hat, Inc., Johannes Weiner
 */

#include <linux/page_counter.h>
#include <linux/atomic.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/sched.h>
#include <linux/bug.h>
#include <asm/page.h>

static void propagate_protected_usage(struct page_counter *c,
				      unsigned long usage)
{
	unsigned long protected, old_protected;
	unsigned long low, min;
	long delta;

	if (!c->parent)
		return;

	min = READ_ONCE(c->min);
	if (min || atomic_long_read(&c->min_usage)) {
		protected = min(usage, min);
		old_protected = atomic_long_xchg(&c->min_usage, protected);
		delta = protected - old_protected;
		if (delta)
			atomic_long_add(delta, &c->parent->children_min_usage);
	}

	low = READ_ONCE(c->low);
	if (low || atomic_long_read(&c->low_usage)) {
		protected = min(usage, low);
		old_protected = atomic_long_xchg(&c->low_usage, protected);
		delta = protected - old_protected;
		if (delta)
			atomic_long_add(delta, &c->parent->children_low_usage);
	}
}

/**
 * page_counter_cancel - take pages out of the local counter
 * @counter: counter
 * @nr_pages: number of pages to cancel
 */
void page_counter_cancel(struct page_counter *counter, unsigned long nr_pages)
{
	long new;

	new = atomic_long_sub_return(nr_pages, &counter->usage);
	/* More uncharges than charges? */
	if (WARN_ONCE(new < 0, "page_counter underflow: %ld nr_pages=%lu\n",
		      new, nr_pages)) {
		new = 0;
		atomic_long_set(&counter->usage, new);
	}
	propagate_protected_usage(counter, new);
}

/**
 * page_counter_charge - hierarchically charge pages
 * @counter: counter
 * @nr_pages: number of pages to charge
 *
 * NOTE: This does not consider any configured counter limits.
 */
void page_counter_charge(struct page_counter *counter, unsigned long nr_pages)
{
	struct page_counter *c;

	for (c = counter; c; c = c->parent) {
		long new;

		new = atomic_long_add_return(nr_pages, &c->usage);
		propagate_protected_usage(c, new);
		/*
		 * This is indeed racy, but we can live with some
		 * inaccuracy in the watermark.
		 */
		if (new > READ_ONCE(c->watermark))
			WRITE_ONCE(c->watermark, new);
	}
}
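
/*
 * Worked example of the protected-usage bookkeeping above (the numbers are
 * hypothetical): with a parent P and two children A (min=50, usage=40) and
 * B (min=30, usage=60), each child caches min(usage, min) in its min_usage,
 * so P->children_min_usage ends up as min(40, 50) + min(60, 30) = 70
 * protected pages.  The low limits are aggregated into children_low_usage
 * in exactly the same way.
 */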

/**
 * page_counter_try_charge - try to hierarchically charge pages
 * @counter: counter
 * @nr_pages: number of pages to charge
 * @fail: points to the first counter to hit its limit, if any
 *
 * Returns %true on success, or %false and @fail if the counter or one
 * of its ancestors has hit its configured limit.
 */
bool page_counter_try_charge(struct page_counter *counter,
			     unsigned long nr_pages,
			     struct page_counter **fail)
{
	struct page_counter *c;

	for (c = counter; c; c = c->parent) {
		long new;
		/*
		 * Charge speculatively to avoid an expensive CAS.  If
		 * a bigger charge fails, it might falsely lock out a
		 * racing smaller charge and send it into reclaim
		 * early, but the error is limited to the difference
		 * between the two sizes, which is less than 2M/4M in
		 * case of a THP locking out a regular page charge.
		 *
		 * The atomic_long_add_return() implies a full memory
		 * barrier between incrementing the count and reading
		 * the limit.  When racing with page_counter_set_max(),
		 * we either see the new limit or the setter sees the
		 * counter has changed and retries.
		 */
		new = atomic_long_add_return(nr_pages, &c->usage);
		if (new > c->max) {
			atomic_long_sub(nr_pages, &c->usage);
			propagate_protected_usage(c, new);
			/*
			 * This is racy, but we can live with some
			 * inaccuracy in the failcnt which is only used
			 * to report stats.
			 */
			data_race(c->failcnt++);
			*fail = c;
			goto failed;
		}
		propagate_protected_usage(c, new);
		/*
		 * Just like with failcnt, we can live with some
		 * inaccuracy in the watermark.
		 */
		if (new > READ_ONCE(c->watermark))
			WRITE_ONCE(c->watermark, new);
	}
	return true;

failed:
	for (c = counter; c != *fail; c = c->parent)
		page_counter_cancel(c, nr_pages);

	return false;
}

/**
 * page_counter_uncharge - hierarchically uncharge pages
 * @counter: counter
 * @nr_pages: number of pages to uncharge
 */
void page_counter_uncharge(struct page_counter *counter, unsigned long nr_pages)
{
	struct page_counter *c;

	for (c = counter; c; c = c->parent)
		page_counter_cancel(c, nr_pages);
}

/**
 * page_counter_set_max - set the maximum number of pages allowed
 * @counter: counter
 * @nr_pages: limit to set
 *
 * Returns 0 on success, -EBUSY if the current number of pages on the
 * counter already exceeds the specified limit.
 *
 * The caller must serialize invocations on the same counter.
 */
int page_counter_set_max(struct page_counter *counter, unsigned long nr_pages)
{
	for (;;) {
		unsigned long old;
		long usage;

		/*
		 * Update the limit while making sure that it's not
		 * below the concurrently-changing counter value.
		 *
		 * The xchg implies two full memory barriers before
		 * and after, so the read-swap-read is ordered and
		 * ensures coherency with page_counter_try_charge():
		 * that function modifies the count before checking
		 * the limit, so if it sees the old limit, we see the
		 * modified counter and retry.
		 */
		usage = page_counter_read(counter);

		if (usage > nr_pages)
			return -EBUSY;

		old = xchg(&counter->max, nr_pages);

		if (page_counter_read(counter) <= usage)
			return 0;

		counter->max = old;
		cond_resched();
	}
}

/**
 * page_counter_set_min - set the amount of hard memory protection
 * @counter: counter
 * @nr_pages: value to set
 *
 * The caller must serialize invocations on the same counter.
 */
void page_counter_set_min(struct page_counter *counter, unsigned long nr_pages)
{
	struct page_counter *c;

	WRITE_ONCE(counter->min, nr_pages);

	for (c = counter; c; c = c->parent)
		propagate_protected_usage(c, atomic_long_read(&c->usage));
}

/**
 * page_counter_set_low - set the amount of best-effort memory protection
 * @counter: counter
 * @nr_pages: value to set
 *
 * The caller must serialize invocations on the same counter.
 */
void page_counter_set_low(struct page_counter *counter, unsigned long nr_pages)
{
	struct page_counter *c;

	WRITE_ONCE(counter->low, nr_pages);

	for (c = counter; c; c = c->parent)
		propagate_protected_usage(c, atomic_long_read(&c->usage));
}
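
/*
 * A minimal usage sketch of the charging API above: how a caller might pair
 * page_counter_try_charge() with page_counter_uncharge() and use the @fail
 * pointer.  The helper name example_account() and the plain -ENOMEM policy
 * are illustrative assumptions only; real users such as memcg run reclaim
 * against the failing counter and retry instead of giving up immediately.
 */
#if 0	/* illustrative sketch, not compiled */
static int example_account(struct page_counter *counter, unsigned long nr_pages)
{
	struct page_counter *fail;

	if (!page_counter_try_charge(counter, nr_pages, &fail)) {
		/* @fail is the first ancestor that hit its limit */
		return -ENOMEM;
	}

	/* ... use the accounted pages ... */

	page_counter_uncharge(counter, nr_pages);
	return 0;
}
#endif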

/**
 * page_counter_memparse - memparse() for page counter limits
 * @buf: string to parse
 * @max: string meaning maximum possible value
 * @nr_pages: returns the result in number of pages
 *
 * Returns -EINVAL, or 0 and @nr_pages on success.  @nr_pages will be
 * limited to %PAGE_COUNTER_MAX.
 */
int page_counter_memparse(const char *buf, const char *max,
			  unsigned long *nr_pages)
{
	char *end;
	u64 bytes;

	if (!strcmp(buf, max)) {
		*nr_pages = PAGE_COUNTER_MAX;
		return 0;
	}

	bytes = memparse(buf, &end);
	if (*end != '\0')
		return -EINVAL;

	*nr_pages = min(bytes / PAGE_SIZE, (u64)PAGE_COUNTER_MAX);

	return 0;
}
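
/*
 * A sketch of how page_counter_memparse() and page_counter_set_max() fit
 * together when handling a limit string from userspace.  The handler name
 * example_write_limit() and the direct -EBUSY return are illustrative
 * assumptions; a real controller write handler would typically reclaim
 * and retry before reporting failure.
 */
#if 0	/* illustrative sketch, not compiled */
static int example_write_limit(struct page_counter *counter, const char *buf)
{
	unsigned long nr_pages;
	int err;

	/* "max" selects PAGE_COUNTER_MAX; "512M" becomes 512M / PAGE_SIZE pages */
	err = page_counter_memparse(buf, "max", &nr_pages);
	if (err)
		return err;

	/* Returns -EBUSY if current usage already exceeds the new limit */
	return page_counter_set_max(counter, nr_pages);
}
#endif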