// SPDX-License-Identifier: GPL-2.0
/*
 * Lockless hierarchical page accounting & limiting
 *
 * Copyright (C) 2014 Red Hat, Inc., Johannes Weiner
 */

#include <linux/page_counter.h>
#include <linux/atomic.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/sched.h>
#include <linux/bug.h>
#include <asm/page.h>

static void propagate_protected_usage(struct page_counter *c,
				      unsigned long usage)
{
	unsigned long protected, old_protected;
	unsigned long low, min;
	long delta;

	if (!c->parent)
		return;

	min = READ_ONCE(c->min);
	if (min || atomic_long_read(&c->min_usage)) {
		protected = min(usage, min);
		old_protected = atomic_long_xchg(&c->min_usage, protected);
		delta = protected - old_protected;
		if (delta)
			atomic_long_add(delta, &c->parent->children_min_usage);
	}

	low = READ_ONCE(c->low);
	if (low || atomic_long_read(&c->low_usage)) {
		protected = min(usage, low);
		old_protected = atomic_long_xchg(&c->low_usage, protected);
		delta = protected - old_protected;
		if (delta)
			atomic_long_add(delta, &c->parent->children_low_usage);
	}
}

/**
 * page_counter_cancel - take pages out of the local counter
 * @counter: counter
 * @nr_pages: number of pages to cancel
 */
void page_counter_cancel(struct page_counter *counter, unsigned long nr_pages)
{
	long new;

	new = atomic_long_sub_return(nr_pages, &counter->usage);
	propagate_protected_usage(counter, new);
	/* More uncharges than charges? */
	WARN_ON_ONCE(new < 0);
}

/**
 * page_counter_charge - hierarchically charge pages
 * @counter: counter
 * @nr_pages: number of pages to charge
 *
 * NOTE: This does not consider any configured counter limits.
 */
void page_counter_charge(struct page_counter *counter, unsigned long nr_pages)
{
	struct page_counter *c;

	for (c = counter; c; c = c->parent) {
		long new;

		new = atomic_long_add_return(nr_pages, &c->usage);
		propagate_protected_usage(c, new);
		/*
		 * This is indeed racy, but we can live with some
		 * inaccuracy in the watermark.
		 */
		if (new > READ_ONCE(c->watermark))
			WRITE_ONCE(c->watermark, new);
	}
}

/**
 * page_counter_try_charge - try to hierarchically charge pages
 * @counter: counter
 * @nr_pages: number of pages to charge
 * @fail: points first counter to hit its limit, if any
 *
 * Returns %true on success, or %false and @fail if the counter or one
 * of its ancestors has hit its configured limit.
 */
bool page_counter_try_charge(struct page_counter *counter,
			     unsigned long nr_pages,
			     struct page_counter **fail)
{
	struct page_counter *c;

	for (c = counter; c; c = c->parent) {
		long new;
		/*
		 * Charge speculatively to avoid an expensive CAS.  If
		 * a bigger charge fails, it might falsely lock out a
		 * racing smaller charge and send it into reclaim
		 * early, but the error is limited to the difference
		 * between the two sizes, which is less than 2M/4M in
		 * case of a THP locking out a regular page charge.
		 *
		 * The atomic_long_add_return() implies a full memory
		 * barrier between incrementing the count and reading
		 * the limit.  When racing with page_counter_set_max(),
		 * we either see the new limit or the setter sees the
		 * counter has changed and retries.
		 */
		new = atomic_long_add_return(nr_pages, &c->usage);
		if (new > c->max) {
			atomic_long_sub(nr_pages, &c->usage);
			propagate_protected_usage(c, new);
			/*
			 * This is racy, but we can live with some
			 * inaccuracy in the failcnt which is only used
			 * to report stats.
			 */
			data_race(c->failcnt++);
			*fail = c;
			goto failed;
		}
		propagate_protected_usage(c, new);
		/*
		 * Just like with failcnt, we can live with some
		 * inaccuracy in the watermark.
		 */
		if (new > READ_ONCE(c->watermark))
			WRITE_ONCE(c->watermark, new);
	}
	return true;

failed:
	for (c = counter; c != *fail; c = c->parent)
		page_counter_cancel(c, nr_pages);

	return false;
}

/**
 * page_counter_uncharge - hierarchically uncharge pages
 * @counter: counter
 * @nr_pages: number of pages to uncharge
 */
void page_counter_uncharge(struct page_counter *counter, unsigned long nr_pages)
{
	struct page_counter *c;

	for (c = counter; c; c = c->parent)
		page_counter_cancel(c, nr_pages);
}

/**
 * page_counter_set_max - set the maximum number of pages allowed
 * @counter: counter
 * @nr_pages: limit to set
 *
 * Returns 0 on success, -EBUSY if the current number of pages on the
 * counter already exceeds the specified limit.
 *
 * The caller must serialize invocations on the same counter.
 */
int page_counter_set_max(struct page_counter *counter, unsigned long nr_pages)
{
	for (;;) {
		unsigned long old;
		long usage;

		/*
		 * Update the limit while making sure that it's not
		 * below the concurrently-changing counter value.
		 *
		 * The xchg implies two full memory barriers before
		 * and after, so the read-swap-read is ordered and
		 * ensures coherency with page_counter_try_charge():
		 * that function modifies the count before checking
		 * the limit, so if it sees the old limit, we see the
		 * modified counter and retry.
		 */
		usage = atomic_long_read(&counter->usage);

		if (usage > nr_pages)
			return -EBUSY;

		old = xchg(&counter->max, nr_pages);

		if (atomic_long_read(&counter->usage) <= usage)
			return 0;

		counter->max = old;
		cond_resched();
	}
}

/**
 * page_counter_set_min - set the amount of protected memory
 * @counter: counter
 * @nr_pages: value to set
 *
 * The caller must serialize invocations on the same counter.
 */
void page_counter_set_min(struct page_counter *counter, unsigned long nr_pages)
{
	struct page_counter *c;

	WRITE_ONCE(counter->min, nr_pages);

	for (c = counter; c; c = c->parent)
		propagate_protected_usage(c, atomic_long_read(&c->usage));
}

/**
 * page_counter_set_low - set the amount of protected memory
 * @counter: counter
 * @nr_pages: value to set
 *
 * The caller must serialize invocations on the same counter.
 */
void page_counter_set_low(struct page_counter *counter, unsigned long nr_pages)
{
	struct page_counter *c;

	WRITE_ONCE(counter->low, nr_pages);

	for (c = counter; c; c = c->parent)
		propagate_protected_usage(c, atomic_long_read(&c->usage));
}

/**
 * page_counter_memparse - memparse() for page counter limits
 * @buf: string to parse
 * @max: string meaning maximum possible value
 * @nr_pages: returns the result in number of pages
 *
 * Returns -EINVAL, or 0 and @nr_pages on success.  @nr_pages will be
 * limited to %PAGE_COUNTER_MAX.
 */
int page_counter_memparse(const char *buf, const char *max,
			  unsigned long *nr_pages)
{
	char *end;
	u64 bytes;

	if (!strcmp(buf, max)) {
		*nr_pages = PAGE_COUNTER_MAX;
		return 0;
	}

	bytes = memparse(buf, &end);
	if (*end != '\0')
		return -EINVAL;

	*nr_pages = min(bytes / PAGE_SIZE, (u64)PAGE_COUNTER_MAX);

	return 0;
}
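
/*
 * Illustrative usage sketch, kept in a comment so nothing extra is
 * compiled into this file: a caller would typically pair
 * page_counter_try_charge() with page_counter_uncharge(), failing the
 * allocation (or entering reclaim) when @fail is set to the limiting
 * counter.  The function names below are hypothetical.
 *
 *	static int example_charge(struct page_counter *counter,
 *				  unsigned long nr_pages)
 *	{
 *		struct page_counter *fail;
 *
 *		if (page_counter_try_charge(counter, nr_pages, &fail))
 *			return 0;
 *		return -ENOMEM;
 *	}
 *
 *	static void example_release(struct page_counter *counter,
 *				    unsigned long nr_pages)
 *	{
 *		page_counter_uncharge(counter, nr_pages);
 *	}
 */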