1 /* 2 * 3 * Copyright IBM Corporation, 2012 4 * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> 5 * 6 * Cgroup v2 7 * Copyright (C) 2019 Red Hat, Inc. 8 * Author: Giuseppe Scrivano <gscrivan@redhat.com> 9 * 10 * This program is free software; you can redistribute it and/or modify it 11 * under the terms of version 2.1 of the GNU Lesser General Public License 12 * as published by the Free Software Foundation. 13 * 14 * This program is distributed in the hope that it would be useful, but 15 * WITHOUT ANY WARRANTY; without even the implied warranty of 16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 17 * 18 */ 19 20 #include <linux/cgroup.h> 21 #include <linux/page_counter.h> 22 #include <linux/slab.h> 23 #include <linux/hugetlb.h> 24 #include <linux/hugetlb_cgroup.h> 25 26 #define MEMFILE_PRIVATE(x, val) (((x) << 16) | (val)) 27 #define MEMFILE_IDX(val) (((val) >> 16) & 0xffff) 28 #define MEMFILE_ATTR(val) ((val) & 0xffff) 29 30 #define hugetlb_cgroup_from_counter(counter, idx) \ 31 container_of(counter, struct hugetlb_cgroup, hugepage[idx]) 32 33 static struct hugetlb_cgroup *root_h_cgroup __read_mostly; 34 35 static inline struct page_counter * 36 __hugetlb_cgroup_counter_from_cgroup(struct hugetlb_cgroup *h_cg, int idx, 37 bool rsvd) 38 { 39 if (rsvd) 40 return &h_cg->rsvd_hugepage[idx]; 41 return &h_cg->hugepage[idx]; 42 } 43 44 static inline struct page_counter * 45 hugetlb_cgroup_counter_from_cgroup(struct hugetlb_cgroup *h_cg, int idx) 46 { 47 return __hugetlb_cgroup_counter_from_cgroup(h_cg, idx, false); 48 } 49 50 static inline struct page_counter * 51 hugetlb_cgroup_counter_from_cgroup_rsvd(struct hugetlb_cgroup *h_cg, int idx) 52 { 53 return __hugetlb_cgroup_counter_from_cgroup(h_cg, idx, true); 54 } 55 56 static inline 57 struct hugetlb_cgroup *hugetlb_cgroup_from_css(struct cgroup_subsys_state *s) 58 { 59 return s ? container_of(s, struct hugetlb_cgroup, css) : NULL; 60 } 61 62 static inline 63 struct hugetlb_cgroup *hugetlb_cgroup_from_task(struct task_struct *task) 64 { 65 return hugetlb_cgroup_from_css(task_css(task, hugetlb_cgrp_id)); 66 } 67 68 static inline bool hugetlb_cgroup_is_root(struct hugetlb_cgroup *h_cg) 69 { 70 return (h_cg == root_h_cgroup); 71 } 72 73 static inline struct hugetlb_cgroup * 74 parent_hugetlb_cgroup(struct hugetlb_cgroup *h_cg) 75 { 76 return hugetlb_cgroup_from_css(h_cg->css.parent); 77 } 78 79 static inline bool hugetlb_cgroup_have_usage(struct hugetlb_cgroup *h_cg) 80 { 81 int idx; 82 83 for (idx = 0; idx < hugetlb_max_hstate; idx++) { 84 if (page_counter_read( 85 hugetlb_cgroup_counter_from_cgroup(h_cg, idx)) || 86 page_counter_read(hugetlb_cgroup_counter_from_cgroup_rsvd( 87 h_cg, idx))) { 88 return true; 89 } 90 } 91 return false; 92 } 93 94 static void hugetlb_cgroup_init(struct hugetlb_cgroup *h_cgroup, 95 struct hugetlb_cgroup *parent_h_cgroup) 96 { 97 int idx; 98 99 for (idx = 0; idx < HUGE_MAX_HSTATE; idx++) { 100 struct page_counter *fault_parent = NULL; 101 struct page_counter *rsvd_parent = NULL; 102 unsigned long limit; 103 int ret; 104 105 if (parent_h_cgroup) { 106 fault_parent = hugetlb_cgroup_counter_from_cgroup( 107 parent_h_cgroup, idx); 108 rsvd_parent = hugetlb_cgroup_counter_from_cgroup_rsvd( 109 parent_h_cgroup, idx); 110 } 111 page_counter_init(hugetlb_cgroup_counter_from_cgroup(h_cgroup, 112 idx), 113 fault_parent); 114 page_counter_init( 115 hugetlb_cgroup_counter_from_cgroup_rsvd(h_cgroup, idx), 116 rsvd_parent); 117 118 limit = round_down(PAGE_COUNTER_MAX, 119 1 << huge_page_order(&hstates[idx])); 120 121 ret = page_counter_set_max( 122 hugetlb_cgroup_counter_from_cgroup(h_cgroup, idx), 123 limit); 124 VM_BUG_ON(ret); 125 ret = page_counter_set_max( 126 hugetlb_cgroup_counter_from_cgroup_rsvd(h_cgroup, idx), 127 limit); 128 VM_BUG_ON(ret); 129 } 130 } 131 132 static struct cgroup_subsys_state * 133 hugetlb_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) 134 { 135 struct hugetlb_cgroup *parent_h_cgroup = hugetlb_cgroup_from_css(parent_css); 136 struct hugetlb_cgroup *h_cgroup; 137 138 h_cgroup = kzalloc(sizeof(*h_cgroup), GFP_KERNEL); 139 if (!h_cgroup) 140 return ERR_PTR(-ENOMEM); 141 142 if (!parent_h_cgroup) 143 root_h_cgroup = h_cgroup; 144 145 hugetlb_cgroup_init(h_cgroup, parent_h_cgroup); 146 return &h_cgroup->css; 147 } 148 149 static void hugetlb_cgroup_css_free(struct cgroup_subsys_state *css) 150 { 151 struct hugetlb_cgroup *h_cgroup; 152 153 h_cgroup = hugetlb_cgroup_from_css(css); 154 kfree(h_cgroup); 155 } 156 157 /* 158 * Should be called with hugetlb_lock held. 159 * Since we are holding hugetlb_lock, pages cannot get moved from 160 * active list or uncharged from the cgroup, So no need to get 161 * page reference and test for page active here. This function 162 * cannot fail. 163 */ 164 static void hugetlb_cgroup_move_parent(int idx, struct hugetlb_cgroup *h_cg, 165 struct page *page) 166 { 167 unsigned int nr_pages; 168 struct page_counter *counter; 169 struct hugetlb_cgroup *page_hcg; 170 struct hugetlb_cgroup *parent = parent_hugetlb_cgroup(h_cg); 171 172 page_hcg = hugetlb_cgroup_from_page(page); 173 /* 174 * We can have pages in active list without any cgroup 175 * ie, hugepage with less than 3 pages. We can safely 176 * ignore those pages. 177 */ 178 if (!page_hcg || page_hcg != h_cg) 179 goto out; 180 181 nr_pages = compound_nr(page); 182 if (!parent) { 183 parent = root_h_cgroup; 184 /* root has no limit */ 185 page_counter_charge(&parent->hugepage[idx], nr_pages); 186 } 187 counter = &h_cg->hugepage[idx]; 188 /* Take the pages off the local counter */ 189 page_counter_cancel(counter, nr_pages); 190 191 set_hugetlb_cgroup(page, parent); 192 out: 193 return; 194 } 195 196 /* 197 * Force the hugetlb cgroup to empty the hugetlb resources by moving them to 198 * the parent cgroup. 199 */ 200 static void hugetlb_cgroup_css_offline(struct cgroup_subsys_state *css) 201 { 202 struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(css); 203 struct hstate *h; 204 struct page *page; 205 int idx = 0; 206 207 do { 208 for_each_hstate(h) { 209 spin_lock(&hugetlb_lock); 210 list_for_each_entry(page, &h->hugepage_activelist, lru) 211 hugetlb_cgroup_move_parent(idx, h_cg, page); 212 213 spin_unlock(&hugetlb_lock); 214 idx++; 215 } 216 cond_resched(); 217 } while (hugetlb_cgroup_have_usage(h_cg)); 218 } 219 220 static inline void hugetlb_event(struct hugetlb_cgroup *hugetlb, int idx, 221 enum hugetlb_memory_event event) 222 { 223 atomic_long_inc(&hugetlb->events_local[idx][event]); 224 cgroup_file_notify(&hugetlb->events_local_file[idx]); 225 226 do { 227 atomic_long_inc(&hugetlb->events[idx][event]); 228 cgroup_file_notify(&hugetlb->events_file[idx]); 229 } while ((hugetlb = parent_hugetlb_cgroup(hugetlb)) && 230 !hugetlb_cgroup_is_root(hugetlb)); 231 } 232 233 static int __hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages, 234 struct hugetlb_cgroup **ptr, 235 bool rsvd) 236 { 237 int ret = 0; 238 struct page_counter *counter; 239 struct hugetlb_cgroup *h_cg = NULL; 240 241 if (hugetlb_cgroup_disabled()) 242 goto done; 243 /* 244 * We don't charge any cgroup if the compound page have less 245 * than 3 pages. 246 */ 247 if (huge_page_order(&hstates[idx]) < HUGETLB_CGROUP_MIN_ORDER) 248 goto done; 249 again: 250 rcu_read_lock(); 251 h_cg = hugetlb_cgroup_from_task(current); 252 if (!css_tryget(&h_cg->css)) { 253 rcu_read_unlock(); 254 goto again; 255 } 256 rcu_read_unlock(); 257 258 if (!page_counter_try_charge( 259 __hugetlb_cgroup_counter_from_cgroup(h_cg, idx, rsvd), 260 nr_pages, &counter)) { 261 ret = -ENOMEM; 262 hugetlb_event(h_cg, idx, HUGETLB_MAX); 263 css_put(&h_cg->css); 264 goto done; 265 } 266 /* Reservations take a reference to the css because they do not get 267 * reparented. 268 */ 269 if (!rsvd) 270 css_put(&h_cg->css); 271 done: 272 *ptr = h_cg; 273 return ret; 274 } 275 276 int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages, 277 struct hugetlb_cgroup **ptr) 278 { 279 return __hugetlb_cgroup_charge_cgroup(idx, nr_pages, ptr, false); 280 } 281 282 int hugetlb_cgroup_charge_cgroup_rsvd(int idx, unsigned long nr_pages, 283 struct hugetlb_cgroup **ptr) 284 { 285 return __hugetlb_cgroup_charge_cgroup(idx, nr_pages, ptr, true); 286 } 287 288 /* Should be called with hugetlb_lock held */ 289 static void __hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages, 290 struct hugetlb_cgroup *h_cg, 291 struct page *page, bool rsvd) 292 { 293 if (hugetlb_cgroup_disabled() || !h_cg) 294 return; 295 296 __set_hugetlb_cgroup(page, h_cg, rsvd); 297 return; 298 } 299 300 void hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages, 301 struct hugetlb_cgroup *h_cg, 302 struct page *page) 303 { 304 __hugetlb_cgroup_commit_charge(idx, nr_pages, h_cg, page, false); 305 } 306 307 void hugetlb_cgroup_commit_charge_rsvd(int idx, unsigned long nr_pages, 308 struct hugetlb_cgroup *h_cg, 309 struct page *page) 310 { 311 __hugetlb_cgroup_commit_charge(idx, nr_pages, h_cg, page, true); 312 } 313 314 /* 315 * Should be called with hugetlb_lock held 316 */ 317 static void __hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages, 318 struct page *page, bool rsvd) 319 { 320 struct hugetlb_cgroup *h_cg; 321 322 if (hugetlb_cgroup_disabled()) 323 return; 324 lockdep_assert_held(&hugetlb_lock); 325 h_cg = __hugetlb_cgroup_from_page(page, rsvd); 326 if (unlikely(!h_cg)) 327 return; 328 __set_hugetlb_cgroup(page, NULL, rsvd); 329 330 page_counter_uncharge(__hugetlb_cgroup_counter_from_cgroup(h_cg, idx, 331 rsvd), 332 nr_pages); 333 334 if (rsvd) 335 css_put(&h_cg->css); 336 337 return; 338 } 339 340 void hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages, 341 struct page *page) 342 { 343 __hugetlb_cgroup_uncharge_page(idx, nr_pages, page, false); 344 } 345 346 void hugetlb_cgroup_uncharge_page_rsvd(int idx, unsigned long nr_pages, 347 struct page *page) 348 { 349 __hugetlb_cgroup_uncharge_page(idx, nr_pages, page, true); 350 } 351 352 static void __hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages, 353 struct hugetlb_cgroup *h_cg, 354 bool rsvd) 355 { 356 if (hugetlb_cgroup_disabled() || !h_cg) 357 return; 358 359 if (huge_page_order(&hstates[idx]) < HUGETLB_CGROUP_MIN_ORDER) 360 return; 361 362 page_counter_uncharge(__hugetlb_cgroup_counter_from_cgroup(h_cg, idx, 363 rsvd), 364 nr_pages); 365 366 if (rsvd) 367 css_put(&h_cg->css); 368 } 369 370 void hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages, 371 struct hugetlb_cgroup *h_cg) 372 { 373 __hugetlb_cgroup_uncharge_cgroup(idx, nr_pages, h_cg, false); 374 } 375 376 void hugetlb_cgroup_uncharge_cgroup_rsvd(int idx, unsigned long nr_pages, 377 struct hugetlb_cgroup *h_cg) 378 { 379 __hugetlb_cgroup_uncharge_cgroup(idx, nr_pages, h_cg, true); 380 } 381 382 void hugetlb_cgroup_uncharge_counter(struct resv_map *resv, unsigned long start, 383 unsigned long end) 384 { 385 if (hugetlb_cgroup_disabled() || !resv || !resv->reservation_counter || 386 !resv->css) 387 return; 388 389 page_counter_uncharge(resv->reservation_counter, 390 (end - start) * resv->pages_per_hpage); 391 css_put(resv->css); 392 } 393 394 enum { 395 RES_USAGE, 396 RES_RSVD_USAGE, 397 RES_LIMIT, 398 RES_RSVD_LIMIT, 399 RES_MAX_USAGE, 400 RES_RSVD_MAX_USAGE, 401 RES_FAILCNT, 402 RES_RSVD_FAILCNT, 403 }; 404 405 static u64 hugetlb_cgroup_read_u64(struct cgroup_subsys_state *css, 406 struct cftype *cft) 407 { 408 struct page_counter *counter; 409 struct page_counter *rsvd_counter; 410 struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(css); 411 412 counter = &h_cg->hugepage[MEMFILE_IDX(cft->private)]; 413 rsvd_counter = &h_cg->rsvd_hugepage[MEMFILE_IDX(cft->private)]; 414 415 switch (MEMFILE_ATTR(cft->private)) { 416 case RES_USAGE: 417 return (u64)page_counter_read(counter) * PAGE_SIZE; 418 case RES_RSVD_USAGE: 419 return (u64)page_counter_read(rsvd_counter) * PAGE_SIZE; 420 case RES_LIMIT: 421 return (u64)counter->max * PAGE_SIZE; 422 case RES_RSVD_LIMIT: 423 return (u64)rsvd_counter->max * PAGE_SIZE; 424 case RES_MAX_USAGE: 425 return (u64)counter->watermark * PAGE_SIZE; 426 case RES_RSVD_MAX_USAGE: 427 return (u64)rsvd_counter->watermark * PAGE_SIZE; 428 case RES_FAILCNT: 429 return counter->failcnt; 430 case RES_RSVD_FAILCNT: 431 return rsvd_counter->failcnt; 432 default: 433 BUG(); 434 } 435 } 436 437 static int hugetlb_cgroup_read_u64_max(struct seq_file *seq, void *v) 438 { 439 int idx; 440 u64 val; 441 struct cftype *cft = seq_cft(seq); 442 unsigned long limit; 443 struct page_counter *counter; 444 struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(seq_css(seq)); 445 446 idx = MEMFILE_IDX(cft->private); 447 counter = &h_cg->hugepage[idx]; 448 449 limit = round_down(PAGE_COUNTER_MAX, 450 1 << huge_page_order(&hstates[idx])); 451 452 switch (MEMFILE_ATTR(cft->private)) { 453 case RES_RSVD_USAGE: 454 counter = &h_cg->rsvd_hugepage[idx]; 455 /* Fall through. */ 456 case RES_USAGE: 457 val = (u64)page_counter_read(counter); 458 seq_printf(seq, "%llu\n", val * PAGE_SIZE); 459 break; 460 case RES_RSVD_LIMIT: 461 counter = &h_cg->rsvd_hugepage[idx]; 462 /* Fall through. */ 463 case RES_LIMIT: 464 val = (u64)counter->max; 465 if (val == limit) 466 seq_puts(seq, "max\n"); 467 else 468 seq_printf(seq, "%llu\n", val * PAGE_SIZE); 469 break; 470 default: 471 BUG(); 472 } 473 474 return 0; 475 } 476 477 static DEFINE_MUTEX(hugetlb_limit_mutex); 478 479 static ssize_t hugetlb_cgroup_write(struct kernfs_open_file *of, 480 char *buf, size_t nbytes, loff_t off, 481 const char *max) 482 { 483 int ret, idx; 484 unsigned long nr_pages; 485 struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(of_css(of)); 486 bool rsvd = false; 487 488 if (hugetlb_cgroup_is_root(h_cg)) /* Can't set limit on root */ 489 return -EINVAL; 490 491 buf = strstrip(buf); 492 ret = page_counter_memparse(buf, max, &nr_pages); 493 if (ret) 494 return ret; 495 496 idx = MEMFILE_IDX(of_cft(of)->private); 497 nr_pages = round_down(nr_pages, 1 << huge_page_order(&hstates[idx])); 498 499 switch (MEMFILE_ATTR(of_cft(of)->private)) { 500 case RES_RSVD_LIMIT: 501 rsvd = true; 502 /* Fall through. */ 503 case RES_LIMIT: 504 mutex_lock(&hugetlb_limit_mutex); 505 ret = page_counter_set_max( 506 __hugetlb_cgroup_counter_from_cgroup(h_cg, idx, rsvd), 507 nr_pages); 508 mutex_unlock(&hugetlb_limit_mutex); 509 break; 510 default: 511 ret = -EINVAL; 512 break; 513 } 514 return ret ?: nbytes; 515 } 516 517 static ssize_t hugetlb_cgroup_write_legacy(struct kernfs_open_file *of, 518 char *buf, size_t nbytes, loff_t off) 519 { 520 return hugetlb_cgroup_write(of, buf, nbytes, off, "-1"); 521 } 522 523 static ssize_t hugetlb_cgroup_write_dfl(struct kernfs_open_file *of, 524 char *buf, size_t nbytes, loff_t off) 525 { 526 return hugetlb_cgroup_write(of, buf, nbytes, off, "max"); 527 } 528 529 static ssize_t hugetlb_cgroup_reset(struct kernfs_open_file *of, 530 char *buf, size_t nbytes, loff_t off) 531 { 532 int ret = 0; 533 struct page_counter *counter, *rsvd_counter; 534 struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(of_css(of)); 535 536 counter = &h_cg->hugepage[MEMFILE_IDX(of_cft(of)->private)]; 537 rsvd_counter = &h_cg->rsvd_hugepage[MEMFILE_IDX(of_cft(of)->private)]; 538 539 switch (MEMFILE_ATTR(of_cft(of)->private)) { 540 case RES_MAX_USAGE: 541 page_counter_reset_watermark(counter); 542 break; 543 case RES_RSVD_MAX_USAGE: 544 page_counter_reset_watermark(rsvd_counter); 545 break; 546 case RES_FAILCNT: 547 counter->failcnt = 0; 548 break; 549 case RES_RSVD_FAILCNT: 550 rsvd_counter->failcnt = 0; 551 break; 552 default: 553 ret = -EINVAL; 554 break; 555 } 556 return ret ?: nbytes; 557 } 558 559 static char *mem_fmt(char *buf, int size, unsigned long hsize) 560 { 561 if (hsize >= (1UL << 30)) 562 snprintf(buf, size, "%luGB", hsize >> 30); 563 else if (hsize >= (1UL << 20)) 564 snprintf(buf, size, "%luMB", hsize >> 20); 565 else 566 snprintf(buf, size, "%luKB", hsize >> 10); 567 return buf; 568 } 569 570 static int __hugetlb_events_show(struct seq_file *seq, bool local) 571 { 572 int idx; 573 long max; 574 struct cftype *cft = seq_cft(seq); 575 struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(seq_css(seq)); 576 577 idx = MEMFILE_IDX(cft->private); 578 579 if (local) 580 max = atomic_long_read(&h_cg->events_local[idx][HUGETLB_MAX]); 581 else 582 max = atomic_long_read(&h_cg->events[idx][HUGETLB_MAX]); 583 584 seq_printf(seq, "max %lu\n", max); 585 586 return 0; 587 } 588 589 static int hugetlb_events_show(struct seq_file *seq, void *v) 590 { 591 return __hugetlb_events_show(seq, false); 592 } 593 594 static int hugetlb_events_local_show(struct seq_file *seq, void *v) 595 { 596 return __hugetlb_events_show(seq, true); 597 } 598 599 static void __init __hugetlb_cgroup_file_dfl_init(int idx) 600 { 601 char buf[32]; 602 struct cftype *cft; 603 struct hstate *h = &hstates[idx]; 604 605 /* format the size */ 606 mem_fmt(buf, sizeof(buf), huge_page_size(h)); 607 608 /* Add the limit file */ 609 cft = &h->cgroup_files_dfl[0]; 610 snprintf(cft->name, MAX_CFTYPE_NAME, "%s.max", buf); 611 cft->private = MEMFILE_PRIVATE(idx, RES_LIMIT); 612 cft->seq_show = hugetlb_cgroup_read_u64_max; 613 cft->write = hugetlb_cgroup_write_dfl; 614 cft->flags = CFTYPE_NOT_ON_ROOT; 615 616 /* Add the reservation limit file */ 617 cft = &h->cgroup_files_dfl[1]; 618 snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.max", buf); 619 cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_LIMIT); 620 cft->seq_show = hugetlb_cgroup_read_u64_max; 621 cft->write = hugetlb_cgroup_write_dfl; 622 cft->flags = CFTYPE_NOT_ON_ROOT; 623 624 /* Add the current usage file */ 625 cft = &h->cgroup_files_dfl[2]; 626 snprintf(cft->name, MAX_CFTYPE_NAME, "%s.current", buf); 627 cft->private = MEMFILE_PRIVATE(idx, RES_USAGE); 628 cft->seq_show = hugetlb_cgroup_read_u64_max; 629 cft->flags = CFTYPE_NOT_ON_ROOT; 630 631 /* Add the current reservation usage file */ 632 cft = &h->cgroup_files_dfl[3]; 633 snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.current", buf); 634 cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_USAGE); 635 cft->seq_show = hugetlb_cgroup_read_u64_max; 636 cft->flags = CFTYPE_NOT_ON_ROOT; 637 638 /* Add the events file */ 639 cft = &h->cgroup_files_dfl[4]; 640 snprintf(cft->name, MAX_CFTYPE_NAME, "%s.events", buf); 641 cft->private = MEMFILE_PRIVATE(idx, 0); 642 cft->seq_show = hugetlb_events_show; 643 cft->file_offset = offsetof(struct hugetlb_cgroup, events_file[idx]), 644 cft->flags = CFTYPE_NOT_ON_ROOT; 645 646 /* Add the events.local file */ 647 cft = &h->cgroup_files_dfl[5]; 648 snprintf(cft->name, MAX_CFTYPE_NAME, "%s.events.local", buf); 649 cft->private = MEMFILE_PRIVATE(idx, 0); 650 cft->seq_show = hugetlb_events_local_show; 651 cft->file_offset = offsetof(struct hugetlb_cgroup, 652 events_local_file[idx]), 653 cft->flags = CFTYPE_NOT_ON_ROOT; 654 655 /* NULL terminate the last cft */ 656 cft = &h->cgroup_files_dfl[6]; 657 memset(cft, 0, sizeof(*cft)); 658 659 WARN_ON(cgroup_add_dfl_cftypes(&hugetlb_cgrp_subsys, 660 h->cgroup_files_dfl)); 661 } 662 663 static void __init __hugetlb_cgroup_file_legacy_init(int idx) 664 { 665 char buf[32]; 666 struct cftype *cft; 667 struct hstate *h = &hstates[idx]; 668 669 /* format the size */ 670 mem_fmt(buf, sizeof(buf), huge_page_size(h)); 671 672 /* Add the limit file */ 673 cft = &h->cgroup_files_legacy[0]; 674 snprintf(cft->name, MAX_CFTYPE_NAME, "%s.limit_in_bytes", buf); 675 cft->private = MEMFILE_PRIVATE(idx, RES_LIMIT); 676 cft->read_u64 = hugetlb_cgroup_read_u64; 677 cft->write = hugetlb_cgroup_write_legacy; 678 679 /* Add the reservation limit file */ 680 cft = &h->cgroup_files_legacy[1]; 681 snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.limit_in_bytes", buf); 682 cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_LIMIT); 683 cft->read_u64 = hugetlb_cgroup_read_u64; 684 cft->write = hugetlb_cgroup_write_legacy; 685 686 /* Add the usage file */ 687 cft = &h->cgroup_files_legacy[2]; 688 snprintf(cft->name, MAX_CFTYPE_NAME, "%s.usage_in_bytes", buf); 689 cft->private = MEMFILE_PRIVATE(idx, RES_USAGE); 690 cft->read_u64 = hugetlb_cgroup_read_u64; 691 692 /* Add the reservation usage file */ 693 cft = &h->cgroup_files_legacy[3]; 694 snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.usage_in_bytes", buf); 695 cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_USAGE); 696 cft->read_u64 = hugetlb_cgroup_read_u64; 697 698 /* Add the MAX usage file */ 699 cft = &h->cgroup_files_legacy[4]; 700 snprintf(cft->name, MAX_CFTYPE_NAME, "%s.max_usage_in_bytes", buf); 701 cft->private = MEMFILE_PRIVATE(idx, RES_MAX_USAGE); 702 cft->write = hugetlb_cgroup_reset; 703 cft->read_u64 = hugetlb_cgroup_read_u64; 704 705 /* Add the MAX reservation usage file */ 706 cft = &h->cgroup_files_legacy[5]; 707 snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.max_usage_in_bytes", buf); 708 cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_MAX_USAGE); 709 cft->write = hugetlb_cgroup_reset; 710 cft->read_u64 = hugetlb_cgroup_read_u64; 711 712 /* Add the failcntfile */ 713 cft = &h->cgroup_files_legacy[6]; 714 snprintf(cft->name, MAX_CFTYPE_NAME, "%s.failcnt", buf); 715 cft->private = MEMFILE_PRIVATE(idx, RES_FAILCNT); 716 cft->write = hugetlb_cgroup_reset; 717 cft->read_u64 = hugetlb_cgroup_read_u64; 718 719 /* Add the reservation failcntfile */ 720 cft = &h->cgroup_files_legacy[7]; 721 snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.failcnt", buf); 722 cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_FAILCNT); 723 cft->write = hugetlb_cgroup_reset; 724 cft->read_u64 = hugetlb_cgroup_read_u64; 725 726 /* NULL terminate the last cft */ 727 cft = &h->cgroup_files_legacy[8]; 728 memset(cft, 0, sizeof(*cft)); 729 730 WARN_ON(cgroup_add_legacy_cftypes(&hugetlb_cgrp_subsys, 731 h->cgroup_files_legacy)); 732 } 733 734 static void __init __hugetlb_cgroup_file_init(int idx) 735 { 736 __hugetlb_cgroup_file_dfl_init(idx); 737 __hugetlb_cgroup_file_legacy_init(idx); 738 } 739 740 void __init hugetlb_cgroup_file_init(void) 741 { 742 struct hstate *h; 743 744 for_each_hstate(h) { 745 /* 746 * Add cgroup control files only if the huge page consists 747 * of more than two normal pages. This is because we use 748 * page[2].private for storing cgroup details. 749 */ 750 if (huge_page_order(h) >= HUGETLB_CGROUP_MIN_ORDER) 751 __hugetlb_cgroup_file_init(hstate_index(h)); 752 } 753 } 754 755 /* 756 * hugetlb_lock will make sure a parallel cgroup rmdir won't happen 757 * when we migrate hugepages 758 */ 759 void hugetlb_cgroup_migrate(struct page *oldhpage, struct page *newhpage) 760 { 761 struct hugetlb_cgroup *h_cg; 762 struct hugetlb_cgroup *h_cg_rsvd; 763 struct hstate *h = page_hstate(oldhpage); 764 765 if (hugetlb_cgroup_disabled()) 766 return; 767 768 VM_BUG_ON_PAGE(!PageHuge(oldhpage), oldhpage); 769 spin_lock(&hugetlb_lock); 770 h_cg = hugetlb_cgroup_from_page(oldhpage); 771 h_cg_rsvd = hugetlb_cgroup_from_page_rsvd(oldhpage); 772 set_hugetlb_cgroup(oldhpage, NULL); 773 set_hugetlb_cgroup_rsvd(oldhpage, NULL); 774 775 /* move the h_cg details to new cgroup */ 776 set_hugetlb_cgroup(newhpage, h_cg); 777 set_hugetlb_cgroup_rsvd(newhpage, h_cg_rsvd); 778 list_move(&newhpage->lru, &h->hugepage_activelist); 779 spin_unlock(&hugetlb_lock); 780 return; 781 } 782 783 static struct cftype hugetlb_files[] = { 784 {} /* terminate */ 785 }; 786 787 struct cgroup_subsys hugetlb_cgrp_subsys = { 788 .css_alloc = hugetlb_cgroup_css_alloc, 789 .css_offline = hugetlb_cgroup_css_offline, 790 .css_free = hugetlb_cgroup_css_free, 791 .dfl_cftypes = hugetlb_files, 792 .legacy_cftypes = hugetlb_files, 793 }; 794