/*
 *
 * Copyright IBM Corporation, 2012
 * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
 *
 * Cgroup v2
 * Copyright (C) 2019 Red Hat, Inc.
 * Author: Giuseppe Scrivano <gscrivan@redhat.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of version 2.1 of the GNU Lesser General Public License
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *
 */

#include <linux/cgroup.h>
#include <linux/page_counter.h>
#include <linux/slab.h>
#include <linux/hugetlb.h>
#include <linux/hugetlb_cgroup.h>

#define MEMFILE_PRIVATE(x, val)	(((x) << 16) | (val))
#define MEMFILE_IDX(val)	(((val) >> 16) & 0xffff)
#define MEMFILE_ATTR(val)	((val) & 0xffff)

static struct hugetlb_cgroup *root_h_cgroup __read_mostly;

static inline struct page_counter *
__hugetlb_cgroup_counter_from_cgroup(struct hugetlb_cgroup *h_cg, int idx,
				     bool rsvd)
{
	if (rsvd)
		return &h_cg->rsvd_hugepage[idx];
	return &h_cg->hugepage[idx];
}

static inline struct page_counter *
hugetlb_cgroup_counter_from_cgroup(struct hugetlb_cgroup *h_cg, int idx)
{
	return __hugetlb_cgroup_counter_from_cgroup(h_cg, idx, false);
}

static inline struct page_counter *
hugetlb_cgroup_counter_from_cgroup_rsvd(struct hugetlb_cgroup *h_cg, int idx)
{
	return __hugetlb_cgroup_counter_from_cgroup(h_cg, idx, true);
}

static inline
struct hugetlb_cgroup *hugetlb_cgroup_from_css(struct cgroup_subsys_state *s)
{
	return s ? container_of(s, struct hugetlb_cgroup, css) : NULL;
}

static inline
struct hugetlb_cgroup *hugetlb_cgroup_from_task(struct task_struct *task)
{
	return hugetlb_cgroup_from_css(task_css(task, hugetlb_cgrp_id));
}

static inline bool hugetlb_cgroup_is_root(struct hugetlb_cgroup *h_cg)
{
	return (h_cg == root_h_cgroup);
}

static inline struct hugetlb_cgroup *
parent_hugetlb_cgroup(struct hugetlb_cgroup *h_cg)
{
	return hugetlb_cgroup_from_css(h_cg->css.parent);
}

static inline bool hugetlb_cgroup_have_usage(struct hugetlb_cgroup *h_cg)
{
	int idx;

	for (idx = 0; idx < hugetlb_max_hstate; idx++) {
		if (page_counter_read(
			    hugetlb_cgroup_counter_from_cgroup(h_cg, idx)))
			return true;
	}
	return false;
}

static void hugetlb_cgroup_init(struct hugetlb_cgroup *h_cgroup,
				struct hugetlb_cgroup *parent_h_cgroup)
{
	int idx;

	for (idx = 0; idx < HUGE_MAX_HSTATE; idx++) {
		struct page_counter *fault_parent = NULL;
		struct page_counter *rsvd_parent = NULL;
		unsigned long limit;
		int ret;

		if (parent_h_cgroup) {
			fault_parent = hugetlb_cgroup_counter_from_cgroup(
				parent_h_cgroup, idx);
			rsvd_parent = hugetlb_cgroup_counter_from_cgroup_rsvd(
				parent_h_cgroup, idx);
		}
		page_counter_init(hugetlb_cgroup_counter_from_cgroup(h_cgroup,
								     idx),
				  fault_parent);
		page_counter_init(
			hugetlb_cgroup_counter_from_cgroup_rsvd(h_cgroup, idx),
			rsvd_parent);

		limit = round_down(PAGE_COUNTER_MAX,
				   pages_per_huge_page(&hstates[idx]));

		ret = page_counter_set_max(
			hugetlb_cgroup_counter_from_cgroup(h_cgroup, idx),
			limit);
		VM_BUG_ON(ret);
		ret = page_counter_set_max(
			hugetlb_cgroup_counter_from_cgroup_rsvd(h_cgroup, idx),
			limit);
		VM_BUG_ON(ret);
	}
}
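
/*
 * Allocate the per-cgroup hugetlb state when a new cgroup is created.
 * The first css allocated (the one without a parent) is remembered as the
 * root cgroup; every per-hstate page counter is initialized against the
 * parent's counter so that charges propagate up the hierarchy.
 */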
static struct cgroup_subsys_state *
hugetlb_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
{
	struct hugetlb_cgroup *parent_h_cgroup = hugetlb_cgroup_from_css(parent_css);
	struct hugetlb_cgroup *h_cgroup;

	h_cgroup = kzalloc(sizeof(*h_cgroup), GFP_KERNEL);
	if (!h_cgroup)
		return ERR_PTR(-ENOMEM);

	if (!parent_h_cgroup)
		root_h_cgroup = h_cgroup;

	hugetlb_cgroup_init(h_cgroup, parent_h_cgroup);
	return &h_cgroup->css;
}

static void hugetlb_cgroup_css_free(struct cgroup_subsys_state *css)
{
	struct hugetlb_cgroup *h_cgroup;

	h_cgroup = hugetlb_cgroup_from_css(css);
	kfree(h_cgroup);
}

/*
 * Should be called with hugetlb_lock held.
 * Since we are holding hugetlb_lock, pages cannot be moved from the
 * active list or uncharged from the cgroup, so there is no need to take
 * a page reference and test for page active here. This function
 * cannot fail.
 */
static void hugetlb_cgroup_move_parent(int idx, struct hugetlb_cgroup *h_cg,
				       struct page *page)
{
	unsigned int nr_pages;
	struct page_counter *counter;
	struct hugetlb_cgroup *page_hcg;
	struct hugetlb_cgroup *parent = parent_hugetlb_cgroup(h_cg);

	page_hcg = hugetlb_cgroup_from_page(page);
	/*
	 * We can have pages on the active list without any cgroup,
	 * i.e., a hugepage with fewer than 3 pages. We can safely
	 * ignore those pages.
	 */
	if (!page_hcg || page_hcg != h_cg)
		goto out;

	nr_pages = compound_nr(page);
	if (!parent) {
		parent = root_h_cgroup;
		/* root has no limit */
		page_counter_charge(&parent->hugepage[idx], nr_pages);
	}
	counter = &h_cg->hugepage[idx];
	/* Take the pages off the local counter */
	page_counter_cancel(counter, nr_pages);

	set_hugetlb_cgroup(page, parent);
out:
	return;
}

/*
 * Force the hugetlb cgroup to empty the hugetlb resources by moving them to
 * the parent cgroup.
 */
static void hugetlb_cgroup_css_offline(struct cgroup_subsys_state *css)
{
	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(css);
	struct hstate *h;
	struct page *page;
	int idx;

	do {
		idx = 0;
		for_each_hstate(h) {
			spin_lock_irq(&hugetlb_lock);
			list_for_each_entry(page, &h->hugepage_activelist, lru)
				hugetlb_cgroup_move_parent(idx, h_cg, page);

			spin_unlock_irq(&hugetlb_lock);
			idx++;
		}
		cond_resched();
	} while (hugetlb_cgroup_have_usage(h_cg));
}

static inline void hugetlb_event(struct hugetlb_cgroup *hugetlb, int idx,
				 enum hugetlb_memory_event event)
{
	atomic_long_inc(&hugetlb->events_local[idx][event]);
	cgroup_file_notify(&hugetlb->events_local_file[idx]);

	do {
		atomic_long_inc(&hugetlb->events[idx][event]);
		cgroup_file_notify(&hugetlb->events_file[idx]);
	} while ((hugetlb = parent_hugetlb_cgroup(hugetlb)) &&
		 !hugetlb_cgroup_is_root(hugetlb));
}
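
/*
 * Charge nr_pages of hstate idx against the current task's hugetlb cgroup.
 * The charged cgroup is returned through *ptr so the caller can commit the
 * charge to a page later; for reservation charges a css reference is kept,
 * because reserved pages are not reparented when the cgroup is removed.
 */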
static int __hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
					  struct hugetlb_cgroup **ptr,
					  bool rsvd)
{
	int ret = 0;
	struct page_counter *counter;
	struct hugetlb_cgroup *h_cg = NULL;

	if (hugetlb_cgroup_disabled())
		goto done;
	/*
	 * We don't charge any cgroup if the compound page has fewer
	 * than 3 pages.
	 */
	if (huge_page_order(&hstates[idx]) < HUGETLB_CGROUP_MIN_ORDER)
		goto done;
again:
	rcu_read_lock();
	h_cg = hugetlb_cgroup_from_task(current);
	if (!css_tryget(&h_cg->css)) {
		rcu_read_unlock();
		goto again;
	}
	rcu_read_unlock();

	if (!page_counter_try_charge(
		    __hugetlb_cgroup_counter_from_cgroup(h_cg, idx, rsvd),
		    nr_pages, &counter)) {
		ret = -ENOMEM;
		hugetlb_event(h_cg, idx, HUGETLB_MAX);
		css_put(&h_cg->css);
		goto done;
	}
	/* Reservations take a reference to the css because they do not get
	 * reparented.
	 */
	if (!rsvd)
		css_put(&h_cg->css);
done:
	*ptr = h_cg;
	return ret;
}

int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
				 struct hugetlb_cgroup **ptr)
{
	return __hugetlb_cgroup_charge_cgroup(idx, nr_pages, ptr, false);
}

int hugetlb_cgroup_charge_cgroup_rsvd(int idx, unsigned long nr_pages,
				      struct hugetlb_cgroup **ptr)
{
	return __hugetlb_cgroup_charge_cgroup(idx, nr_pages, ptr, true);
}

/* Should be called with hugetlb_lock held */
static void __hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages,
					   struct hugetlb_cgroup *h_cg,
					   struct page *page, bool rsvd)
{
	if (hugetlb_cgroup_disabled() || !h_cg)
		return;

	__set_hugetlb_cgroup(page, h_cg, rsvd);
	return;
}

void hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages,
				  struct hugetlb_cgroup *h_cg,
				  struct page *page)
{
	__hugetlb_cgroup_commit_charge(idx, nr_pages, h_cg, page, false);
}

void hugetlb_cgroup_commit_charge_rsvd(int idx, unsigned long nr_pages,
				       struct hugetlb_cgroup *h_cg,
				       struct page *page)
{
	__hugetlb_cgroup_commit_charge(idx, nr_pages, h_cg, page, true);
}

/*
 * Should be called with hugetlb_lock held
 */
static void __hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages,
					   struct page *page, bool rsvd)
{
	struct hugetlb_cgroup *h_cg;

	if (hugetlb_cgroup_disabled())
		return;
	lockdep_assert_held(&hugetlb_lock);
	h_cg = __hugetlb_cgroup_from_page(page, rsvd);
	if (unlikely(!h_cg))
		return;
	__set_hugetlb_cgroup(page, NULL, rsvd);

	page_counter_uncharge(__hugetlb_cgroup_counter_from_cgroup(h_cg, idx,
								   rsvd),
			      nr_pages);

	if (rsvd)
		css_put(&h_cg->css);

	return;
}

void hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages,
				  struct page *page)
{
	__hugetlb_cgroup_uncharge_page(idx, nr_pages, page, false);
}

void hugetlb_cgroup_uncharge_page_rsvd(int idx, unsigned long nr_pages,
				       struct page *page)
{
	__hugetlb_cgroup_uncharge_page(idx, nr_pages, page, true);
}

static void __hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages,
					     struct hugetlb_cgroup *h_cg,
					     bool rsvd)
{
	if (hugetlb_cgroup_disabled() || !h_cg)
		return;

	if (huge_page_order(&hstates[idx]) < HUGETLB_CGROUP_MIN_ORDER)
		return;

	page_counter_uncharge(__hugetlb_cgroup_counter_from_cgroup(h_cg, idx,
								   rsvd),
			      nr_pages);

	if (rsvd)
		css_put(&h_cg->css);
}

void hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages,
				    struct hugetlb_cgroup *h_cg)
{
	__hugetlb_cgroup_uncharge_cgroup(idx, nr_pages, h_cg, false);
}

void hugetlb_cgroup_uncharge_cgroup_rsvd(int idx, unsigned long nr_pages,
					 struct hugetlb_cgroup *h_cg)
{
	__hugetlb_cgroup_uncharge_cgroup(idx, nr_pages, h_cg, true);
}
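
/*
 * Release the reservation charge recorded in a resv_map for the range
 * [start, end) and drop the css reference held by the reservation map.
 */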
void hugetlb_cgroup_uncharge_counter(struct resv_map *resv, unsigned long start,
				     unsigned long end)
{
	if (hugetlb_cgroup_disabled() || !resv || !resv->reservation_counter ||
	    !resv->css)
		return;

	page_counter_uncharge(resv->reservation_counter,
			      (end - start) * resv->pages_per_hpage);
	css_put(resv->css);
}

void hugetlb_cgroup_uncharge_file_region(struct resv_map *resv,
					 struct file_region *rg,
					 unsigned long nr_pages,
					 bool region_del)
{
	if (hugetlb_cgroup_disabled() || !resv || !rg || !nr_pages)
		return;

	if (rg->reservation_counter && resv->pages_per_hpage && nr_pages > 0 &&
	    !resv->reservation_counter) {
		page_counter_uncharge(rg->reservation_counter,
				      nr_pages * resv->pages_per_hpage);
		/*
		 * Only do css_put(rg->css) when we delete the entire region
		 * because one file_region must hold exactly one css reference.
		 */
		if (region_del)
			css_put(rg->css);
	}
}

enum {
	RES_USAGE,
	RES_RSVD_USAGE,
	RES_LIMIT,
	RES_RSVD_LIMIT,
	RES_MAX_USAGE,
	RES_RSVD_MAX_USAGE,
	RES_FAILCNT,
	RES_RSVD_FAILCNT,
};

static u64 hugetlb_cgroup_read_u64(struct cgroup_subsys_state *css,
				   struct cftype *cft)
{
	struct page_counter *counter;
	struct page_counter *rsvd_counter;
	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(css);

	counter = &h_cg->hugepage[MEMFILE_IDX(cft->private)];
	rsvd_counter = &h_cg->rsvd_hugepage[MEMFILE_IDX(cft->private)];

	switch (MEMFILE_ATTR(cft->private)) {
	case RES_USAGE:
		return (u64)page_counter_read(counter) * PAGE_SIZE;
	case RES_RSVD_USAGE:
		return (u64)page_counter_read(rsvd_counter) * PAGE_SIZE;
	case RES_LIMIT:
		return (u64)counter->max * PAGE_SIZE;
	case RES_RSVD_LIMIT:
		return (u64)rsvd_counter->max * PAGE_SIZE;
	case RES_MAX_USAGE:
		return (u64)counter->watermark * PAGE_SIZE;
	case RES_RSVD_MAX_USAGE:
		return (u64)rsvd_counter->watermark * PAGE_SIZE;
	case RES_FAILCNT:
		return counter->failcnt;
	case RES_RSVD_FAILCNT:
		return rsvd_counter->failcnt;
	default:
		BUG();
	}
}

static int hugetlb_cgroup_read_u64_max(struct seq_file *seq, void *v)
{
	int idx;
	u64 val;
	struct cftype *cft = seq_cft(seq);
	unsigned long limit;
	struct page_counter *counter;
	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(seq_css(seq));

	idx = MEMFILE_IDX(cft->private);
	counter = &h_cg->hugepage[idx];

	limit = round_down(PAGE_COUNTER_MAX,
			   pages_per_huge_page(&hstates[idx]));

	switch (MEMFILE_ATTR(cft->private)) {
	case RES_RSVD_USAGE:
		counter = &h_cg->rsvd_hugepage[idx];
		fallthrough;
	case RES_USAGE:
		val = (u64)page_counter_read(counter);
		seq_printf(seq, "%llu\n", val * PAGE_SIZE);
		break;
	case RES_RSVD_LIMIT:
		counter = &h_cg->rsvd_hugepage[idx];
		fallthrough;
	case RES_LIMIT:
		val = (u64)counter->max;
		if (val == limit)
			seq_puts(seq, "max\n");
		else
			seq_printf(seq, "%llu\n", val * PAGE_SIZE);
		break;
	default:
		BUG();
	}

	return 0;
}

static DEFINE_MUTEX(hugetlb_limit_mutex);
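
/*
 * Common write handler for the per-hstate limit files.  @max is the token
 * meaning "no limit": "-1" on the legacy (v1) hierarchy and "max" on the
 * default (v2) hierarchy.  The new limit is rounded down to a multiple of
 * the huge page size before being applied to the fault or reservation
 * counter.
 */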
static ssize_t hugetlb_cgroup_write(struct kernfs_open_file *of,
				    char *buf, size_t nbytes, loff_t off,
				    const char *max)
{
	int ret, idx;
	unsigned long nr_pages;
	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(of_css(of));
	bool rsvd = false;

	if (hugetlb_cgroup_is_root(h_cg)) /* Can't set limit on root */
		return -EINVAL;

	buf = strstrip(buf);
	ret = page_counter_memparse(buf, max, &nr_pages);
	if (ret)
		return ret;

	idx = MEMFILE_IDX(of_cft(of)->private);
	nr_pages = round_down(nr_pages, pages_per_huge_page(&hstates[idx]));

	switch (MEMFILE_ATTR(of_cft(of)->private)) {
	case RES_RSVD_LIMIT:
		rsvd = true;
		fallthrough;
	case RES_LIMIT:
		mutex_lock(&hugetlb_limit_mutex);
		ret = page_counter_set_max(
			__hugetlb_cgroup_counter_from_cgroup(h_cg, idx, rsvd),
			nr_pages);
		mutex_unlock(&hugetlb_limit_mutex);
		break;
	default:
		ret = -EINVAL;
		break;
	}
	return ret ?: nbytes;
}

static ssize_t hugetlb_cgroup_write_legacy(struct kernfs_open_file *of,
					   char *buf, size_t nbytes, loff_t off)
{
	return hugetlb_cgroup_write(of, buf, nbytes, off, "-1");
}

static ssize_t hugetlb_cgroup_write_dfl(struct kernfs_open_file *of,
					char *buf, size_t nbytes, loff_t off)
{
	return hugetlb_cgroup_write(of, buf, nbytes, off, "max");
}

static ssize_t hugetlb_cgroup_reset(struct kernfs_open_file *of,
				    char *buf, size_t nbytes, loff_t off)
{
	int ret = 0;
	struct page_counter *counter, *rsvd_counter;
	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(of_css(of));

	counter = &h_cg->hugepage[MEMFILE_IDX(of_cft(of)->private)];
	rsvd_counter = &h_cg->rsvd_hugepage[MEMFILE_IDX(of_cft(of)->private)];

	switch (MEMFILE_ATTR(of_cft(of)->private)) {
	case RES_MAX_USAGE:
		page_counter_reset_watermark(counter);
		break;
	case RES_RSVD_MAX_USAGE:
		page_counter_reset_watermark(rsvd_counter);
		break;
	case RES_FAILCNT:
		counter->failcnt = 0;
		break;
	case RES_RSVD_FAILCNT:
		rsvd_counter->failcnt = 0;
		break;
	default:
		ret = -EINVAL;
		break;
	}
	return ret ?: nbytes;
}

static char *mem_fmt(char *buf, int size, unsigned long hsize)
{
	if (hsize >= (1UL << 30))
		snprintf(buf, size, "%luGB", hsize >> 30);
	else if (hsize >= (1UL << 20))
		snprintf(buf, size, "%luMB", hsize >> 20);
	else
		snprintf(buf, size, "%luKB", hsize >> 10);
	return buf;
}

static int __hugetlb_events_show(struct seq_file *seq, bool local)
{
	int idx;
	long max;
	struct cftype *cft = seq_cft(seq);
	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(seq_css(seq));

	idx = MEMFILE_IDX(cft->private);

	if (local)
		max = atomic_long_read(&h_cg->events_local[idx][HUGETLB_MAX]);
	else
		max = atomic_long_read(&h_cg->events[idx][HUGETLB_MAX]);

	seq_printf(seq, "max %lu\n", max);

	return 0;
}

static int hugetlb_events_show(struct seq_file *seq, void *v)
{
	return __hugetlb_events_show(seq, false);
}

static int hugetlb_events_local_show(struct seq_file *seq, void *v)
{
	return __hugetlb_events_show(seq, true);
}
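
/*
 * Register the cgroup v2 (default hierarchy) control files for one hstate:
 * <size>.max, <size>.rsvd.max, <size>.current, <size>.rsvd.current,
 * <size>.events and <size>.events.local, e.g. "hugetlb.2MB.max".
 */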
static void __init __hugetlb_cgroup_file_dfl_init(int idx)
{
	char buf[32];
	struct cftype *cft;
	struct hstate *h = &hstates[idx];

	/* format the size */
	mem_fmt(buf, sizeof(buf), huge_page_size(h));

	/* Add the limit file */
	cft = &h->cgroup_files_dfl[0];
	snprintf(cft->name, MAX_CFTYPE_NAME, "%s.max", buf);
	cft->private = MEMFILE_PRIVATE(idx, RES_LIMIT);
	cft->seq_show = hugetlb_cgroup_read_u64_max;
	cft->write = hugetlb_cgroup_write_dfl;
	cft->flags = CFTYPE_NOT_ON_ROOT;

	/* Add the reservation limit file */
	cft = &h->cgroup_files_dfl[1];
	snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.max", buf);
	cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_LIMIT);
	cft->seq_show = hugetlb_cgroup_read_u64_max;
	cft->write = hugetlb_cgroup_write_dfl;
	cft->flags = CFTYPE_NOT_ON_ROOT;

	/* Add the current usage file */
	cft = &h->cgroup_files_dfl[2];
	snprintf(cft->name, MAX_CFTYPE_NAME, "%s.current", buf);
	cft->private = MEMFILE_PRIVATE(idx, RES_USAGE);
	cft->seq_show = hugetlb_cgroup_read_u64_max;
	cft->flags = CFTYPE_NOT_ON_ROOT;

	/* Add the current reservation usage file */
	cft = &h->cgroup_files_dfl[3];
	snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.current", buf);
	cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_USAGE);
	cft->seq_show = hugetlb_cgroup_read_u64_max;
	cft->flags = CFTYPE_NOT_ON_ROOT;

	/* Add the events file */
	cft = &h->cgroup_files_dfl[4];
	snprintf(cft->name, MAX_CFTYPE_NAME, "%s.events", buf);
	cft->private = MEMFILE_PRIVATE(idx, 0);
	cft->seq_show = hugetlb_events_show;
	cft->file_offset = offsetof(struct hugetlb_cgroup, events_file[idx]);
	cft->flags = CFTYPE_NOT_ON_ROOT;

	/* Add the events.local file */
	cft = &h->cgroup_files_dfl[5];
	snprintf(cft->name, MAX_CFTYPE_NAME, "%s.events.local", buf);
	cft->private = MEMFILE_PRIVATE(idx, 0);
	cft->seq_show = hugetlb_events_local_show;
	cft->file_offset = offsetof(struct hugetlb_cgroup,
				    events_local_file[idx]);
	cft->flags = CFTYPE_NOT_ON_ROOT;

	/* NULL terminate the last cft */
	cft = &h->cgroup_files_dfl[6];
	memset(cft, 0, sizeof(*cft));

	WARN_ON(cgroup_add_dfl_cftypes(&hugetlb_cgrp_subsys,
				       h->cgroup_files_dfl));
}
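
/*
 * Register the cgroup v1 (legacy hierarchy) control files for one hstate:
 * <size>.limit_in_bytes, <size>.usage_in_bytes, <size>.max_usage_in_bytes
 * and <size>.failcnt, plus the corresponding .rsvd.* variants.
 */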
"%s.rsvd.max_usage_in_bytes", buf); 724 cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_MAX_USAGE); 725 cft->write = hugetlb_cgroup_reset; 726 cft->read_u64 = hugetlb_cgroup_read_u64; 727 728 /* Add the failcntfile */ 729 cft = &h->cgroup_files_legacy[6]; 730 snprintf(cft->name, MAX_CFTYPE_NAME, "%s.failcnt", buf); 731 cft->private = MEMFILE_PRIVATE(idx, RES_FAILCNT); 732 cft->write = hugetlb_cgroup_reset; 733 cft->read_u64 = hugetlb_cgroup_read_u64; 734 735 /* Add the reservation failcntfile */ 736 cft = &h->cgroup_files_legacy[7]; 737 snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.failcnt", buf); 738 cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_FAILCNT); 739 cft->write = hugetlb_cgroup_reset; 740 cft->read_u64 = hugetlb_cgroup_read_u64; 741 742 /* NULL terminate the last cft */ 743 cft = &h->cgroup_files_legacy[8]; 744 memset(cft, 0, sizeof(*cft)); 745 746 WARN_ON(cgroup_add_legacy_cftypes(&hugetlb_cgrp_subsys, 747 h->cgroup_files_legacy)); 748 } 749 750 static void __init __hugetlb_cgroup_file_init(int idx) 751 { 752 __hugetlb_cgroup_file_dfl_init(idx); 753 __hugetlb_cgroup_file_legacy_init(idx); 754 } 755 756 void __init hugetlb_cgroup_file_init(void) 757 { 758 struct hstate *h; 759 760 for_each_hstate(h) { 761 /* 762 * Add cgroup control files only if the huge page consists 763 * of more than two normal pages. This is because we use 764 * page[2].private for storing cgroup details. 765 */ 766 if (huge_page_order(h) >= HUGETLB_CGROUP_MIN_ORDER) 767 __hugetlb_cgroup_file_init(hstate_index(h)); 768 } 769 } 770 771 /* 772 * hugetlb_lock will make sure a parallel cgroup rmdir won't happen 773 * when we migrate hugepages 774 */ 775 void hugetlb_cgroup_migrate(struct page *oldhpage, struct page *newhpage) 776 { 777 struct hugetlb_cgroup *h_cg; 778 struct hugetlb_cgroup *h_cg_rsvd; 779 struct hstate *h = page_hstate(oldhpage); 780 781 if (hugetlb_cgroup_disabled()) 782 return; 783 784 spin_lock_irq(&hugetlb_lock); 785 h_cg = hugetlb_cgroup_from_page(oldhpage); 786 h_cg_rsvd = hugetlb_cgroup_from_page_rsvd(oldhpage); 787 set_hugetlb_cgroup(oldhpage, NULL); 788 set_hugetlb_cgroup_rsvd(oldhpage, NULL); 789 790 /* move the h_cg details to new cgroup */ 791 set_hugetlb_cgroup(newhpage, h_cg); 792 set_hugetlb_cgroup_rsvd(newhpage, h_cg_rsvd); 793 list_move(&newhpage->lru, &h->hugepage_activelist); 794 spin_unlock_irq(&hugetlb_lock); 795 return; 796 } 797 798 static struct cftype hugetlb_files[] = { 799 {} /* terminate */ 800 }; 801 802 struct cgroup_subsys hugetlb_cgrp_subsys = { 803 .css_alloc = hugetlb_cgroup_css_alloc, 804 .css_offline = hugetlb_cgroup_css_offline, 805 .css_free = hugetlb_cgroup_css_free, 806 .dfl_cftypes = hugetlb_files, 807 .legacy_cftypes = hugetlb_files, 808 }; 809