1 /* 2 * 3 * Copyright IBM Corporation, 2012 4 * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> 5 * 6 * Cgroup v2 7 * Copyright (C) 2019 Red Hat, Inc. 8 * Author: Giuseppe Scrivano <gscrivan@redhat.com> 9 * 10 * This program is free software; you can redistribute it and/or modify it 11 * under the terms of version 2.1 of the GNU Lesser General Public License 12 * as published by the Free Software Foundation. 13 * 14 * This program is distributed in the hope that it would be useful, but 15 * WITHOUT ANY WARRANTY; without even the implied warranty of 16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 17 * 18 */ 19 20 #include <linux/cgroup.h> 21 #include <linux/page_counter.h> 22 #include <linux/slab.h> 23 #include <linux/hugetlb.h> 24 #include <linux/hugetlb_cgroup.h> 25 26 #define MEMFILE_PRIVATE(x, val) (((x) << 16) | (val)) 27 #define MEMFILE_IDX(val) (((val) >> 16) & 0xffff) 28 #define MEMFILE_ATTR(val) ((val) & 0xffff) 29 30 #define hugetlb_cgroup_from_counter(counter, idx) \ 31 container_of(counter, struct hugetlb_cgroup, hugepage[idx]) 32 33 static struct hugetlb_cgroup *root_h_cgroup __read_mostly; 34 35 static inline struct page_counter * 36 __hugetlb_cgroup_counter_from_cgroup(struct hugetlb_cgroup *h_cg, int idx, 37 bool rsvd) 38 { 39 if (rsvd) 40 return &h_cg->rsvd_hugepage[idx]; 41 return &h_cg->hugepage[idx]; 42 } 43 44 static inline struct page_counter * 45 hugetlb_cgroup_counter_from_cgroup(struct hugetlb_cgroup *h_cg, int idx) 46 { 47 return __hugetlb_cgroup_counter_from_cgroup(h_cg, idx, false); 48 } 49 50 static inline struct page_counter * 51 hugetlb_cgroup_counter_from_cgroup_rsvd(struct hugetlb_cgroup *h_cg, int idx) 52 { 53 return __hugetlb_cgroup_counter_from_cgroup(h_cg, idx, true); 54 } 55 56 static inline 57 struct hugetlb_cgroup *hugetlb_cgroup_from_css(struct cgroup_subsys_state *s) 58 { 59 return s ? container_of(s, struct hugetlb_cgroup, css) : NULL; 60 } 61 62 static inline 63 struct hugetlb_cgroup *hugetlb_cgroup_from_task(struct task_struct *task) 64 { 65 return hugetlb_cgroup_from_css(task_css(task, hugetlb_cgrp_id)); 66 } 67 68 static inline bool hugetlb_cgroup_is_root(struct hugetlb_cgroup *h_cg) 69 { 70 return (h_cg == root_h_cgroup); 71 } 72 73 static inline struct hugetlb_cgroup * 74 parent_hugetlb_cgroup(struct hugetlb_cgroup *h_cg) 75 { 76 return hugetlb_cgroup_from_css(h_cg->css.parent); 77 } 78 79 static inline bool hugetlb_cgroup_have_usage(struct hugetlb_cgroup *h_cg) 80 { 81 int idx; 82 83 for (idx = 0; idx < hugetlb_max_hstate; idx++) { 84 if (page_counter_read( 85 hugetlb_cgroup_counter_from_cgroup(h_cg, idx))) 86 return true; 87 } 88 return false; 89 } 90 91 static void hugetlb_cgroup_init(struct hugetlb_cgroup *h_cgroup, 92 struct hugetlb_cgroup *parent_h_cgroup) 93 { 94 int idx; 95 96 for (idx = 0; idx < HUGE_MAX_HSTATE; idx++) { 97 struct page_counter *fault_parent = NULL; 98 struct page_counter *rsvd_parent = NULL; 99 unsigned long limit; 100 int ret; 101 102 if (parent_h_cgroup) { 103 fault_parent = hugetlb_cgroup_counter_from_cgroup( 104 parent_h_cgroup, idx); 105 rsvd_parent = hugetlb_cgroup_counter_from_cgroup_rsvd( 106 parent_h_cgroup, idx); 107 } 108 page_counter_init(hugetlb_cgroup_counter_from_cgroup(h_cgroup, 109 idx), 110 fault_parent); 111 page_counter_init( 112 hugetlb_cgroup_counter_from_cgroup_rsvd(h_cgroup, idx), 113 rsvd_parent); 114 115 limit = round_down(PAGE_COUNTER_MAX, 116 1 << huge_page_order(&hstates[idx])); 117 118 ret = page_counter_set_max( 119 hugetlb_cgroup_counter_from_cgroup(h_cgroup, idx), 120 limit); 121 VM_BUG_ON(ret); 122 ret = page_counter_set_max( 123 hugetlb_cgroup_counter_from_cgroup_rsvd(h_cgroup, idx), 124 limit); 125 VM_BUG_ON(ret); 126 } 127 } 128 129 static struct cgroup_subsys_state * 130 hugetlb_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) 131 { 132 struct hugetlb_cgroup *parent_h_cgroup = hugetlb_cgroup_from_css(parent_css); 133 struct hugetlb_cgroup *h_cgroup; 134 135 h_cgroup = kzalloc(sizeof(*h_cgroup), GFP_KERNEL); 136 if (!h_cgroup) 137 return ERR_PTR(-ENOMEM); 138 139 if (!parent_h_cgroup) 140 root_h_cgroup = h_cgroup; 141 142 hugetlb_cgroup_init(h_cgroup, parent_h_cgroup); 143 return &h_cgroup->css; 144 } 145 146 static void hugetlb_cgroup_css_free(struct cgroup_subsys_state *css) 147 { 148 struct hugetlb_cgroup *h_cgroup; 149 150 h_cgroup = hugetlb_cgroup_from_css(css); 151 kfree(h_cgroup); 152 } 153 154 /* 155 * Should be called with hugetlb_lock held. 156 * Since we are holding hugetlb_lock, pages cannot get moved from 157 * active list or uncharged from the cgroup, So no need to get 158 * page reference and test for page active here. This function 159 * cannot fail. 160 */ 161 static void hugetlb_cgroup_move_parent(int idx, struct hugetlb_cgroup *h_cg, 162 struct page *page) 163 { 164 unsigned int nr_pages; 165 struct page_counter *counter; 166 struct hugetlb_cgroup *page_hcg; 167 struct hugetlb_cgroup *parent = parent_hugetlb_cgroup(h_cg); 168 169 page_hcg = hugetlb_cgroup_from_page(page); 170 /* 171 * We can have pages in active list without any cgroup 172 * ie, hugepage with less than 3 pages. We can safely 173 * ignore those pages. 174 */ 175 if (!page_hcg || page_hcg != h_cg) 176 goto out; 177 178 nr_pages = compound_nr(page); 179 if (!parent) { 180 parent = root_h_cgroup; 181 /* root has no limit */ 182 page_counter_charge(&parent->hugepage[idx], nr_pages); 183 } 184 counter = &h_cg->hugepage[idx]; 185 /* Take the pages off the local counter */ 186 page_counter_cancel(counter, nr_pages); 187 188 set_hugetlb_cgroup(page, parent); 189 out: 190 return; 191 } 192 193 /* 194 * Force the hugetlb cgroup to empty the hugetlb resources by moving them to 195 * the parent cgroup. 196 */ 197 static void hugetlb_cgroup_css_offline(struct cgroup_subsys_state *css) 198 { 199 struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(css); 200 struct hstate *h; 201 struct page *page; 202 int idx; 203 204 do { 205 idx = 0; 206 for_each_hstate(h) { 207 spin_lock(&hugetlb_lock); 208 list_for_each_entry(page, &h->hugepage_activelist, lru) 209 hugetlb_cgroup_move_parent(idx, h_cg, page); 210 211 spin_unlock(&hugetlb_lock); 212 idx++; 213 } 214 cond_resched(); 215 } while (hugetlb_cgroup_have_usage(h_cg)); 216 } 217 218 static inline void hugetlb_event(struct hugetlb_cgroup *hugetlb, int idx, 219 enum hugetlb_memory_event event) 220 { 221 atomic_long_inc(&hugetlb->events_local[idx][event]); 222 cgroup_file_notify(&hugetlb->events_local_file[idx]); 223 224 do { 225 atomic_long_inc(&hugetlb->events[idx][event]); 226 cgroup_file_notify(&hugetlb->events_file[idx]); 227 } while ((hugetlb = parent_hugetlb_cgroup(hugetlb)) && 228 !hugetlb_cgroup_is_root(hugetlb)); 229 } 230 231 static int __hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages, 232 struct hugetlb_cgroup **ptr, 233 bool rsvd) 234 { 235 int ret = 0; 236 struct page_counter *counter; 237 struct hugetlb_cgroup *h_cg = NULL; 238 239 if (hugetlb_cgroup_disabled()) 240 goto done; 241 /* 242 * We don't charge any cgroup if the compound page have less 243 * than 3 pages. 244 */ 245 if (huge_page_order(&hstates[idx]) < HUGETLB_CGROUP_MIN_ORDER) 246 goto done; 247 again: 248 rcu_read_lock(); 249 h_cg = hugetlb_cgroup_from_task(current); 250 if (!css_tryget(&h_cg->css)) { 251 rcu_read_unlock(); 252 goto again; 253 } 254 rcu_read_unlock(); 255 256 if (!page_counter_try_charge( 257 __hugetlb_cgroup_counter_from_cgroup(h_cg, idx, rsvd), 258 nr_pages, &counter)) { 259 ret = -ENOMEM; 260 hugetlb_event(h_cg, idx, HUGETLB_MAX); 261 css_put(&h_cg->css); 262 goto done; 263 } 264 /* Reservations take a reference to the css because they do not get 265 * reparented. 266 */ 267 if (!rsvd) 268 css_put(&h_cg->css); 269 done: 270 *ptr = h_cg; 271 return ret; 272 } 273 274 int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages, 275 struct hugetlb_cgroup **ptr) 276 { 277 return __hugetlb_cgroup_charge_cgroup(idx, nr_pages, ptr, false); 278 } 279 280 int hugetlb_cgroup_charge_cgroup_rsvd(int idx, unsigned long nr_pages, 281 struct hugetlb_cgroup **ptr) 282 { 283 return __hugetlb_cgroup_charge_cgroup(idx, nr_pages, ptr, true); 284 } 285 286 /* Should be called with hugetlb_lock held */ 287 static void __hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages, 288 struct hugetlb_cgroup *h_cg, 289 struct page *page, bool rsvd) 290 { 291 if (hugetlb_cgroup_disabled() || !h_cg) 292 return; 293 294 __set_hugetlb_cgroup(page, h_cg, rsvd); 295 return; 296 } 297 298 void hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages, 299 struct hugetlb_cgroup *h_cg, 300 struct page *page) 301 { 302 __hugetlb_cgroup_commit_charge(idx, nr_pages, h_cg, page, false); 303 } 304 305 void hugetlb_cgroup_commit_charge_rsvd(int idx, unsigned long nr_pages, 306 struct hugetlb_cgroup *h_cg, 307 struct page *page) 308 { 309 __hugetlb_cgroup_commit_charge(idx, nr_pages, h_cg, page, true); 310 } 311 312 /* 313 * Should be called with hugetlb_lock held 314 */ 315 static void __hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages, 316 struct page *page, bool rsvd) 317 { 318 struct hugetlb_cgroup *h_cg; 319 320 if (hugetlb_cgroup_disabled()) 321 return; 322 lockdep_assert_held(&hugetlb_lock); 323 h_cg = __hugetlb_cgroup_from_page(page, rsvd); 324 if (unlikely(!h_cg)) 325 return; 326 __set_hugetlb_cgroup(page, NULL, rsvd); 327 328 page_counter_uncharge(__hugetlb_cgroup_counter_from_cgroup(h_cg, idx, 329 rsvd), 330 nr_pages); 331 332 if (rsvd) 333 css_put(&h_cg->css); 334 335 return; 336 } 337 338 void hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages, 339 struct page *page) 340 { 341 __hugetlb_cgroup_uncharge_page(idx, nr_pages, page, false); 342 } 343 344 void hugetlb_cgroup_uncharge_page_rsvd(int idx, unsigned long nr_pages, 345 struct page *page) 346 { 347 __hugetlb_cgroup_uncharge_page(idx, nr_pages, page, true); 348 } 349 350 static void __hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages, 351 struct hugetlb_cgroup *h_cg, 352 bool rsvd) 353 { 354 if (hugetlb_cgroup_disabled() || !h_cg) 355 return; 356 357 if (huge_page_order(&hstates[idx]) < HUGETLB_CGROUP_MIN_ORDER) 358 return; 359 360 page_counter_uncharge(__hugetlb_cgroup_counter_from_cgroup(h_cg, idx, 361 rsvd), 362 nr_pages); 363 364 if (rsvd) 365 css_put(&h_cg->css); 366 } 367 368 void hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages, 369 struct hugetlb_cgroup *h_cg) 370 { 371 __hugetlb_cgroup_uncharge_cgroup(idx, nr_pages, h_cg, false); 372 } 373 374 void hugetlb_cgroup_uncharge_cgroup_rsvd(int idx, unsigned long nr_pages, 375 struct hugetlb_cgroup *h_cg) 376 { 377 __hugetlb_cgroup_uncharge_cgroup(idx, nr_pages, h_cg, true); 378 } 379 380 void hugetlb_cgroup_uncharge_counter(struct resv_map *resv, unsigned long start, 381 unsigned long end) 382 { 383 if (hugetlb_cgroup_disabled() || !resv || !resv->reservation_counter || 384 !resv->css) 385 return; 386 387 page_counter_uncharge(resv->reservation_counter, 388 (end - start) * resv->pages_per_hpage); 389 css_put(resv->css); 390 } 391 392 void hugetlb_cgroup_uncharge_file_region(struct resv_map *resv, 393 struct file_region *rg, 394 unsigned long nr_pages) 395 { 396 if (hugetlb_cgroup_disabled() || !resv || !rg || !nr_pages) 397 return; 398 399 if (rg->reservation_counter && resv->pages_per_hpage && nr_pages > 0 && 400 !resv->reservation_counter) { 401 page_counter_uncharge(rg->reservation_counter, 402 nr_pages * resv->pages_per_hpage); 403 css_put(rg->css); 404 } 405 } 406 407 enum { 408 RES_USAGE, 409 RES_RSVD_USAGE, 410 RES_LIMIT, 411 RES_RSVD_LIMIT, 412 RES_MAX_USAGE, 413 RES_RSVD_MAX_USAGE, 414 RES_FAILCNT, 415 RES_RSVD_FAILCNT, 416 }; 417 418 static u64 hugetlb_cgroup_read_u64(struct cgroup_subsys_state *css, 419 struct cftype *cft) 420 { 421 struct page_counter *counter; 422 struct page_counter *rsvd_counter; 423 struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(css); 424 425 counter = &h_cg->hugepage[MEMFILE_IDX(cft->private)]; 426 rsvd_counter = &h_cg->rsvd_hugepage[MEMFILE_IDX(cft->private)]; 427 428 switch (MEMFILE_ATTR(cft->private)) { 429 case RES_USAGE: 430 return (u64)page_counter_read(counter) * PAGE_SIZE; 431 case RES_RSVD_USAGE: 432 return (u64)page_counter_read(rsvd_counter) * PAGE_SIZE; 433 case RES_LIMIT: 434 return (u64)counter->max * PAGE_SIZE; 435 case RES_RSVD_LIMIT: 436 return (u64)rsvd_counter->max * PAGE_SIZE; 437 case RES_MAX_USAGE: 438 return (u64)counter->watermark * PAGE_SIZE; 439 case RES_RSVD_MAX_USAGE: 440 return (u64)rsvd_counter->watermark * PAGE_SIZE; 441 case RES_FAILCNT: 442 return counter->failcnt; 443 case RES_RSVD_FAILCNT: 444 return rsvd_counter->failcnt; 445 default: 446 BUG(); 447 } 448 } 449 450 static int hugetlb_cgroup_read_u64_max(struct seq_file *seq, void *v) 451 { 452 int idx; 453 u64 val; 454 struct cftype *cft = seq_cft(seq); 455 unsigned long limit; 456 struct page_counter *counter; 457 struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(seq_css(seq)); 458 459 idx = MEMFILE_IDX(cft->private); 460 counter = &h_cg->hugepage[idx]; 461 462 limit = round_down(PAGE_COUNTER_MAX, 463 1 << huge_page_order(&hstates[idx])); 464 465 switch (MEMFILE_ATTR(cft->private)) { 466 case RES_RSVD_USAGE: 467 counter = &h_cg->rsvd_hugepage[idx]; 468 fallthrough; 469 case RES_USAGE: 470 val = (u64)page_counter_read(counter); 471 seq_printf(seq, "%llu\n", val * PAGE_SIZE); 472 break; 473 case RES_RSVD_LIMIT: 474 counter = &h_cg->rsvd_hugepage[idx]; 475 fallthrough; 476 case RES_LIMIT: 477 val = (u64)counter->max; 478 if (val == limit) 479 seq_puts(seq, "max\n"); 480 else 481 seq_printf(seq, "%llu\n", val * PAGE_SIZE); 482 break; 483 default: 484 BUG(); 485 } 486 487 return 0; 488 } 489 490 static DEFINE_MUTEX(hugetlb_limit_mutex); 491 492 static ssize_t hugetlb_cgroup_write(struct kernfs_open_file *of, 493 char *buf, size_t nbytes, loff_t off, 494 const char *max) 495 { 496 int ret, idx; 497 unsigned long nr_pages; 498 struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(of_css(of)); 499 bool rsvd = false; 500 501 if (hugetlb_cgroup_is_root(h_cg)) /* Can't set limit on root */ 502 return -EINVAL; 503 504 buf = strstrip(buf); 505 ret = page_counter_memparse(buf, max, &nr_pages); 506 if (ret) 507 return ret; 508 509 idx = MEMFILE_IDX(of_cft(of)->private); 510 nr_pages = round_down(nr_pages, 1 << huge_page_order(&hstates[idx])); 511 512 switch (MEMFILE_ATTR(of_cft(of)->private)) { 513 case RES_RSVD_LIMIT: 514 rsvd = true; 515 fallthrough; 516 case RES_LIMIT: 517 mutex_lock(&hugetlb_limit_mutex); 518 ret = page_counter_set_max( 519 __hugetlb_cgroup_counter_from_cgroup(h_cg, idx, rsvd), 520 nr_pages); 521 mutex_unlock(&hugetlb_limit_mutex); 522 break; 523 default: 524 ret = -EINVAL; 525 break; 526 } 527 return ret ?: nbytes; 528 } 529 530 static ssize_t hugetlb_cgroup_write_legacy(struct kernfs_open_file *of, 531 char *buf, size_t nbytes, loff_t off) 532 { 533 return hugetlb_cgroup_write(of, buf, nbytes, off, "-1"); 534 } 535 536 static ssize_t hugetlb_cgroup_write_dfl(struct kernfs_open_file *of, 537 char *buf, size_t nbytes, loff_t off) 538 { 539 return hugetlb_cgroup_write(of, buf, nbytes, off, "max"); 540 } 541 542 static ssize_t hugetlb_cgroup_reset(struct kernfs_open_file *of, 543 char *buf, size_t nbytes, loff_t off) 544 { 545 int ret = 0; 546 struct page_counter *counter, *rsvd_counter; 547 struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(of_css(of)); 548 549 counter = &h_cg->hugepage[MEMFILE_IDX(of_cft(of)->private)]; 550 rsvd_counter = &h_cg->rsvd_hugepage[MEMFILE_IDX(of_cft(of)->private)]; 551 552 switch (MEMFILE_ATTR(of_cft(of)->private)) { 553 case RES_MAX_USAGE: 554 page_counter_reset_watermark(counter); 555 break; 556 case RES_RSVD_MAX_USAGE: 557 page_counter_reset_watermark(rsvd_counter); 558 break; 559 case RES_FAILCNT: 560 counter->failcnt = 0; 561 break; 562 case RES_RSVD_FAILCNT: 563 rsvd_counter->failcnt = 0; 564 break; 565 default: 566 ret = -EINVAL; 567 break; 568 } 569 return ret ?: nbytes; 570 } 571 572 static char *mem_fmt(char *buf, int size, unsigned long hsize) 573 { 574 if (hsize >= (1UL << 30)) 575 snprintf(buf, size, "%luGB", hsize >> 30); 576 else if (hsize >= (1UL << 20)) 577 snprintf(buf, size, "%luMB", hsize >> 20); 578 else 579 snprintf(buf, size, "%luKB", hsize >> 10); 580 return buf; 581 } 582 583 static int __hugetlb_events_show(struct seq_file *seq, bool local) 584 { 585 int idx; 586 long max; 587 struct cftype *cft = seq_cft(seq); 588 struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(seq_css(seq)); 589 590 idx = MEMFILE_IDX(cft->private); 591 592 if (local) 593 max = atomic_long_read(&h_cg->events_local[idx][HUGETLB_MAX]); 594 else 595 max = atomic_long_read(&h_cg->events[idx][HUGETLB_MAX]); 596 597 seq_printf(seq, "max %lu\n", max); 598 599 return 0; 600 } 601 602 static int hugetlb_events_show(struct seq_file *seq, void *v) 603 { 604 return __hugetlb_events_show(seq, false); 605 } 606 607 static int hugetlb_events_local_show(struct seq_file *seq, void *v) 608 { 609 return __hugetlb_events_show(seq, true); 610 } 611 612 static void __init __hugetlb_cgroup_file_dfl_init(int idx) 613 { 614 char buf[32]; 615 struct cftype *cft; 616 struct hstate *h = &hstates[idx]; 617 618 /* format the size */ 619 mem_fmt(buf, sizeof(buf), huge_page_size(h)); 620 621 /* Add the limit file */ 622 cft = &h->cgroup_files_dfl[0]; 623 snprintf(cft->name, MAX_CFTYPE_NAME, "%s.max", buf); 624 cft->private = MEMFILE_PRIVATE(idx, RES_LIMIT); 625 cft->seq_show = hugetlb_cgroup_read_u64_max; 626 cft->write = hugetlb_cgroup_write_dfl; 627 cft->flags = CFTYPE_NOT_ON_ROOT; 628 629 /* Add the reservation limit file */ 630 cft = &h->cgroup_files_dfl[1]; 631 snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.max", buf); 632 cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_LIMIT); 633 cft->seq_show = hugetlb_cgroup_read_u64_max; 634 cft->write = hugetlb_cgroup_write_dfl; 635 cft->flags = CFTYPE_NOT_ON_ROOT; 636 637 /* Add the current usage file */ 638 cft = &h->cgroup_files_dfl[2]; 639 snprintf(cft->name, MAX_CFTYPE_NAME, "%s.current", buf); 640 cft->private = MEMFILE_PRIVATE(idx, RES_USAGE); 641 cft->seq_show = hugetlb_cgroup_read_u64_max; 642 cft->flags = CFTYPE_NOT_ON_ROOT; 643 644 /* Add the current reservation usage file */ 645 cft = &h->cgroup_files_dfl[3]; 646 snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.current", buf); 647 cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_USAGE); 648 cft->seq_show = hugetlb_cgroup_read_u64_max; 649 cft->flags = CFTYPE_NOT_ON_ROOT; 650 651 /* Add the events file */ 652 cft = &h->cgroup_files_dfl[4]; 653 snprintf(cft->name, MAX_CFTYPE_NAME, "%s.events", buf); 654 cft->private = MEMFILE_PRIVATE(idx, 0); 655 cft->seq_show = hugetlb_events_show; 656 cft->file_offset = offsetof(struct hugetlb_cgroup, events_file[idx]); 657 cft->flags = CFTYPE_NOT_ON_ROOT; 658 659 /* Add the events.local file */ 660 cft = &h->cgroup_files_dfl[5]; 661 snprintf(cft->name, MAX_CFTYPE_NAME, "%s.events.local", buf); 662 cft->private = MEMFILE_PRIVATE(idx, 0); 663 cft->seq_show = hugetlb_events_local_show; 664 cft->file_offset = offsetof(struct hugetlb_cgroup, 665 events_local_file[idx]); 666 cft->flags = CFTYPE_NOT_ON_ROOT; 667 668 /* NULL terminate the last cft */ 669 cft = &h->cgroup_files_dfl[6]; 670 memset(cft, 0, sizeof(*cft)); 671 672 WARN_ON(cgroup_add_dfl_cftypes(&hugetlb_cgrp_subsys, 673 h->cgroup_files_dfl)); 674 } 675 676 static void __init __hugetlb_cgroup_file_legacy_init(int idx) 677 { 678 char buf[32]; 679 struct cftype *cft; 680 struct hstate *h = &hstates[idx]; 681 682 /* format the size */ 683 mem_fmt(buf, sizeof(buf), huge_page_size(h)); 684 685 /* Add the limit file */ 686 cft = &h->cgroup_files_legacy[0]; 687 snprintf(cft->name, MAX_CFTYPE_NAME, "%s.limit_in_bytes", buf); 688 cft->private = MEMFILE_PRIVATE(idx, RES_LIMIT); 689 cft->read_u64 = hugetlb_cgroup_read_u64; 690 cft->write = hugetlb_cgroup_write_legacy; 691 692 /* Add the reservation limit file */ 693 cft = &h->cgroup_files_legacy[1]; 694 snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.limit_in_bytes", buf); 695 cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_LIMIT); 696 cft->read_u64 = hugetlb_cgroup_read_u64; 697 cft->write = hugetlb_cgroup_write_legacy; 698 699 /* Add the usage file */ 700 cft = &h->cgroup_files_legacy[2]; 701 snprintf(cft->name, MAX_CFTYPE_NAME, "%s.usage_in_bytes", buf); 702 cft->private = MEMFILE_PRIVATE(idx, RES_USAGE); 703 cft->read_u64 = hugetlb_cgroup_read_u64; 704 705 /* Add the reservation usage file */ 706 cft = &h->cgroup_files_legacy[3]; 707 snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.usage_in_bytes", buf); 708 cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_USAGE); 709 cft->read_u64 = hugetlb_cgroup_read_u64; 710 711 /* Add the MAX usage file */ 712 cft = &h->cgroup_files_legacy[4]; 713 snprintf(cft->name, MAX_CFTYPE_NAME, "%s.max_usage_in_bytes", buf); 714 cft->private = MEMFILE_PRIVATE(idx, RES_MAX_USAGE); 715 cft->write = hugetlb_cgroup_reset; 716 cft->read_u64 = hugetlb_cgroup_read_u64; 717 718 /* Add the MAX reservation usage file */ 719 cft = &h->cgroup_files_legacy[5]; 720 snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.max_usage_in_bytes", buf); 721 cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_MAX_USAGE); 722 cft->write = hugetlb_cgroup_reset; 723 cft->read_u64 = hugetlb_cgroup_read_u64; 724 725 /* Add the failcntfile */ 726 cft = &h->cgroup_files_legacy[6]; 727 snprintf(cft->name, MAX_CFTYPE_NAME, "%s.failcnt", buf); 728 cft->private = MEMFILE_PRIVATE(idx, RES_FAILCNT); 729 cft->write = hugetlb_cgroup_reset; 730 cft->read_u64 = hugetlb_cgroup_read_u64; 731 732 /* Add the reservation failcntfile */ 733 cft = &h->cgroup_files_legacy[7]; 734 snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.failcnt", buf); 735 cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_FAILCNT); 736 cft->write = hugetlb_cgroup_reset; 737 cft->read_u64 = hugetlb_cgroup_read_u64; 738 739 /* NULL terminate the last cft */ 740 cft = &h->cgroup_files_legacy[8]; 741 memset(cft, 0, sizeof(*cft)); 742 743 WARN_ON(cgroup_add_legacy_cftypes(&hugetlb_cgrp_subsys, 744 h->cgroup_files_legacy)); 745 } 746 747 static void __init __hugetlb_cgroup_file_init(int idx) 748 { 749 __hugetlb_cgroup_file_dfl_init(idx); 750 __hugetlb_cgroup_file_legacy_init(idx); 751 } 752 753 void __init hugetlb_cgroup_file_init(void) 754 { 755 struct hstate *h; 756 757 for_each_hstate(h) { 758 /* 759 * Add cgroup control files only if the huge page consists 760 * of more than two normal pages. This is because we use 761 * page[2].private for storing cgroup details. 762 */ 763 if (huge_page_order(h) >= HUGETLB_CGROUP_MIN_ORDER) 764 __hugetlb_cgroup_file_init(hstate_index(h)); 765 } 766 } 767 768 /* 769 * hugetlb_lock will make sure a parallel cgroup rmdir won't happen 770 * when we migrate hugepages 771 */ 772 void hugetlb_cgroup_migrate(struct page *oldhpage, struct page *newhpage) 773 { 774 struct hugetlb_cgroup *h_cg; 775 struct hugetlb_cgroup *h_cg_rsvd; 776 struct hstate *h = page_hstate(oldhpage); 777 778 if (hugetlb_cgroup_disabled()) 779 return; 780 781 VM_BUG_ON_PAGE(!PageHuge(oldhpage), oldhpage); 782 spin_lock(&hugetlb_lock); 783 h_cg = hugetlb_cgroup_from_page(oldhpage); 784 h_cg_rsvd = hugetlb_cgroup_from_page_rsvd(oldhpage); 785 set_hugetlb_cgroup(oldhpage, NULL); 786 set_hugetlb_cgroup_rsvd(oldhpage, NULL); 787 788 /* move the h_cg details to new cgroup */ 789 set_hugetlb_cgroup(newhpage, h_cg); 790 set_hugetlb_cgroup_rsvd(newhpage, h_cg_rsvd); 791 list_move(&newhpage->lru, &h->hugepage_activelist); 792 spin_unlock(&hugetlb_lock); 793 return; 794 } 795 796 static struct cftype hugetlb_files[] = { 797 {} /* terminate */ 798 }; 799 800 struct cgroup_subsys hugetlb_cgrp_subsys = { 801 .css_alloc = hugetlb_cgroup_css_alloc, 802 .css_offline = hugetlb_cgroup_css_offline, 803 .css_free = hugetlb_cgroup_css_free, 804 .dfl_cftypes = hugetlb_files, 805 .legacy_cftypes = hugetlb_files, 806 }; 807