1 /* 2 * User interface for Resource Alloction in Resource Director Technology(RDT) 3 * 4 * Copyright (C) 2016 Intel Corporation 5 * 6 * Author: Fenghua Yu <fenghua.yu@intel.com> 7 * 8 * This program is free software; you can redistribute it and/or modify it 9 * under the terms and conditions of the GNU General Public License, 10 * version 2, as published by the Free Software Foundation. 11 * 12 * This program is distributed in the hope it will be useful, but WITHOUT 13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 15 * more details. 16 * 17 * More information about RDT be found in the Intel (R) x86 Architecture 18 * Software Developer Manual. 19 */ 20 21 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 22 23 #include <linux/cacheinfo.h> 24 #include <linux/cpu.h> 25 #include <linux/debugfs.h> 26 #include <linux/fs.h> 27 #include <linux/fs_parser.h> 28 #include <linux/sysfs.h> 29 #include <linux/kernfs.h> 30 #include <linux/seq_buf.h> 31 #include <linux/seq_file.h> 32 #include <linux/sched/signal.h> 33 #include <linux/sched/task.h> 34 #include <linux/slab.h> 35 #include <linux/task_work.h> 36 #include <linux/user_namespace.h> 37 38 #include <uapi/linux/magic.h> 39 40 #include <asm/resctrl_sched.h> 41 #include "internal.h" 42 43 DEFINE_STATIC_KEY_FALSE(rdt_enable_key); 44 DEFINE_STATIC_KEY_FALSE(rdt_mon_enable_key); 45 DEFINE_STATIC_KEY_FALSE(rdt_alloc_enable_key); 46 static struct kernfs_root *rdt_root; 47 struct rdtgroup rdtgroup_default; 48 LIST_HEAD(rdt_all_groups); 49 50 /* Kernel fs node for "info" directory under root */ 51 static struct kernfs_node *kn_info; 52 53 /* Kernel fs node for "mon_groups" directory under root */ 54 static struct kernfs_node *kn_mongrp; 55 56 /* Kernel fs node for "mon_data" directory under root */ 57 static struct kernfs_node *kn_mondata; 58 59 static struct seq_buf last_cmd_status; 60 static char last_cmd_status_buf[512]; 61 62 struct dentry *debugfs_resctrl; 63 64 void rdt_last_cmd_clear(void) 65 { 66 lockdep_assert_held(&rdtgroup_mutex); 67 seq_buf_clear(&last_cmd_status); 68 } 69 70 void rdt_last_cmd_puts(const char *s) 71 { 72 lockdep_assert_held(&rdtgroup_mutex); 73 seq_buf_puts(&last_cmd_status, s); 74 } 75 76 void rdt_last_cmd_printf(const char *fmt, ...) 77 { 78 va_list ap; 79 80 va_start(ap, fmt); 81 lockdep_assert_held(&rdtgroup_mutex); 82 seq_buf_vprintf(&last_cmd_status, fmt, ap); 83 va_end(ap); 84 } 85 86 /* 87 * Trivial allocator for CLOSIDs. Since h/w only supports a small number, 88 * we can keep a bitmap of free CLOSIDs in a single integer. 89 * 90 * Using a global CLOSID across all resources has some advantages and 91 * some drawbacks: 92 * + We can simply set "current->closid" to assign a task to a resource 93 * group. 94 * + Context switch code can avoid extra memory references deciding which 95 * CLOSID to load into the PQR_ASSOC MSR 96 * - We give up some options in configuring resource groups across multi-socket 97 * systems. 98 * - Our choices on how to configure each resource become progressively more 99 * limited as the number of resources grows. 100 */ 101 static int closid_free_map; 102 static int closid_free_map_len; 103 104 int closids_supported(void) 105 { 106 return closid_free_map_len; 107 } 108 109 static void closid_init(void) 110 { 111 struct rdt_resource *r; 112 int rdt_min_closid = 32; 113 114 /* Compute rdt_min_closid across all resources */ 115 for_each_alloc_enabled_rdt_resource(r) 116 rdt_min_closid = min(rdt_min_closid, r->num_closid); 117 118 closid_free_map = BIT_MASK(rdt_min_closid) - 1; 119 120 /* CLOSID 0 is always reserved for the default group */ 121 closid_free_map &= ~1; 122 closid_free_map_len = rdt_min_closid; 123 } 124 125 static int closid_alloc(void) 126 { 127 u32 closid = ffs(closid_free_map); 128 129 if (closid == 0) 130 return -ENOSPC; 131 closid--; 132 closid_free_map &= ~(1 << closid); 133 134 return closid; 135 } 136 137 void closid_free(int closid) 138 { 139 closid_free_map |= 1 << closid; 140 } 141 142 /** 143 * closid_allocated - test if provided closid is in use 144 * @closid: closid to be tested 145 * 146 * Return: true if @closid is currently associated with a resource group, 147 * false if @closid is free 148 */ 149 static bool closid_allocated(unsigned int closid) 150 { 151 return (closid_free_map & (1 << closid)) == 0; 152 } 153 154 /** 155 * rdtgroup_mode_by_closid - Return mode of resource group with closid 156 * @closid: closid if the resource group 157 * 158 * Each resource group is associated with a @closid. Here the mode 159 * of a resource group can be queried by searching for it using its closid. 160 * 161 * Return: mode as &enum rdtgrp_mode of resource group with closid @closid 162 */ 163 enum rdtgrp_mode rdtgroup_mode_by_closid(int closid) 164 { 165 struct rdtgroup *rdtgrp; 166 167 list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) { 168 if (rdtgrp->closid == closid) 169 return rdtgrp->mode; 170 } 171 172 return RDT_NUM_MODES; 173 } 174 175 static const char * const rdt_mode_str[] = { 176 [RDT_MODE_SHAREABLE] = "shareable", 177 [RDT_MODE_EXCLUSIVE] = "exclusive", 178 [RDT_MODE_PSEUDO_LOCKSETUP] = "pseudo-locksetup", 179 [RDT_MODE_PSEUDO_LOCKED] = "pseudo-locked", 180 }; 181 182 /** 183 * rdtgroup_mode_str - Return the string representation of mode 184 * @mode: the resource group mode as &enum rdtgroup_mode 185 * 186 * Return: string representation of valid mode, "unknown" otherwise 187 */ 188 static const char *rdtgroup_mode_str(enum rdtgrp_mode mode) 189 { 190 if (mode < RDT_MODE_SHAREABLE || mode >= RDT_NUM_MODES) 191 return "unknown"; 192 193 return rdt_mode_str[mode]; 194 } 195 196 /* set uid and gid of rdtgroup dirs and files to that of the creator */ 197 static int rdtgroup_kn_set_ugid(struct kernfs_node *kn) 198 { 199 struct iattr iattr = { .ia_valid = ATTR_UID | ATTR_GID, 200 .ia_uid = current_fsuid(), 201 .ia_gid = current_fsgid(), }; 202 203 if (uid_eq(iattr.ia_uid, GLOBAL_ROOT_UID) && 204 gid_eq(iattr.ia_gid, GLOBAL_ROOT_GID)) 205 return 0; 206 207 return kernfs_setattr(kn, &iattr); 208 } 209 210 static int rdtgroup_add_file(struct kernfs_node *parent_kn, struct rftype *rft) 211 { 212 struct kernfs_node *kn; 213 int ret; 214 215 kn = __kernfs_create_file(parent_kn, rft->name, rft->mode, 216 GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, 217 0, rft->kf_ops, rft, NULL, NULL); 218 if (IS_ERR(kn)) 219 return PTR_ERR(kn); 220 221 ret = rdtgroup_kn_set_ugid(kn); 222 if (ret) { 223 kernfs_remove(kn); 224 return ret; 225 } 226 227 return 0; 228 } 229 230 static int rdtgroup_seqfile_show(struct seq_file *m, void *arg) 231 { 232 struct kernfs_open_file *of = m->private; 233 struct rftype *rft = of->kn->priv; 234 235 if (rft->seq_show) 236 return rft->seq_show(of, m, arg); 237 return 0; 238 } 239 240 static ssize_t rdtgroup_file_write(struct kernfs_open_file *of, char *buf, 241 size_t nbytes, loff_t off) 242 { 243 struct rftype *rft = of->kn->priv; 244 245 if (rft->write) 246 return rft->write(of, buf, nbytes, off); 247 248 return -EINVAL; 249 } 250 251 static struct kernfs_ops rdtgroup_kf_single_ops = { 252 .atomic_write_len = PAGE_SIZE, 253 .write = rdtgroup_file_write, 254 .seq_show = rdtgroup_seqfile_show, 255 }; 256 257 static struct kernfs_ops kf_mondata_ops = { 258 .atomic_write_len = PAGE_SIZE, 259 .seq_show = rdtgroup_mondata_show, 260 }; 261 262 static bool is_cpu_list(struct kernfs_open_file *of) 263 { 264 struct rftype *rft = of->kn->priv; 265 266 return rft->flags & RFTYPE_FLAGS_CPUS_LIST; 267 } 268 269 static int rdtgroup_cpus_show(struct kernfs_open_file *of, 270 struct seq_file *s, void *v) 271 { 272 struct rdtgroup *rdtgrp; 273 struct cpumask *mask; 274 int ret = 0; 275 276 rdtgrp = rdtgroup_kn_lock_live(of->kn); 277 278 if (rdtgrp) { 279 if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) { 280 if (!rdtgrp->plr->d) { 281 rdt_last_cmd_clear(); 282 rdt_last_cmd_puts("Cache domain offline\n"); 283 ret = -ENODEV; 284 } else { 285 mask = &rdtgrp->plr->d->cpu_mask; 286 seq_printf(s, is_cpu_list(of) ? 287 "%*pbl\n" : "%*pb\n", 288 cpumask_pr_args(mask)); 289 } 290 } else { 291 seq_printf(s, is_cpu_list(of) ? "%*pbl\n" : "%*pb\n", 292 cpumask_pr_args(&rdtgrp->cpu_mask)); 293 } 294 } else { 295 ret = -ENOENT; 296 } 297 rdtgroup_kn_unlock(of->kn); 298 299 return ret; 300 } 301 302 /* 303 * This is safe against resctrl_sched_in() called from __switch_to() 304 * because __switch_to() is executed with interrupts disabled. A local call 305 * from update_closid_rmid() is proteced against __switch_to() because 306 * preemption is disabled. 307 */ 308 static void update_cpu_closid_rmid(void *info) 309 { 310 struct rdtgroup *r = info; 311 312 if (r) { 313 this_cpu_write(pqr_state.default_closid, r->closid); 314 this_cpu_write(pqr_state.default_rmid, r->mon.rmid); 315 } 316 317 /* 318 * We cannot unconditionally write the MSR because the current 319 * executing task might have its own closid selected. Just reuse 320 * the context switch code. 321 */ 322 resctrl_sched_in(); 323 } 324 325 /* 326 * Update the PGR_ASSOC MSR on all cpus in @cpu_mask, 327 * 328 * Per task closids/rmids must have been set up before calling this function. 329 */ 330 static void 331 update_closid_rmid(const struct cpumask *cpu_mask, struct rdtgroup *r) 332 { 333 int cpu = get_cpu(); 334 335 if (cpumask_test_cpu(cpu, cpu_mask)) 336 update_cpu_closid_rmid(r); 337 smp_call_function_many(cpu_mask, update_cpu_closid_rmid, r, 1); 338 put_cpu(); 339 } 340 341 static int cpus_mon_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask, 342 cpumask_var_t tmpmask) 343 { 344 struct rdtgroup *prgrp = rdtgrp->mon.parent, *crgrp; 345 struct list_head *head; 346 347 /* Check whether cpus belong to parent ctrl group */ 348 cpumask_andnot(tmpmask, newmask, &prgrp->cpu_mask); 349 if (cpumask_weight(tmpmask)) { 350 rdt_last_cmd_puts("Can only add CPUs to mongroup that belong to parent\n"); 351 return -EINVAL; 352 } 353 354 /* Check whether cpus are dropped from this group */ 355 cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask); 356 if (cpumask_weight(tmpmask)) { 357 /* Give any dropped cpus to parent rdtgroup */ 358 cpumask_or(&prgrp->cpu_mask, &prgrp->cpu_mask, tmpmask); 359 update_closid_rmid(tmpmask, prgrp); 360 } 361 362 /* 363 * If we added cpus, remove them from previous group that owned them 364 * and update per-cpu rmid 365 */ 366 cpumask_andnot(tmpmask, newmask, &rdtgrp->cpu_mask); 367 if (cpumask_weight(tmpmask)) { 368 head = &prgrp->mon.crdtgrp_list; 369 list_for_each_entry(crgrp, head, mon.crdtgrp_list) { 370 if (crgrp == rdtgrp) 371 continue; 372 cpumask_andnot(&crgrp->cpu_mask, &crgrp->cpu_mask, 373 tmpmask); 374 } 375 update_closid_rmid(tmpmask, rdtgrp); 376 } 377 378 /* Done pushing/pulling - update this group with new mask */ 379 cpumask_copy(&rdtgrp->cpu_mask, newmask); 380 381 return 0; 382 } 383 384 static void cpumask_rdtgrp_clear(struct rdtgroup *r, struct cpumask *m) 385 { 386 struct rdtgroup *crgrp; 387 388 cpumask_andnot(&r->cpu_mask, &r->cpu_mask, m); 389 /* update the child mon group masks as well*/ 390 list_for_each_entry(crgrp, &r->mon.crdtgrp_list, mon.crdtgrp_list) 391 cpumask_and(&crgrp->cpu_mask, &r->cpu_mask, &crgrp->cpu_mask); 392 } 393 394 static int cpus_ctrl_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask, 395 cpumask_var_t tmpmask, cpumask_var_t tmpmask1) 396 { 397 struct rdtgroup *r, *crgrp; 398 struct list_head *head; 399 400 /* Check whether cpus are dropped from this group */ 401 cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask); 402 if (cpumask_weight(tmpmask)) { 403 /* Can't drop from default group */ 404 if (rdtgrp == &rdtgroup_default) { 405 rdt_last_cmd_puts("Can't drop CPUs from default group\n"); 406 return -EINVAL; 407 } 408 409 /* Give any dropped cpus to rdtgroup_default */ 410 cpumask_or(&rdtgroup_default.cpu_mask, 411 &rdtgroup_default.cpu_mask, tmpmask); 412 update_closid_rmid(tmpmask, &rdtgroup_default); 413 } 414 415 /* 416 * If we added cpus, remove them from previous group and 417 * the prev group's child groups that owned them 418 * and update per-cpu closid/rmid. 419 */ 420 cpumask_andnot(tmpmask, newmask, &rdtgrp->cpu_mask); 421 if (cpumask_weight(tmpmask)) { 422 list_for_each_entry(r, &rdt_all_groups, rdtgroup_list) { 423 if (r == rdtgrp) 424 continue; 425 cpumask_and(tmpmask1, &r->cpu_mask, tmpmask); 426 if (cpumask_weight(tmpmask1)) 427 cpumask_rdtgrp_clear(r, tmpmask1); 428 } 429 update_closid_rmid(tmpmask, rdtgrp); 430 } 431 432 /* Done pushing/pulling - update this group with new mask */ 433 cpumask_copy(&rdtgrp->cpu_mask, newmask); 434 435 /* 436 * Clear child mon group masks since there is a new parent mask 437 * now and update the rmid for the cpus the child lost. 438 */ 439 head = &rdtgrp->mon.crdtgrp_list; 440 list_for_each_entry(crgrp, head, mon.crdtgrp_list) { 441 cpumask_and(tmpmask, &rdtgrp->cpu_mask, &crgrp->cpu_mask); 442 update_closid_rmid(tmpmask, rdtgrp); 443 cpumask_clear(&crgrp->cpu_mask); 444 } 445 446 return 0; 447 } 448 449 static ssize_t rdtgroup_cpus_write(struct kernfs_open_file *of, 450 char *buf, size_t nbytes, loff_t off) 451 { 452 cpumask_var_t tmpmask, newmask, tmpmask1; 453 struct rdtgroup *rdtgrp; 454 int ret; 455 456 if (!buf) 457 return -EINVAL; 458 459 if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL)) 460 return -ENOMEM; 461 if (!zalloc_cpumask_var(&newmask, GFP_KERNEL)) { 462 free_cpumask_var(tmpmask); 463 return -ENOMEM; 464 } 465 if (!zalloc_cpumask_var(&tmpmask1, GFP_KERNEL)) { 466 free_cpumask_var(tmpmask); 467 free_cpumask_var(newmask); 468 return -ENOMEM; 469 } 470 471 rdtgrp = rdtgroup_kn_lock_live(of->kn); 472 rdt_last_cmd_clear(); 473 if (!rdtgrp) { 474 ret = -ENOENT; 475 rdt_last_cmd_puts("Directory was removed\n"); 476 goto unlock; 477 } 478 479 if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED || 480 rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) { 481 ret = -EINVAL; 482 rdt_last_cmd_puts("Pseudo-locking in progress\n"); 483 goto unlock; 484 } 485 486 if (is_cpu_list(of)) 487 ret = cpulist_parse(buf, newmask); 488 else 489 ret = cpumask_parse(buf, newmask); 490 491 if (ret) { 492 rdt_last_cmd_puts("Bad CPU list/mask\n"); 493 goto unlock; 494 } 495 496 /* check that user didn't specify any offline cpus */ 497 cpumask_andnot(tmpmask, newmask, cpu_online_mask); 498 if (cpumask_weight(tmpmask)) { 499 ret = -EINVAL; 500 rdt_last_cmd_puts("Can only assign online CPUs\n"); 501 goto unlock; 502 } 503 504 if (rdtgrp->type == RDTCTRL_GROUP) 505 ret = cpus_ctrl_write(rdtgrp, newmask, tmpmask, tmpmask1); 506 else if (rdtgrp->type == RDTMON_GROUP) 507 ret = cpus_mon_write(rdtgrp, newmask, tmpmask); 508 else 509 ret = -EINVAL; 510 511 unlock: 512 rdtgroup_kn_unlock(of->kn); 513 free_cpumask_var(tmpmask); 514 free_cpumask_var(newmask); 515 free_cpumask_var(tmpmask1); 516 517 return ret ?: nbytes; 518 } 519 520 struct task_move_callback { 521 struct callback_head work; 522 struct rdtgroup *rdtgrp; 523 }; 524 525 static void move_myself(struct callback_head *head) 526 { 527 struct task_move_callback *callback; 528 struct rdtgroup *rdtgrp; 529 530 callback = container_of(head, struct task_move_callback, work); 531 rdtgrp = callback->rdtgrp; 532 533 /* 534 * If resource group was deleted before this task work callback 535 * was invoked, then assign the task to root group and free the 536 * resource group. 537 */ 538 if (atomic_dec_and_test(&rdtgrp->waitcount) && 539 (rdtgrp->flags & RDT_DELETED)) { 540 current->closid = 0; 541 current->rmid = 0; 542 kfree(rdtgrp); 543 } 544 545 preempt_disable(); 546 /* update PQR_ASSOC MSR to make resource group go into effect */ 547 resctrl_sched_in(); 548 preempt_enable(); 549 550 kfree(callback); 551 } 552 553 static int __rdtgroup_move_task(struct task_struct *tsk, 554 struct rdtgroup *rdtgrp) 555 { 556 struct task_move_callback *callback; 557 int ret; 558 559 callback = kzalloc(sizeof(*callback), GFP_KERNEL); 560 if (!callback) 561 return -ENOMEM; 562 callback->work.func = move_myself; 563 callback->rdtgrp = rdtgrp; 564 565 /* 566 * Take a refcount, so rdtgrp cannot be freed before the 567 * callback has been invoked. 568 */ 569 atomic_inc(&rdtgrp->waitcount); 570 ret = task_work_add(tsk, &callback->work, true); 571 if (ret) { 572 /* 573 * Task is exiting. Drop the refcount and free the callback. 574 * No need to check the refcount as the group cannot be 575 * deleted before the write function unlocks rdtgroup_mutex. 576 */ 577 atomic_dec(&rdtgrp->waitcount); 578 kfree(callback); 579 rdt_last_cmd_puts("Task exited\n"); 580 } else { 581 /* 582 * For ctrl_mon groups move both closid and rmid. 583 * For monitor groups, can move the tasks only from 584 * their parent CTRL group. 585 */ 586 if (rdtgrp->type == RDTCTRL_GROUP) { 587 tsk->closid = rdtgrp->closid; 588 tsk->rmid = rdtgrp->mon.rmid; 589 } else if (rdtgrp->type == RDTMON_GROUP) { 590 if (rdtgrp->mon.parent->closid == tsk->closid) { 591 tsk->rmid = rdtgrp->mon.rmid; 592 } else { 593 rdt_last_cmd_puts("Can't move task to different control group\n"); 594 ret = -EINVAL; 595 } 596 } 597 } 598 return ret; 599 } 600 601 /** 602 * rdtgroup_tasks_assigned - Test if tasks have been assigned to resource group 603 * @r: Resource group 604 * 605 * Return: 1 if tasks have been assigned to @r, 0 otherwise 606 */ 607 int rdtgroup_tasks_assigned(struct rdtgroup *r) 608 { 609 struct task_struct *p, *t; 610 int ret = 0; 611 612 lockdep_assert_held(&rdtgroup_mutex); 613 614 rcu_read_lock(); 615 for_each_process_thread(p, t) { 616 if ((r->type == RDTCTRL_GROUP && t->closid == r->closid) || 617 (r->type == RDTMON_GROUP && t->rmid == r->mon.rmid)) { 618 ret = 1; 619 break; 620 } 621 } 622 rcu_read_unlock(); 623 624 return ret; 625 } 626 627 static int rdtgroup_task_write_permission(struct task_struct *task, 628 struct kernfs_open_file *of) 629 { 630 const struct cred *tcred = get_task_cred(task); 631 const struct cred *cred = current_cred(); 632 int ret = 0; 633 634 /* 635 * Even if we're attaching all tasks in the thread group, we only 636 * need to check permissions on one of them. 637 */ 638 if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) && 639 !uid_eq(cred->euid, tcred->uid) && 640 !uid_eq(cred->euid, tcred->suid)) { 641 rdt_last_cmd_printf("No permission to move task %d\n", task->pid); 642 ret = -EPERM; 643 } 644 645 put_cred(tcred); 646 return ret; 647 } 648 649 static int rdtgroup_move_task(pid_t pid, struct rdtgroup *rdtgrp, 650 struct kernfs_open_file *of) 651 { 652 struct task_struct *tsk; 653 int ret; 654 655 rcu_read_lock(); 656 if (pid) { 657 tsk = find_task_by_vpid(pid); 658 if (!tsk) { 659 rcu_read_unlock(); 660 rdt_last_cmd_printf("No task %d\n", pid); 661 return -ESRCH; 662 } 663 } else { 664 tsk = current; 665 } 666 667 get_task_struct(tsk); 668 rcu_read_unlock(); 669 670 ret = rdtgroup_task_write_permission(tsk, of); 671 if (!ret) 672 ret = __rdtgroup_move_task(tsk, rdtgrp); 673 674 put_task_struct(tsk); 675 return ret; 676 } 677 678 static ssize_t rdtgroup_tasks_write(struct kernfs_open_file *of, 679 char *buf, size_t nbytes, loff_t off) 680 { 681 struct rdtgroup *rdtgrp; 682 int ret = 0; 683 pid_t pid; 684 685 if (kstrtoint(strstrip(buf), 0, &pid) || pid < 0) 686 return -EINVAL; 687 rdtgrp = rdtgroup_kn_lock_live(of->kn); 688 if (!rdtgrp) { 689 rdtgroup_kn_unlock(of->kn); 690 return -ENOENT; 691 } 692 rdt_last_cmd_clear(); 693 694 if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED || 695 rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) { 696 ret = -EINVAL; 697 rdt_last_cmd_puts("Pseudo-locking in progress\n"); 698 goto unlock; 699 } 700 701 ret = rdtgroup_move_task(pid, rdtgrp, of); 702 703 unlock: 704 rdtgroup_kn_unlock(of->kn); 705 706 return ret ?: nbytes; 707 } 708 709 static void show_rdt_tasks(struct rdtgroup *r, struct seq_file *s) 710 { 711 struct task_struct *p, *t; 712 713 rcu_read_lock(); 714 for_each_process_thread(p, t) { 715 if ((r->type == RDTCTRL_GROUP && t->closid == r->closid) || 716 (r->type == RDTMON_GROUP && t->rmid == r->mon.rmid)) 717 seq_printf(s, "%d\n", t->pid); 718 } 719 rcu_read_unlock(); 720 } 721 722 static int rdtgroup_tasks_show(struct kernfs_open_file *of, 723 struct seq_file *s, void *v) 724 { 725 struct rdtgroup *rdtgrp; 726 int ret = 0; 727 728 rdtgrp = rdtgroup_kn_lock_live(of->kn); 729 if (rdtgrp) 730 show_rdt_tasks(rdtgrp, s); 731 else 732 ret = -ENOENT; 733 rdtgroup_kn_unlock(of->kn); 734 735 return ret; 736 } 737 738 static int rdt_last_cmd_status_show(struct kernfs_open_file *of, 739 struct seq_file *seq, void *v) 740 { 741 int len; 742 743 mutex_lock(&rdtgroup_mutex); 744 len = seq_buf_used(&last_cmd_status); 745 if (len) 746 seq_printf(seq, "%.*s", len, last_cmd_status_buf); 747 else 748 seq_puts(seq, "ok\n"); 749 mutex_unlock(&rdtgroup_mutex); 750 return 0; 751 } 752 753 static int rdt_num_closids_show(struct kernfs_open_file *of, 754 struct seq_file *seq, void *v) 755 { 756 struct rdt_resource *r = of->kn->parent->priv; 757 758 seq_printf(seq, "%d\n", r->num_closid); 759 return 0; 760 } 761 762 static int rdt_default_ctrl_show(struct kernfs_open_file *of, 763 struct seq_file *seq, void *v) 764 { 765 struct rdt_resource *r = of->kn->parent->priv; 766 767 seq_printf(seq, "%x\n", r->default_ctrl); 768 return 0; 769 } 770 771 static int rdt_min_cbm_bits_show(struct kernfs_open_file *of, 772 struct seq_file *seq, void *v) 773 { 774 struct rdt_resource *r = of->kn->parent->priv; 775 776 seq_printf(seq, "%u\n", r->cache.min_cbm_bits); 777 return 0; 778 } 779 780 static int rdt_shareable_bits_show(struct kernfs_open_file *of, 781 struct seq_file *seq, void *v) 782 { 783 struct rdt_resource *r = of->kn->parent->priv; 784 785 seq_printf(seq, "%x\n", r->cache.shareable_bits); 786 return 0; 787 } 788 789 /** 790 * rdt_bit_usage_show - Display current usage of resources 791 * 792 * A domain is a shared resource that can now be allocated differently. Here 793 * we display the current regions of the domain as an annotated bitmask. 794 * For each domain of this resource its allocation bitmask 795 * is annotated as below to indicate the current usage of the corresponding bit: 796 * 0 - currently unused 797 * X - currently available for sharing and used by software and hardware 798 * H - currently used by hardware only but available for software use 799 * S - currently used and shareable by software only 800 * E - currently used exclusively by one resource group 801 * P - currently pseudo-locked by one resource group 802 */ 803 static int rdt_bit_usage_show(struct kernfs_open_file *of, 804 struct seq_file *seq, void *v) 805 { 806 struct rdt_resource *r = of->kn->parent->priv; 807 u32 sw_shareable = 0, hw_shareable = 0; 808 u32 exclusive = 0, pseudo_locked = 0; 809 struct rdt_domain *dom; 810 int i, hwb, swb, excl, psl; 811 enum rdtgrp_mode mode; 812 bool sep = false; 813 u32 *ctrl; 814 815 mutex_lock(&rdtgroup_mutex); 816 hw_shareable = r->cache.shareable_bits; 817 list_for_each_entry(dom, &r->domains, list) { 818 if (sep) 819 seq_putc(seq, ';'); 820 ctrl = dom->ctrl_val; 821 sw_shareable = 0; 822 exclusive = 0; 823 seq_printf(seq, "%d=", dom->id); 824 for (i = 0; i < closids_supported(); i++, ctrl++) { 825 if (!closid_allocated(i)) 826 continue; 827 mode = rdtgroup_mode_by_closid(i); 828 switch (mode) { 829 case RDT_MODE_SHAREABLE: 830 sw_shareable |= *ctrl; 831 break; 832 case RDT_MODE_EXCLUSIVE: 833 exclusive |= *ctrl; 834 break; 835 case RDT_MODE_PSEUDO_LOCKSETUP: 836 /* 837 * RDT_MODE_PSEUDO_LOCKSETUP is possible 838 * here but not included since the CBM 839 * associated with this CLOSID in this mode 840 * is not initialized and no task or cpu can be 841 * assigned this CLOSID. 842 */ 843 break; 844 case RDT_MODE_PSEUDO_LOCKED: 845 case RDT_NUM_MODES: 846 WARN(1, 847 "invalid mode for closid %d\n", i); 848 break; 849 } 850 } 851 for (i = r->cache.cbm_len - 1; i >= 0; i--) { 852 pseudo_locked = dom->plr ? dom->plr->cbm : 0; 853 hwb = test_bit(i, (unsigned long *)&hw_shareable); 854 swb = test_bit(i, (unsigned long *)&sw_shareable); 855 excl = test_bit(i, (unsigned long *)&exclusive); 856 psl = test_bit(i, (unsigned long *)&pseudo_locked); 857 if (hwb && swb) 858 seq_putc(seq, 'X'); 859 else if (hwb && !swb) 860 seq_putc(seq, 'H'); 861 else if (!hwb && swb) 862 seq_putc(seq, 'S'); 863 else if (excl) 864 seq_putc(seq, 'E'); 865 else if (psl) 866 seq_putc(seq, 'P'); 867 else /* Unused bits remain */ 868 seq_putc(seq, '0'); 869 } 870 sep = true; 871 } 872 seq_putc(seq, '\n'); 873 mutex_unlock(&rdtgroup_mutex); 874 return 0; 875 } 876 877 static int rdt_min_bw_show(struct kernfs_open_file *of, 878 struct seq_file *seq, void *v) 879 { 880 struct rdt_resource *r = of->kn->parent->priv; 881 882 seq_printf(seq, "%u\n", r->membw.min_bw); 883 return 0; 884 } 885 886 static int rdt_num_rmids_show(struct kernfs_open_file *of, 887 struct seq_file *seq, void *v) 888 { 889 struct rdt_resource *r = of->kn->parent->priv; 890 891 seq_printf(seq, "%d\n", r->num_rmid); 892 893 return 0; 894 } 895 896 static int rdt_mon_features_show(struct kernfs_open_file *of, 897 struct seq_file *seq, void *v) 898 { 899 struct rdt_resource *r = of->kn->parent->priv; 900 struct mon_evt *mevt; 901 902 list_for_each_entry(mevt, &r->evt_list, list) 903 seq_printf(seq, "%s\n", mevt->name); 904 905 return 0; 906 } 907 908 static int rdt_bw_gran_show(struct kernfs_open_file *of, 909 struct seq_file *seq, void *v) 910 { 911 struct rdt_resource *r = of->kn->parent->priv; 912 913 seq_printf(seq, "%u\n", r->membw.bw_gran); 914 return 0; 915 } 916 917 static int rdt_delay_linear_show(struct kernfs_open_file *of, 918 struct seq_file *seq, void *v) 919 { 920 struct rdt_resource *r = of->kn->parent->priv; 921 922 seq_printf(seq, "%u\n", r->membw.delay_linear); 923 return 0; 924 } 925 926 static int max_threshold_occ_show(struct kernfs_open_file *of, 927 struct seq_file *seq, void *v) 928 { 929 struct rdt_resource *r = of->kn->parent->priv; 930 931 seq_printf(seq, "%u\n", resctrl_cqm_threshold * r->mon_scale); 932 933 return 0; 934 } 935 936 static ssize_t max_threshold_occ_write(struct kernfs_open_file *of, 937 char *buf, size_t nbytes, loff_t off) 938 { 939 struct rdt_resource *r = of->kn->parent->priv; 940 unsigned int bytes; 941 int ret; 942 943 ret = kstrtouint(buf, 0, &bytes); 944 if (ret) 945 return ret; 946 947 if (bytes > (boot_cpu_data.x86_cache_size * 1024)) 948 return -EINVAL; 949 950 resctrl_cqm_threshold = bytes / r->mon_scale; 951 952 return nbytes; 953 } 954 955 /* 956 * rdtgroup_mode_show - Display mode of this resource group 957 */ 958 static int rdtgroup_mode_show(struct kernfs_open_file *of, 959 struct seq_file *s, void *v) 960 { 961 struct rdtgroup *rdtgrp; 962 963 rdtgrp = rdtgroup_kn_lock_live(of->kn); 964 if (!rdtgrp) { 965 rdtgroup_kn_unlock(of->kn); 966 return -ENOENT; 967 } 968 969 seq_printf(s, "%s\n", rdtgroup_mode_str(rdtgrp->mode)); 970 971 rdtgroup_kn_unlock(of->kn); 972 return 0; 973 } 974 975 /** 976 * rdt_cdp_peer_get - Retrieve CDP peer if it exists 977 * @r: RDT resource to which RDT domain @d belongs 978 * @d: Cache instance for which a CDP peer is requested 979 * @r_cdp: RDT resource that shares hardware with @r (RDT resource peer) 980 * Used to return the result. 981 * @d_cdp: RDT domain that shares hardware with @d (RDT domain peer) 982 * Used to return the result. 983 * 984 * RDT resources are managed independently and by extension the RDT domains 985 * (RDT resource instances) are managed independently also. The Code and 986 * Data Prioritization (CDP) RDT resources, while managed independently, 987 * could refer to the same underlying hardware. For example, 988 * RDT_RESOURCE_L2CODE and RDT_RESOURCE_L2DATA both refer to the L2 cache. 989 * 990 * When provided with an RDT resource @r and an instance of that RDT 991 * resource @d rdt_cdp_peer_get() will return if there is a peer RDT 992 * resource and the exact instance that shares the same hardware. 993 * 994 * Return: 0 if a CDP peer was found, <0 on error or if no CDP peer exists. 995 * If a CDP peer was found, @r_cdp will point to the peer RDT resource 996 * and @d_cdp will point to the peer RDT domain. 997 */ 998 static int rdt_cdp_peer_get(struct rdt_resource *r, struct rdt_domain *d, 999 struct rdt_resource **r_cdp, 1000 struct rdt_domain **d_cdp) 1001 { 1002 struct rdt_resource *_r_cdp = NULL; 1003 struct rdt_domain *_d_cdp = NULL; 1004 int ret = 0; 1005 1006 switch (r->rid) { 1007 case RDT_RESOURCE_L3DATA: 1008 _r_cdp = &rdt_resources_all[RDT_RESOURCE_L3CODE]; 1009 break; 1010 case RDT_RESOURCE_L3CODE: 1011 _r_cdp = &rdt_resources_all[RDT_RESOURCE_L3DATA]; 1012 break; 1013 case RDT_RESOURCE_L2DATA: 1014 _r_cdp = &rdt_resources_all[RDT_RESOURCE_L2CODE]; 1015 break; 1016 case RDT_RESOURCE_L2CODE: 1017 _r_cdp = &rdt_resources_all[RDT_RESOURCE_L2DATA]; 1018 break; 1019 default: 1020 ret = -ENOENT; 1021 goto out; 1022 } 1023 1024 /* 1025 * When a new CPU comes online and CDP is enabled then the new 1026 * RDT domains (if any) associated with both CDP RDT resources 1027 * are added in the same CPU online routine while the 1028 * rdtgroup_mutex is held. It should thus not happen for one 1029 * RDT domain to exist and be associated with its RDT CDP 1030 * resource but there is no RDT domain associated with the 1031 * peer RDT CDP resource. Hence the WARN. 1032 */ 1033 _d_cdp = rdt_find_domain(_r_cdp, d->id, NULL); 1034 if (WARN_ON(IS_ERR_OR_NULL(_d_cdp))) { 1035 _r_cdp = NULL; 1036 ret = -EINVAL; 1037 } 1038 1039 out: 1040 *r_cdp = _r_cdp; 1041 *d_cdp = _d_cdp; 1042 1043 return ret; 1044 } 1045 1046 /** 1047 * __rdtgroup_cbm_overlaps - Does CBM for intended closid overlap with other 1048 * @r: Resource to which domain instance @d belongs. 1049 * @d: The domain instance for which @closid is being tested. 1050 * @cbm: Capacity bitmask being tested. 1051 * @closid: Intended closid for @cbm. 1052 * @exclusive: Only check if overlaps with exclusive resource groups 1053 * 1054 * Checks if provided @cbm intended to be used for @closid on domain 1055 * @d overlaps with any other closids or other hardware usage associated 1056 * with this domain. If @exclusive is true then only overlaps with 1057 * resource groups in exclusive mode will be considered. If @exclusive 1058 * is false then overlaps with any resource group or hardware entities 1059 * will be considered. 1060 * 1061 * @cbm is unsigned long, even if only 32 bits are used, to make the 1062 * bitmap functions work correctly. 1063 * 1064 * Return: false if CBM does not overlap, true if it does. 1065 */ 1066 static bool __rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d, 1067 unsigned long cbm, int closid, bool exclusive) 1068 { 1069 enum rdtgrp_mode mode; 1070 unsigned long ctrl_b; 1071 u32 *ctrl; 1072 int i; 1073 1074 /* Check for any overlap with regions used by hardware directly */ 1075 if (!exclusive) { 1076 ctrl_b = r->cache.shareable_bits; 1077 if (bitmap_intersects(&cbm, &ctrl_b, r->cache.cbm_len)) 1078 return true; 1079 } 1080 1081 /* Check for overlap with other resource groups */ 1082 ctrl = d->ctrl_val; 1083 for (i = 0; i < closids_supported(); i++, ctrl++) { 1084 ctrl_b = *ctrl; 1085 mode = rdtgroup_mode_by_closid(i); 1086 if (closid_allocated(i) && i != closid && 1087 mode != RDT_MODE_PSEUDO_LOCKSETUP) { 1088 if (bitmap_intersects(&cbm, &ctrl_b, r->cache.cbm_len)) { 1089 if (exclusive) { 1090 if (mode == RDT_MODE_EXCLUSIVE) 1091 return true; 1092 continue; 1093 } 1094 return true; 1095 } 1096 } 1097 } 1098 1099 return false; 1100 } 1101 1102 /** 1103 * rdtgroup_cbm_overlaps - Does CBM overlap with other use of hardware 1104 * @r: Resource to which domain instance @d belongs. 1105 * @d: The domain instance for which @closid is being tested. 1106 * @cbm: Capacity bitmask being tested. 1107 * @closid: Intended closid for @cbm. 1108 * @exclusive: Only check if overlaps with exclusive resource groups 1109 * 1110 * Resources that can be allocated using a CBM can use the CBM to control 1111 * the overlap of these allocations. rdtgroup_cmb_overlaps() is the test 1112 * for overlap. Overlap test is not limited to the specific resource for 1113 * which the CBM is intended though - when dealing with CDP resources that 1114 * share the underlying hardware the overlap check should be performed on 1115 * the CDP resource sharing the hardware also. 1116 * 1117 * Refer to description of __rdtgroup_cbm_overlaps() for the details of the 1118 * overlap test. 1119 * 1120 * Return: true if CBM overlap detected, false if there is no overlap 1121 */ 1122 bool rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d, 1123 unsigned long cbm, int closid, bool exclusive) 1124 { 1125 struct rdt_resource *r_cdp; 1126 struct rdt_domain *d_cdp; 1127 1128 if (__rdtgroup_cbm_overlaps(r, d, cbm, closid, exclusive)) 1129 return true; 1130 1131 if (rdt_cdp_peer_get(r, d, &r_cdp, &d_cdp) < 0) 1132 return false; 1133 1134 return __rdtgroup_cbm_overlaps(r_cdp, d_cdp, cbm, closid, exclusive); 1135 } 1136 1137 /** 1138 * rdtgroup_mode_test_exclusive - Test if this resource group can be exclusive 1139 * 1140 * An exclusive resource group implies that there should be no sharing of 1141 * its allocated resources. At the time this group is considered to be 1142 * exclusive this test can determine if its current schemata supports this 1143 * setting by testing for overlap with all other resource groups. 1144 * 1145 * Return: true if resource group can be exclusive, false if there is overlap 1146 * with allocations of other resource groups and thus this resource group 1147 * cannot be exclusive. 1148 */ 1149 static bool rdtgroup_mode_test_exclusive(struct rdtgroup *rdtgrp) 1150 { 1151 int closid = rdtgrp->closid; 1152 struct rdt_resource *r; 1153 bool has_cache = false; 1154 struct rdt_domain *d; 1155 1156 for_each_alloc_enabled_rdt_resource(r) { 1157 if (r->rid == RDT_RESOURCE_MBA) 1158 continue; 1159 has_cache = true; 1160 list_for_each_entry(d, &r->domains, list) { 1161 if (rdtgroup_cbm_overlaps(r, d, d->ctrl_val[closid], 1162 rdtgrp->closid, false)) { 1163 rdt_last_cmd_puts("Schemata overlaps\n"); 1164 return false; 1165 } 1166 } 1167 } 1168 1169 if (!has_cache) { 1170 rdt_last_cmd_puts("Cannot be exclusive without CAT/CDP\n"); 1171 return false; 1172 } 1173 1174 return true; 1175 } 1176 1177 /** 1178 * rdtgroup_mode_write - Modify the resource group's mode 1179 * 1180 */ 1181 static ssize_t rdtgroup_mode_write(struct kernfs_open_file *of, 1182 char *buf, size_t nbytes, loff_t off) 1183 { 1184 struct rdtgroup *rdtgrp; 1185 enum rdtgrp_mode mode; 1186 int ret = 0; 1187 1188 /* Valid input requires a trailing newline */ 1189 if (nbytes == 0 || buf[nbytes - 1] != '\n') 1190 return -EINVAL; 1191 buf[nbytes - 1] = '\0'; 1192 1193 rdtgrp = rdtgroup_kn_lock_live(of->kn); 1194 if (!rdtgrp) { 1195 rdtgroup_kn_unlock(of->kn); 1196 return -ENOENT; 1197 } 1198 1199 rdt_last_cmd_clear(); 1200 1201 mode = rdtgrp->mode; 1202 1203 if ((!strcmp(buf, "shareable") && mode == RDT_MODE_SHAREABLE) || 1204 (!strcmp(buf, "exclusive") && mode == RDT_MODE_EXCLUSIVE) || 1205 (!strcmp(buf, "pseudo-locksetup") && 1206 mode == RDT_MODE_PSEUDO_LOCKSETUP) || 1207 (!strcmp(buf, "pseudo-locked") && mode == RDT_MODE_PSEUDO_LOCKED)) 1208 goto out; 1209 1210 if (mode == RDT_MODE_PSEUDO_LOCKED) { 1211 rdt_last_cmd_puts("Cannot change pseudo-locked group\n"); 1212 ret = -EINVAL; 1213 goto out; 1214 } 1215 1216 if (!strcmp(buf, "shareable")) { 1217 if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) { 1218 ret = rdtgroup_locksetup_exit(rdtgrp); 1219 if (ret) 1220 goto out; 1221 } 1222 rdtgrp->mode = RDT_MODE_SHAREABLE; 1223 } else if (!strcmp(buf, "exclusive")) { 1224 if (!rdtgroup_mode_test_exclusive(rdtgrp)) { 1225 ret = -EINVAL; 1226 goto out; 1227 } 1228 if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) { 1229 ret = rdtgroup_locksetup_exit(rdtgrp); 1230 if (ret) 1231 goto out; 1232 } 1233 rdtgrp->mode = RDT_MODE_EXCLUSIVE; 1234 } else if (!strcmp(buf, "pseudo-locksetup")) { 1235 ret = rdtgroup_locksetup_enter(rdtgrp); 1236 if (ret) 1237 goto out; 1238 rdtgrp->mode = RDT_MODE_PSEUDO_LOCKSETUP; 1239 } else { 1240 rdt_last_cmd_puts("Unknown or unsupported mode\n"); 1241 ret = -EINVAL; 1242 } 1243 1244 out: 1245 rdtgroup_kn_unlock(of->kn); 1246 return ret ?: nbytes; 1247 } 1248 1249 /** 1250 * rdtgroup_cbm_to_size - Translate CBM to size in bytes 1251 * @r: RDT resource to which @d belongs. 1252 * @d: RDT domain instance. 1253 * @cbm: bitmask for which the size should be computed. 1254 * 1255 * The bitmask provided associated with the RDT domain instance @d will be 1256 * translated into how many bytes it represents. The size in bytes is 1257 * computed by first dividing the total cache size by the CBM length to 1258 * determine how many bytes each bit in the bitmask represents. The result 1259 * is multiplied with the number of bits set in the bitmask. 1260 * 1261 * @cbm is unsigned long, even if only 32 bits are used to make the 1262 * bitmap functions work correctly. 1263 */ 1264 unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r, 1265 struct rdt_domain *d, unsigned long cbm) 1266 { 1267 struct cpu_cacheinfo *ci; 1268 unsigned int size = 0; 1269 int num_b, i; 1270 1271 num_b = bitmap_weight(&cbm, r->cache.cbm_len); 1272 ci = get_cpu_cacheinfo(cpumask_any(&d->cpu_mask)); 1273 for (i = 0; i < ci->num_leaves; i++) { 1274 if (ci->info_list[i].level == r->cache_level) { 1275 size = ci->info_list[i].size / r->cache.cbm_len * num_b; 1276 break; 1277 } 1278 } 1279 1280 return size; 1281 } 1282 1283 /** 1284 * rdtgroup_size_show - Display size in bytes of allocated regions 1285 * 1286 * The "size" file mirrors the layout of the "schemata" file, printing the 1287 * size in bytes of each region instead of the capacity bitmask. 1288 * 1289 */ 1290 static int rdtgroup_size_show(struct kernfs_open_file *of, 1291 struct seq_file *s, void *v) 1292 { 1293 struct rdtgroup *rdtgrp; 1294 struct rdt_resource *r; 1295 struct rdt_domain *d; 1296 unsigned int size; 1297 int ret = 0; 1298 bool sep; 1299 u32 ctrl; 1300 1301 rdtgrp = rdtgroup_kn_lock_live(of->kn); 1302 if (!rdtgrp) { 1303 rdtgroup_kn_unlock(of->kn); 1304 return -ENOENT; 1305 } 1306 1307 if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) { 1308 if (!rdtgrp->plr->d) { 1309 rdt_last_cmd_clear(); 1310 rdt_last_cmd_puts("Cache domain offline\n"); 1311 ret = -ENODEV; 1312 } else { 1313 seq_printf(s, "%*s:", max_name_width, 1314 rdtgrp->plr->r->name); 1315 size = rdtgroup_cbm_to_size(rdtgrp->plr->r, 1316 rdtgrp->plr->d, 1317 rdtgrp->plr->cbm); 1318 seq_printf(s, "%d=%u\n", rdtgrp->plr->d->id, size); 1319 } 1320 goto out; 1321 } 1322 1323 for_each_alloc_enabled_rdt_resource(r) { 1324 sep = false; 1325 seq_printf(s, "%*s:", max_name_width, r->name); 1326 list_for_each_entry(d, &r->domains, list) { 1327 if (sep) 1328 seq_putc(s, ';'); 1329 if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) { 1330 size = 0; 1331 } else { 1332 ctrl = (!is_mba_sc(r) ? 1333 d->ctrl_val[rdtgrp->closid] : 1334 d->mbps_val[rdtgrp->closid]); 1335 if (r->rid == RDT_RESOURCE_MBA) 1336 size = ctrl; 1337 else 1338 size = rdtgroup_cbm_to_size(r, d, ctrl); 1339 } 1340 seq_printf(s, "%d=%u", d->id, size); 1341 sep = true; 1342 } 1343 seq_putc(s, '\n'); 1344 } 1345 1346 out: 1347 rdtgroup_kn_unlock(of->kn); 1348 1349 return ret; 1350 } 1351 1352 /* rdtgroup information files for one cache resource. */ 1353 static struct rftype res_common_files[] = { 1354 { 1355 .name = "last_cmd_status", 1356 .mode = 0444, 1357 .kf_ops = &rdtgroup_kf_single_ops, 1358 .seq_show = rdt_last_cmd_status_show, 1359 .fflags = RF_TOP_INFO, 1360 }, 1361 { 1362 .name = "num_closids", 1363 .mode = 0444, 1364 .kf_ops = &rdtgroup_kf_single_ops, 1365 .seq_show = rdt_num_closids_show, 1366 .fflags = RF_CTRL_INFO, 1367 }, 1368 { 1369 .name = "mon_features", 1370 .mode = 0444, 1371 .kf_ops = &rdtgroup_kf_single_ops, 1372 .seq_show = rdt_mon_features_show, 1373 .fflags = RF_MON_INFO, 1374 }, 1375 { 1376 .name = "num_rmids", 1377 .mode = 0444, 1378 .kf_ops = &rdtgroup_kf_single_ops, 1379 .seq_show = rdt_num_rmids_show, 1380 .fflags = RF_MON_INFO, 1381 }, 1382 { 1383 .name = "cbm_mask", 1384 .mode = 0444, 1385 .kf_ops = &rdtgroup_kf_single_ops, 1386 .seq_show = rdt_default_ctrl_show, 1387 .fflags = RF_CTRL_INFO | RFTYPE_RES_CACHE, 1388 }, 1389 { 1390 .name = "min_cbm_bits", 1391 .mode = 0444, 1392 .kf_ops = &rdtgroup_kf_single_ops, 1393 .seq_show = rdt_min_cbm_bits_show, 1394 .fflags = RF_CTRL_INFO | RFTYPE_RES_CACHE, 1395 }, 1396 { 1397 .name = "shareable_bits", 1398 .mode = 0444, 1399 .kf_ops = &rdtgroup_kf_single_ops, 1400 .seq_show = rdt_shareable_bits_show, 1401 .fflags = RF_CTRL_INFO | RFTYPE_RES_CACHE, 1402 }, 1403 { 1404 .name = "bit_usage", 1405 .mode = 0444, 1406 .kf_ops = &rdtgroup_kf_single_ops, 1407 .seq_show = rdt_bit_usage_show, 1408 .fflags = RF_CTRL_INFO | RFTYPE_RES_CACHE, 1409 }, 1410 { 1411 .name = "min_bandwidth", 1412 .mode = 0444, 1413 .kf_ops = &rdtgroup_kf_single_ops, 1414 .seq_show = rdt_min_bw_show, 1415 .fflags = RF_CTRL_INFO | RFTYPE_RES_MB, 1416 }, 1417 { 1418 .name = "bandwidth_gran", 1419 .mode = 0444, 1420 .kf_ops = &rdtgroup_kf_single_ops, 1421 .seq_show = rdt_bw_gran_show, 1422 .fflags = RF_CTRL_INFO | RFTYPE_RES_MB, 1423 }, 1424 { 1425 .name = "delay_linear", 1426 .mode = 0444, 1427 .kf_ops = &rdtgroup_kf_single_ops, 1428 .seq_show = rdt_delay_linear_show, 1429 .fflags = RF_CTRL_INFO | RFTYPE_RES_MB, 1430 }, 1431 { 1432 .name = "max_threshold_occupancy", 1433 .mode = 0644, 1434 .kf_ops = &rdtgroup_kf_single_ops, 1435 .write = max_threshold_occ_write, 1436 .seq_show = max_threshold_occ_show, 1437 .fflags = RF_MON_INFO | RFTYPE_RES_CACHE, 1438 }, 1439 { 1440 .name = "cpus", 1441 .mode = 0644, 1442 .kf_ops = &rdtgroup_kf_single_ops, 1443 .write = rdtgroup_cpus_write, 1444 .seq_show = rdtgroup_cpus_show, 1445 .fflags = RFTYPE_BASE, 1446 }, 1447 { 1448 .name = "cpus_list", 1449 .mode = 0644, 1450 .kf_ops = &rdtgroup_kf_single_ops, 1451 .write = rdtgroup_cpus_write, 1452 .seq_show = rdtgroup_cpus_show, 1453 .flags = RFTYPE_FLAGS_CPUS_LIST, 1454 .fflags = RFTYPE_BASE, 1455 }, 1456 { 1457 .name = "tasks", 1458 .mode = 0644, 1459 .kf_ops = &rdtgroup_kf_single_ops, 1460 .write = rdtgroup_tasks_write, 1461 .seq_show = rdtgroup_tasks_show, 1462 .fflags = RFTYPE_BASE, 1463 }, 1464 { 1465 .name = "schemata", 1466 .mode = 0644, 1467 .kf_ops = &rdtgroup_kf_single_ops, 1468 .write = rdtgroup_schemata_write, 1469 .seq_show = rdtgroup_schemata_show, 1470 .fflags = RF_CTRL_BASE, 1471 }, 1472 { 1473 .name = "mode", 1474 .mode = 0644, 1475 .kf_ops = &rdtgroup_kf_single_ops, 1476 .write = rdtgroup_mode_write, 1477 .seq_show = rdtgroup_mode_show, 1478 .fflags = RF_CTRL_BASE, 1479 }, 1480 { 1481 .name = "size", 1482 .mode = 0444, 1483 .kf_ops = &rdtgroup_kf_single_ops, 1484 .seq_show = rdtgroup_size_show, 1485 .fflags = RF_CTRL_BASE, 1486 }, 1487 1488 }; 1489 1490 static int rdtgroup_add_files(struct kernfs_node *kn, unsigned long fflags) 1491 { 1492 struct rftype *rfts, *rft; 1493 int ret, len; 1494 1495 rfts = res_common_files; 1496 len = ARRAY_SIZE(res_common_files); 1497 1498 lockdep_assert_held(&rdtgroup_mutex); 1499 1500 for (rft = rfts; rft < rfts + len; rft++) { 1501 if ((fflags & rft->fflags) == rft->fflags) { 1502 ret = rdtgroup_add_file(kn, rft); 1503 if (ret) 1504 goto error; 1505 } 1506 } 1507 1508 return 0; 1509 error: 1510 pr_warn("Failed to add %s, err=%d\n", rft->name, ret); 1511 while (--rft >= rfts) { 1512 if ((fflags & rft->fflags) == rft->fflags) 1513 kernfs_remove_by_name(kn, rft->name); 1514 } 1515 return ret; 1516 } 1517 1518 /** 1519 * rdtgroup_kn_mode_restrict - Restrict user access to named resctrl file 1520 * @r: The resource group with which the file is associated. 1521 * @name: Name of the file 1522 * 1523 * The permissions of named resctrl file, directory, or link are modified 1524 * to not allow read, write, or execute by any user. 1525 * 1526 * WARNING: This function is intended to communicate to the user that the 1527 * resctrl file has been locked down - that it is not relevant to the 1528 * particular state the system finds itself in. It should not be relied 1529 * on to protect from user access because after the file's permissions 1530 * are restricted the user can still change the permissions using chmod 1531 * from the command line. 1532 * 1533 * Return: 0 on success, <0 on failure. 1534 */ 1535 int rdtgroup_kn_mode_restrict(struct rdtgroup *r, const char *name) 1536 { 1537 struct iattr iattr = {.ia_valid = ATTR_MODE,}; 1538 struct kernfs_node *kn; 1539 int ret = 0; 1540 1541 kn = kernfs_find_and_get_ns(r->kn, name, NULL); 1542 if (!kn) 1543 return -ENOENT; 1544 1545 switch (kernfs_type(kn)) { 1546 case KERNFS_DIR: 1547 iattr.ia_mode = S_IFDIR; 1548 break; 1549 case KERNFS_FILE: 1550 iattr.ia_mode = S_IFREG; 1551 break; 1552 case KERNFS_LINK: 1553 iattr.ia_mode = S_IFLNK; 1554 break; 1555 } 1556 1557 ret = kernfs_setattr(kn, &iattr); 1558 kernfs_put(kn); 1559 return ret; 1560 } 1561 1562 /** 1563 * rdtgroup_kn_mode_restore - Restore user access to named resctrl file 1564 * @r: The resource group with which the file is associated. 1565 * @name: Name of the file 1566 * @mask: Mask of permissions that should be restored 1567 * 1568 * Restore the permissions of the named file. If @name is a directory the 1569 * permissions of its parent will be used. 1570 * 1571 * Return: 0 on success, <0 on failure. 1572 */ 1573 int rdtgroup_kn_mode_restore(struct rdtgroup *r, const char *name, 1574 umode_t mask) 1575 { 1576 struct iattr iattr = {.ia_valid = ATTR_MODE,}; 1577 struct kernfs_node *kn, *parent; 1578 struct rftype *rfts, *rft; 1579 int ret, len; 1580 1581 rfts = res_common_files; 1582 len = ARRAY_SIZE(res_common_files); 1583 1584 for (rft = rfts; rft < rfts + len; rft++) { 1585 if (!strcmp(rft->name, name)) 1586 iattr.ia_mode = rft->mode & mask; 1587 } 1588 1589 kn = kernfs_find_and_get_ns(r->kn, name, NULL); 1590 if (!kn) 1591 return -ENOENT; 1592 1593 switch (kernfs_type(kn)) { 1594 case KERNFS_DIR: 1595 parent = kernfs_get_parent(kn); 1596 if (parent) { 1597 iattr.ia_mode |= parent->mode; 1598 kernfs_put(parent); 1599 } 1600 iattr.ia_mode |= S_IFDIR; 1601 break; 1602 case KERNFS_FILE: 1603 iattr.ia_mode |= S_IFREG; 1604 break; 1605 case KERNFS_LINK: 1606 iattr.ia_mode |= S_IFLNK; 1607 break; 1608 } 1609 1610 ret = kernfs_setattr(kn, &iattr); 1611 kernfs_put(kn); 1612 return ret; 1613 } 1614 1615 static int rdtgroup_mkdir_info_resdir(struct rdt_resource *r, char *name, 1616 unsigned long fflags) 1617 { 1618 struct kernfs_node *kn_subdir; 1619 int ret; 1620 1621 kn_subdir = kernfs_create_dir(kn_info, name, 1622 kn_info->mode, r); 1623 if (IS_ERR(kn_subdir)) 1624 return PTR_ERR(kn_subdir); 1625 1626 kernfs_get(kn_subdir); 1627 ret = rdtgroup_kn_set_ugid(kn_subdir); 1628 if (ret) 1629 return ret; 1630 1631 ret = rdtgroup_add_files(kn_subdir, fflags); 1632 if (!ret) 1633 kernfs_activate(kn_subdir); 1634 1635 return ret; 1636 } 1637 1638 static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn) 1639 { 1640 struct rdt_resource *r; 1641 unsigned long fflags; 1642 char name[32]; 1643 int ret; 1644 1645 /* create the directory */ 1646 kn_info = kernfs_create_dir(parent_kn, "info", parent_kn->mode, NULL); 1647 if (IS_ERR(kn_info)) 1648 return PTR_ERR(kn_info); 1649 kernfs_get(kn_info); 1650 1651 ret = rdtgroup_add_files(kn_info, RF_TOP_INFO); 1652 if (ret) 1653 goto out_destroy; 1654 1655 for_each_alloc_enabled_rdt_resource(r) { 1656 fflags = r->fflags | RF_CTRL_INFO; 1657 ret = rdtgroup_mkdir_info_resdir(r, r->name, fflags); 1658 if (ret) 1659 goto out_destroy; 1660 } 1661 1662 for_each_mon_enabled_rdt_resource(r) { 1663 fflags = r->fflags | RF_MON_INFO; 1664 sprintf(name, "%s_MON", r->name); 1665 ret = rdtgroup_mkdir_info_resdir(r, name, fflags); 1666 if (ret) 1667 goto out_destroy; 1668 } 1669 1670 /* 1671 * This extra ref will be put in kernfs_remove() and guarantees 1672 * that @rdtgrp->kn is always accessible. 1673 */ 1674 kernfs_get(kn_info); 1675 1676 ret = rdtgroup_kn_set_ugid(kn_info); 1677 if (ret) 1678 goto out_destroy; 1679 1680 kernfs_activate(kn_info); 1681 1682 return 0; 1683 1684 out_destroy: 1685 kernfs_remove(kn_info); 1686 return ret; 1687 } 1688 1689 static int 1690 mongroup_create_dir(struct kernfs_node *parent_kn, struct rdtgroup *prgrp, 1691 char *name, struct kernfs_node **dest_kn) 1692 { 1693 struct kernfs_node *kn; 1694 int ret; 1695 1696 /* create the directory */ 1697 kn = kernfs_create_dir(parent_kn, name, parent_kn->mode, prgrp); 1698 if (IS_ERR(kn)) 1699 return PTR_ERR(kn); 1700 1701 if (dest_kn) 1702 *dest_kn = kn; 1703 1704 /* 1705 * This extra ref will be put in kernfs_remove() and guarantees 1706 * that @rdtgrp->kn is always accessible. 1707 */ 1708 kernfs_get(kn); 1709 1710 ret = rdtgroup_kn_set_ugid(kn); 1711 if (ret) 1712 goto out_destroy; 1713 1714 kernfs_activate(kn); 1715 1716 return 0; 1717 1718 out_destroy: 1719 kernfs_remove(kn); 1720 return ret; 1721 } 1722 1723 static void l3_qos_cfg_update(void *arg) 1724 { 1725 bool *enable = arg; 1726 1727 wrmsrl(MSR_IA32_L3_QOS_CFG, *enable ? L3_QOS_CDP_ENABLE : 0ULL); 1728 } 1729 1730 static void l2_qos_cfg_update(void *arg) 1731 { 1732 bool *enable = arg; 1733 1734 wrmsrl(MSR_IA32_L2_QOS_CFG, *enable ? L2_QOS_CDP_ENABLE : 0ULL); 1735 } 1736 1737 static inline bool is_mba_linear(void) 1738 { 1739 return rdt_resources_all[RDT_RESOURCE_MBA].membw.delay_linear; 1740 } 1741 1742 static int set_cache_qos_cfg(int level, bool enable) 1743 { 1744 void (*update)(void *arg); 1745 struct rdt_resource *r_l; 1746 cpumask_var_t cpu_mask; 1747 struct rdt_domain *d; 1748 int cpu; 1749 1750 if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL)) 1751 return -ENOMEM; 1752 1753 if (level == RDT_RESOURCE_L3) 1754 update = l3_qos_cfg_update; 1755 else if (level == RDT_RESOURCE_L2) 1756 update = l2_qos_cfg_update; 1757 else 1758 return -EINVAL; 1759 1760 r_l = &rdt_resources_all[level]; 1761 list_for_each_entry(d, &r_l->domains, list) { 1762 /* Pick one CPU from each domain instance to update MSR */ 1763 cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask); 1764 } 1765 cpu = get_cpu(); 1766 /* Update QOS_CFG MSR on this cpu if it's in cpu_mask. */ 1767 if (cpumask_test_cpu(cpu, cpu_mask)) 1768 update(&enable); 1769 /* Update QOS_CFG MSR on all other cpus in cpu_mask. */ 1770 smp_call_function_many(cpu_mask, update, &enable, 1); 1771 put_cpu(); 1772 1773 free_cpumask_var(cpu_mask); 1774 1775 return 0; 1776 } 1777 1778 /* 1779 * Enable or disable the MBA software controller 1780 * which helps user specify bandwidth in MBps. 1781 * MBA software controller is supported only if 1782 * MBM is supported and MBA is in linear scale. 1783 */ 1784 static int set_mba_sc(bool mba_sc) 1785 { 1786 struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_MBA]; 1787 struct rdt_domain *d; 1788 1789 if (!is_mbm_enabled() || !is_mba_linear() || 1790 mba_sc == is_mba_sc(r)) 1791 return -EINVAL; 1792 1793 r->membw.mba_sc = mba_sc; 1794 list_for_each_entry(d, &r->domains, list) 1795 setup_default_ctrlval(r, d->ctrl_val, d->mbps_val); 1796 1797 return 0; 1798 } 1799 1800 static int cdp_enable(int level, int data_type, int code_type) 1801 { 1802 struct rdt_resource *r_ldata = &rdt_resources_all[data_type]; 1803 struct rdt_resource *r_lcode = &rdt_resources_all[code_type]; 1804 struct rdt_resource *r_l = &rdt_resources_all[level]; 1805 int ret; 1806 1807 if (!r_l->alloc_capable || !r_ldata->alloc_capable || 1808 !r_lcode->alloc_capable) 1809 return -EINVAL; 1810 1811 ret = set_cache_qos_cfg(level, true); 1812 if (!ret) { 1813 r_l->alloc_enabled = false; 1814 r_ldata->alloc_enabled = true; 1815 r_lcode->alloc_enabled = true; 1816 } 1817 return ret; 1818 } 1819 1820 static int cdpl3_enable(void) 1821 { 1822 return cdp_enable(RDT_RESOURCE_L3, RDT_RESOURCE_L3DATA, 1823 RDT_RESOURCE_L3CODE); 1824 } 1825 1826 static int cdpl2_enable(void) 1827 { 1828 return cdp_enable(RDT_RESOURCE_L2, RDT_RESOURCE_L2DATA, 1829 RDT_RESOURCE_L2CODE); 1830 } 1831 1832 static void cdp_disable(int level, int data_type, int code_type) 1833 { 1834 struct rdt_resource *r = &rdt_resources_all[level]; 1835 1836 r->alloc_enabled = r->alloc_capable; 1837 1838 if (rdt_resources_all[data_type].alloc_enabled) { 1839 rdt_resources_all[data_type].alloc_enabled = false; 1840 rdt_resources_all[code_type].alloc_enabled = false; 1841 set_cache_qos_cfg(level, false); 1842 } 1843 } 1844 1845 static void cdpl3_disable(void) 1846 { 1847 cdp_disable(RDT_RESOURCE_L3, RDT_RESOURCE_L3DATA, RDT_RESOURCE_L3CODE); 1848 } 1849 1850 static void cdpl2_disable(void) 1851 { 1852 cdp_disable(RDT_RESOURCE_L2, RDT_RESOURCE_L2DATA, RDT_RESOURCE_L2CODE); 1853 } 1854 1855 static void cdp_disable_all(void) 1856 { 1857 if (rdt_resources_all[RDT_RESOURCE_L3DATA].alloc_enabled) 1858 cdpl3_disable(); 1859 if (rdt_resources_all[RDT_RESOURCE_L2DATA].alloc_enabled) 1860 cdpl2_disable(); 1861 } 1862 1863 /* 1864 * We don't allow rdtgroup directories to be created anywhere 1865 * except the root directory. Thus when looking for the rdtgroup 1866 * structure for a kernfs node we are either looking at a directory, 1867 * in which case the rdtgroup structure is pointed at by the "priv" 1868 * field, otherwise we have a file, and need only look to the parent 1869 * to find the rdtgroup. 1870 */ 1871 static struct rdtgroup *kernfs_to_rdtgroup(struct kernfs_node *kn) 1872 { 1873 if (kernfs_type(kn) == KERNFS_DIR) { 1874 /* 1875 * All the resource directories use "kn->priv" 1876 * to point to the "struct rdtgroup" for the 1877 * resource. "info" and its subdirectories don't 1878 * have rdtgroup structures, so return NULL here. 1879 */ 1880 if (kn == kn_info || kn->parent == kn_info) 1881 return NULL; 1882 else 1883 return kn->priv; 1884 } else { 1885 return kn->parent->priv; 1886 } 1887 } 1888 1889 struct rdtgroup *rdtgroup_kn_lock_live(struct kernfs_node *kn) 1890 { 1891 struct rdtgroup *rdtgrp = kernfs_to_rdtgroup(kn); 1892 1893 if (!rdtgrp) 1894 return NULL; 1895 1896 atomic_inc(&rdtgrp->waitcount); 1897 kernfs_break_active_protection(kn); 1898 1899 mutex_lock(&rdtgroup_mutex); 1900 1901 /* Was this group deleted while we waited? */ 1902 if (rdtgrp->flags & RDT_DELETED) 1903 return NULL; 1904 1905 return rdtgrp; 1906 } 1907 1908 void rdtgroup_kn_unlock(struct kernfs_node *kn) 1909 { 1910 struct rdtgroup *rdtgrp = kernfs_to_rdtgroup(kn); 1911 1912 if (!rdtgrp) 1913 return; 1914 1915 mutex_unlock(&rdtgroup_mutex); 1916 1917 if (atomic_dec_and_test(&rdtgrp->waitcount) && 1918 (rdtgrp->flags & RDT_DELETED)) { 1919 if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP || 1920 rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) 1921 rdtgroup_pseudo_lock_remove(rdtgrp); 1922 kernfs_unbreak_active_protection(kn); 1923 kernfs_put(rdtgrp->kn); 1924 kfree(rdtgrp); 1925 } else { 1926 kernfs_unbreak_active_protection(kn); 1927 } 1928 } 1929 1930 static int mkdir_mondata_all(struct kernfs_node *parent_kn, 1931 struct rdtgroup *prgrp, 1932 struct kernfs_node **mon_data_kn); 1933 1934 static int rdt_enable_ctx(struct rdt_fs_context *ctx) 1935 { 1936 int ret = 0; 1937 1938 if (ctx->enable_cdpl2) 1939 ret = cdpl2_enable(); 1940 1941 if (!ret && ctx->enable_cdpl3) 1942 ret = cdpl3_enable(); 1943 1944 if (!ret && ctx->enable_mba_mbps) 1945 ret = set_mba_sc(true); 1946 1947 return ret; 1948 } 1949 1950 static int rdt_get_tree(struct fs_context *fc) 1951 { 1952 struct rdt_fs_context *ctx = rdt_fc2context(fc); 1953 struct rdt_domain *dom; 1954 struct rdt_resource *r; 1955 int ret; 1956 1957 cpus_read_lock(); 1958 mutex_lock(&rdtgroup_mutex); 1959 /* 1960 * resctrl file system can only be mounted once. 1961 */ 1962 if (static_branch_unlikely(&rdt_enable_key)) { 1963 ret = -EBUSY; 1964 goto out; 1965 } 1966 1967 ret = rdt_enable_ctx(ctx); 1968 if (ret < 0) 1969 goto out_cdp; 1970 1971 closid_init(); 1972 1973 ret = rdtgroup_create_info_dir(rdtgroup_default.kn); 1974 if (ret < 0) 1975 goto out_mba; 1976 1977 if (rdt_mon_capable) { 1978 ret = mongroup_create_dir(rdtgroup_default.kn, 1979 NULL, "mon_groups", 1980 &kn_mongrp); 1981 if (ret < 0) 1982 goto out_info; 1983 kernfs_get(kn_mongrp); 1984 1985 ret = mkdir_mondata_all(rdtgroup_default.kn, 1986 &rdtgroup_default, &kn_mondata); 1987 if (ret < 0) 1988 goto out_mongrp; 1989 kernfs_get(kn_mondata); 1990 rdtgroup_default.mon.mon_data_kn = kn_mondata; 1991 } 1992 1993 ret = rdt_pseudo_lock_init(); 1994 if (ret) 1995 goto out_mondata; 1996 1997 ret = kernfs_get_tree(fc); 1998 if (ret < 0) 1999 goto out_psl; 2000 2001 if (rdt_alloc_capable) 2002 static_branch_enable_cpuslocked(&rdt_alloc_enable_key); 2003 if (rdt_mon_capable) 2004 static_branch_enable_cpuslocked(&rdt_mon_enable_key); 2005 2006 if (rdt_alloc_capable || rdt_mon_capable) 2007 static_branch_enable_cpuslocked(&rdt_enable_key); 2008 2009 if (is_mbm_enabled()) { 2010 r = &rdt_resources_all[RDT_RESOURCE_L3]; 2011 list_for_each_entry(dom, &r->domains, list) 2012 mbm_setup_overflow_handler(dom, MBM_OVERFLOW_INTERVAL); 2013 } 2014 2015 goto out; 2016 2017 out_psl: 2018 rdt_pseudo_lock_release(); 2019 out_mondata: 2020 if (rdt_mon_capable) 2021 kernfs_remove(kn_mondata); 2022 out_mongrp: 2023 if (rdt_mon_capable) 2024 kernfs_remove(kn_mongrp); 2025 out_info: 2026 kernfs_remove(kn_info); 2027 out_mba: 2028 if (ctx->enable_mba_mbps) 2029 set_mba_sc(false); 2030 out_cdp: 2031 cdp_disable_all(); 2032 out: 2033 rdt_last_cmd_clear(); 2034 mutex_unlock(&rdtgroup_mutex); 2035 cpus_read_unlock(); 2036 return ret; 2037 } 2038 2039 enum rdt_param { 2040 Opt_cdp, 2041 Opt_cdpl2, 2042 Opt_mba_mbps, 2043 nr__rdt_params 2044 }; 2045 2046 static const struct fs_parameter_spec rdt_param_specs[] = { 2047 fsparam_flag("cdp", Opt_cdp), 2048 fsparam_flag("cdpl2", Opt_cdpl2), 2049 fsparam_flag("mba_MBps", Opt_mba_mbps), 2050 {} 2051 }; 2052 2053 static const struct fs_parameter_description rdt_fs_parameters = { 2054 .name = "rdt", 2055 .specs = rdt_param_specs, 2056 }; 2057 2058 static int rdt_parse_param(struct fs_context *fc, struct fs_parameter *param) 2059 { 2060 struct rdt_fs_context *ctx = rdt_fc2context(fc); 2061 struct fs_parse_result result; 2062 int opt; 2063 2064 opt = fs_parse(fc, &rdt_fs_parameters, param, &result); 2065 if (opt < 0) 2066 return opt; 2067 2068 switch (opt) { 2069 case Opt_cdp: 2070 ctx->enable_cdpl3 = true; 2071 return 0; 2072 case Opt_cdpl2: 2073 ctx->enable_cdpl2 = true; 2074 return 0; 2075 case Opt_mba_mbps: 2076 if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) 2077 return -EINVAL; 2078 ctx->enable_mba_mbps = true; 2079 return 0; 2080 } 2081 2082 return -EINVAL; 2083 } 2084 2085 static void rdt_fs_context_free(struct fs_context *fc) 2086 { 2087 struct rdt_fs_context *ctx = rdt_fc2context(fc); 2088 2089 kernfs_free_fs_context(fc); 2090 kfree(ctx); 2091 } 2092 2093 static const struct fs_context_operations rdt_fs_context_ops = { 2094 .free = rdt_fs_context_free, 2095 .parse_param = rdt_parse_param, 2096 .get_tree = rdt_get_tree, 2097 }; 2098 2099 static int rdt_init_fs_context(struct fs_context *fc) 2100 { 2101 struct rdt_fs_context *ctx; 2102 2103 ctx = kzalloc(sizeof(struct rdt_fs_context), GFP_KERNEL); 2104 if (!ctx) 2105 return -ENOMEM; 2106 2107 ctx->kfc.root = rdt_root; 2108 ctx->kfc.magic = RDTGROUP_SUPER_MAGIC; 2109 fc->fs_private = &ctx->kfc; 2110 fc->ops = &rdt_fs_context_ops; 2111 if (fc->user_ns) 2112 put_user_ns(fc->user_ns); 2113 fc->user_ns = get_user_ns(&init_user_ns); 2114 fc->global = true; 2115 return 0; 2116 } 2117 2118 static int reset_all_ctrls(struct rdt_resource *r) 2119 { 2120 struct msr_param msr_param; 2121 cpumask_var_t cpu_mask; 2122 struct rdt_domain *d; 2123 int i, cpu; 2124 2125 if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL)) 2126 return -ENOMEM; 2127 2128 msr_param.res = r; 2129 msr_param.low = 0; 2130 msr_param.high = r->num_closid; 2131 2132 /* 2133 * Disable resource control for this resource by setting all 2134 * CBMs in all domains to the maximum mask value. Pick one CPU 2135 * from each domain to update the MSRs below. 2136 */ 2137 list_for_each_entry(d, &r->domains, list) { 2138 cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask); 2139 2140 for (i = 0; i < r->num_closid; i++) 2141 d->ctrl_val[i] = r->default_ctrl; 2142 } 2143 cpu = get_cpu(); 2144 /* Update CBM on this cpu if it's in cpu_mask. */ 2145 if (cpumask_test_cpu(cpu, cpu_mask)) 2146 rdt_ctrl_update(&msr_param); 2147 /* Update CBM on all other cpus in cpu_mask. */ 2148 smp_call_function_many(cpu_mask, rdt_ctrl_update, &msr_param, 1); 2149 put_cpu(); 2150 2151 free_cpumask_var(cpu_mask); 2152 2153 return 0; 2154 } 2155 2156 static bool is_closid_match(struct task_struct *t, struct rdtgroup *r) 2157 { 2158 return (rdt_alloc_capable && 2159 (r->type == RDTCTRL_GROUP) && (t->closid == r->closid)); 2160 } 2161 2162 static bool is_rmid_match(struct task_struct *t, struct rdtgroup *r) 2163 { 2164 return (rdt_mon_capable && 2165 (r->type == RDTMON_GROUP) && (t->rmid == r->mon.rmid)); 2166 } 2167 2168 /* 2169 * Move tasks from one to the other group. If @from is NULL, then all tasks 2170 * in the systems are moved unconditionally (used for teardown). 2171 * 2172 * If @mask is not NULL the cpus on which moved tasks are running are set 2173 * in that mask so the update smp function call is restricted to affected 2174 * cpus. 2175 */ 2176 static void rdt_move_group_tasks(struct rdtgroup *from, struct rdtgroup *to, 2177 struct cpumask *mask) 2178 { 2179 struct task_struct *p, *t; 2180 2181 read_lock(&tasklist_lock); 2182 for_each_process_thread(p, t) { 2183 if (!from || is_closid_match(t, from) || 2184 is_rmid_match(t, from)) { 2185 t->closid = to->closid; 2186 t->rmid = to->mon.rmid; 2187 2188 #ifdef CONFIG_SMP 2189 /* 2190 * This is safe on x86 w/o barriers as the ordering 2191 * of writing to task_cpu() and t->on_cpu is 2192 * reverse to the reading here. The detection is 2193 * inaccurate as tasks might move or schedule 2194 * before the smp function call takes place. In 2195 * such a case the function call is pointless, but 2196 * there is no other side effect. 2197 */ 2198 if (mask && t->on_cpu) 2199 cpumask_set_cpu(task_cpu(t), mask); 2200 #endif 2201 } 2202 } 2203 read_unlock(&tasklist_lock); 2204 } 2205 2206 static void free_all_child_rdtgrp(struct rdtgroup *rdtgrp) 2207 { 2208 struct rdtgroup *sentry, *stmp; 2209 struct list_head *head; 2210 2211 head = &rdtgrp->mon.crdtgrp_list; 2212 list_for_each_entry_safe(sentry, stmp, head, mon.crdtgrp_list) { 2213 free_rmid(sentry->mon.rmid); 2214 list_del(&sentry->mon.crdtgrp_list); 2215 kfree(sentry); 2216 } 2217 } 2218 2219 /* 2220 * Forcibly remove all of subdirectories under root. 2221 */ 2222 static void rmdir_all_sub(void) 2223 { 2224 struct rdtgroup *rdtgrp, *tmp; 2225 2226 /* Move all tasks to the default resource group */ 2227 rdt_move_group_tasks(NULL, &rdtgroup_default, NULL); 2228 2229 list_for_each_entry_safe(rdtgrp, tmp, &rdt_all_groups, rdtgroup_list) { 2230 /* Free any child rmids */ 2231 free_all_child_rdtgrp(rdtgrp); 2232 2233 /* Remove each rdtgroup other than root */ 2234 if (rdtgrp == &rdtgroup_default) 2235 continue; 2236 2237 if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP || 2238 rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) 2239 rdtgroup_pseudo_lock_remove(rdtgrp); 2240 2241 /* 2242 * Give any CPUs back to the default group. We cannot copy 2243 * cpu_online_mask because a CPU might have executed the 2244 * offline callback already, but is still marked online. 2245 */ 2246 cpumask_or(&rdtgroup_default.cpu_mask, 2247 &rdtgroup_default.cpu_mask, &rdtgrp->cpu_mask); 2248 2249 free_rmid(rdtgrp->mon.rmid); 2250 2251 kernfs_remove(rdtgrp->kn); 2252 list_del(&rdtgrp->rdtgroup_list); 2253 kfree(rdtgrp); 2254 } 2255 /* Notify online CPUs to update per cpu storage and PQR_ASSOC MSR */ 2256 update_closid_rmid(cpu_online_mask, &rdtgroup_default); 2257 2258 kernfs_remove(kn_info); 2259 kernfs_remove(kn_mongrp); 2260 kernfs_remove(kn_mondata); 2261 } 2262 2263 static void rdt_kill_sb(struct super_block *sb) 2264 { 2265 struct rdt_resource *r; 2266 2267 cpus_read_lock(); 2268 mutex_lock(&rdtgroup_mutex); 2269 2270 set_mba_sc(false); 2271 2272 /*Put everything back to default values. */ 2273 for_each_alloc_enabled_rdt_resource(r) 2274 reset_all_ctrls(r); 2275 cdp_disable_all(); 2276 rmdir_all_sub(); 2277 rdt_pseudo_lock_release(); 2278 rdtgroup_default.mode = RDT_MODE_SHAREABLE; 2279 static_branch_disable_cpuslocked(&rdt_alloc_enable_key); 2280 static_branch_disable_cpuslocked(&rdt_mon_enable_key); 2281 static_branch_disable_cpuslocked(&rdt_enable_key); 2282 kernfs_kill_sb(sb); 2283 mutex_unlock(&rdtgroup_mutex); 2284 cpus_read_unlock(); 2285 } 2286 2287 static struct file_system_type rdt_fs_type = { 2288 .name = "resctrl", 2289 .init_fs_context = rdt_init_fs_context, 2290 .parameters = &rdt_fs_parameters, 2291 .kill_sb = rdt_kill_sb, 2292 }; 2293 2294 static int mon_addfile(struct kernfs_node *parent_kn, const char *name, 2295 void *priv) 2296 { 2297 struct kernfs_node *kn; 2298 int ret = 0; 2299 2300 kn = __kernfs_create_file(parent_kn, name, 0444, 2301 GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, 0, 2302 &kf_mondata_ops, priv, NULL, NULL); 2303 if (IS_ERR(kn)) 2304 return PTR_ERR(kn); 2305 2306 ret = rdtgroup_kn_set_ugid(kn); 2307 if (ret) { 2308 kernfs_remove(kn); 2309 return ret; 2310 } 2311 2312 return ret; 2313 } 2314 2315 /* 2316 * Remove all subdirectories of mon_data of ctrl_mon groups 2317 * and monitor groups with given domain id. 2318 */ 2319 void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r, unsigned int dom_id) 2320 { 2321 struct rdtgroup *prgrp, *crgrp; 2322 char name[32]; 2323 2324 if (!r->mon_enabled) 2325 return; 2326 2327 list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) { 2328 sprintf(name, "mon_%s_%02d", r->name, dom_id); 2329 kernfs_remove_by_name(prgrp->mon.mon_data_kn, name); 2330 2331 list_for_each_entry(crgrp, &prgrp->mon.crdtgrp_list, mon.crdtgrp_list) 2332 kernfs_remove_by_name(crgrp->mon.mon_data_kn, name); 2333 } 2334 } 2335 2336 static int mkdir_mondata_subdir(struct kernfs_node *parent_kn, 2337 struct rdt_domain *d, 2338 struct rdt_resource *r, struct rdtgroup *prgrp) 2339 { 2340 union mon_data_bits priv; 2341 struct kernfs_node *kn; 2342 struct mon_evt *mevt; 2343 struct rmid_read rr; 2344 char name[32]; 2345 int ret; 2346 2347 sprintf(name, "mon_%s_%02d", r->name, d->id); 2348 /* create the directory */ 2349 kn = kernfs_create_dir(parent_kn, name, parent_kn->mode, prgrp); 2350 if (IS_ERR(kn)) 2351 return PTR_ERR(kn); 2352 2353 /* 2354 * This extra ref will be put in kernfs_remove() and guarantees 2355 * that kn is always accessible. 2356 */ 2357 kernfs_get(kn); 2358 ret = rdtgroup_kn_set_ugid(kn); 2359 if (ret) 2360 goto out_destroy; 2361 2362 if (WARN_ON(list_empty(&r->evt_list))) { 2363 ret = -EPERM; 2364 goto out_destroy; 2365 } 2366 2367 priv.u.rid = r->rid; 2368 priv.u.domid = d->id; 2369 list_for_each_entry(mevt, &r->evt_list, list) { 2370 priv.u.evtid = mevt->evtid; 2371 ret = mon_addfile(kn, mevt->name, priv.priv); 2372 if (ret) 2373 goto out_destroy; 2374 2375 if (is_mbm_event(mevt->evtid)) 2376 mon_event_read(&rr, d, prgrp, mevt->evtid, true); 2377 } 2378 kernfs_activate(kn); 2379 return 0; 2380 2381 out_destroy: 2382 kernfs_remove(kn); 2383 return ret; 2384 } 2385 2386 /* 2387 * Add all subdirectories of mon_data for "ctrl_mon" groups 2388 * and "monitor" groups with given domain id. 2389 */ 2390 void mkdir_mondata_subdir_allrdtgrp(struct rdt_resource *r, 2391 struct rdt_domain *d) 2392 { 2393 struct kernfs_node *parent_kn; 2394 struct rdtgroup *prgrp, *crgrp; 2395 struct list_head *head; 2396 2397 if (!r->mon_enabled) 2398 return; 2399 2400 list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) { 2401 parent_kn = prgrp->mon.mon_data_kn; 2402 mkdir_mondata_subdir(parent_kn, d, r, prgrp); 2403 2404 head = &prgrp->mon.crdtgrp_list; 2405 list_for_each_entry(crgrp, head, mon.crdtgrp_list) { 2406 parent_kn = crgrp->mon.mon_data_kn; 2407 mkdir_mondata_subdir(parent_kn, d, r, crgrp); 2408 } 2409 } 2410 } 2411 2412 static int mkdir_mondata_subdir_alldom(struct kernfs_node *parent_kn, 2413 struct rdt_resource *r, 2414 struct rdtgroup *prgrp) 2415 { 2416 struct rdt_domain *dom; 2417 int ret; 2418 2419 list_for_each_entry(dom, &r->domains, list) { 2420 ret = mkdir_mondata_subdir(parent_kn, dom, r, prgrp); 2421 if (ret) 2422 return ret; 2423 } 2424 2425 return 0; 2426 } 2427 2428 /* 2429 * This creates a directory mon_data which contains the monitored data. 2430 * 2431 * mon_data has one directory for each domain whic are named 2432 * in the format mon_<domain_name>_<domain_id>. For ex: A mon_data 2433 * with L3 domain looks as below: 2434 * ./mon_data: 2435 * mon_L3_00 2436 * mon_L3_01 2437 * mon_L3_02 2438 * ... 2439 * 2440 * Each domain directory has one file per event: 2441 * ./mon_L3_00/: 2442 * llc_occupancy 2443 * 2444 */ 2445 static int mkdir_mondata_all(struct kernfs_node *parent_kn, 2446 struct rdtgroup *prgrp, 2447 struct kernfs_node **dest_kn) 2448 { 2449 struct rdt_resource *r; 2450 struct kernfs_node *kn; 2451 int ret; 2452 2453 /* 2454 * Create the mon_data directory first. 2455 */ 2456 ret = mongroup_create_dir(parent_kn, NULL, "mon_data", &kn); 2457 if (ret) 2458 return ret; 2459 2460 if (dest_kn) 2461 *dest_kn = kn; 2462 2463 /* 2464 * Create the subdirectories for each domain. Note that all events 2465 * in a domain like L3 are grouped into a resource whose domain is L3 2466 */ 2467 for_each_mon_enabled_rdt_resource(r) { 2468 ret = mkdir_mondata_subdir_alldom(kn, r, prgrp); 2469 if (ret) 2470 goto out_destroy; 2471 } 2472 2473 return 0; 2474 2475 out_destroy: 2476 kernfs_remove(kn); 2477 return ret; 2478 } 2479 2480 /** 2481 * cbm_ensure_valid - Enforce validity on provided CBM 2482 * @_val: Candidate CBM 2483 * @r: RDT resource to which the CBM belongs 2484 * 2485 * The provided CBM represents all cache portions available for use. This 2486 * may be represented by a bitmap that does not consist of contiguous ones 2487 * and thus be an invalid CBM. 2488 * Here the provided CBM is forced to be a valid CBM by only considering 2489 * the first set of contiguous bits as valid and clearing all bits. 2490 * The intention here is to provide a valid default CBM with which a new 2491 * resource group is initialized. The user can follow this with a 2492 * modification to the CBM if the default does not satisfy the 2493 * requirements. 2494 */ 2495 static void cbm_ensure_valid(u32 *_val, struct rdt_resource *r) 2496 { 2497 /* 2498 * Convert the u32 _val to an unsigned long required by all the bit 2499 * operations within this function. No more than 32 bits of this 2500 * converted value can be accessed because all bit operations are 2501 * additionally provided with cbm_len that is initialized during 2502 * hardware enumeration using five bits from the EAX register and 2503 * thus never can exceed 32 bits. 2504 */ 2505 unsigned long *val = (unsigned long *)_val; 2506 unsigned int cbm_len = r->cache.cbm_len; 2507 unsigned long first_bit, zero_bit; 2508 2509 if (*val == 0) 2510 return; 2511 2512 first_bit = find_first_bit(val, cbm_len); 2513 zero_bit = find_next_zero_bit(val, cbm_len, first_bit); 2514 2515 /* Clear any remaining bits to ensure contiguous region */ 2516 bitmap_clear(val, zero_bit, cbm_len - zero_bit); 2517 } 2518 2519 /** 2520 * rdtgroup_init_alloc - Initialize the new RDT group's allocations 2521 * 2522 * A new RDT group is being created on an allocation capable (CAT) 2523 * supporting system. Set this group up to start off with all usable 2524 * allocations. That is, all shareable and unused bits. 2525 * 2526 * All-zero CBM is invalid. If there are no more shareable bits available 2527 * on any domain then the entire allocation will fail. 2528 */ 2529 static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp) 2530 { 2531 struct rdt_resource *r_cdp = NULL; 2532 struct rdt_domain *d_cdp = NULL; 2533 u32 used_b = 0, unused_b = 0; 2534 u32 closid = rdtgrp->closid; 2535 struct rdt_resource *r; 2536 unsigned long tmp_cbm; 2537 enum rdtgrp_mode mode; 2538 struct rdt_domain *d; 2539 u32 peer_ctl, *ctrl; 2540 int i, ret; 2541 2542 for_each_alloc_enabled_rdt_resource(r) { 2543 /* 2544 * Only initialize default allocations for CBM cache 2545 * resources 2546 */ 2547 if (r->rid == RDT_RESOURCE_MBA) 2548 continue; 2549 list_for_each_entry(d, &r->domains, list) { 2550 rdt_cdp_peer_get(r, d, &r_cdp, &d_cdp); 2551 d->have_new_ctrl = false; 2552 d->new_ctrl = r->cache.shareable_bits; 2553 used_b = r->cache.shareable_bits; 2554 ctrl = d->ctrl_val; 2555 for (i = 0; i < closids_supported(); i++, ctrl++) { 2556 if (closid_allocated(i) && i != closid) { 2557 mode = rdtgroup_mode_by_closid(i); 2558 if (mode == RDT_MODE_PSEUDO_LOCKSETUP) 2559 break; 2560 /* 2561 * If CDP is active include peer 2562 * domain's usage to ensure there 2563 * is no overlap with an exclusive 2564 * group. 2565 */ 2566 if (d_cdp) 2567 peer_ctl = d_cdp->ctrl_val[i]; 2568 else 2569 peer_ctl = 0; 2570 used_b |= *ctrl | peer_ctl; 2571 if (mode == RDT_MODE_SHAREABLE) 2572 d->new_ctrl |= *ctrl | peer_ctl; 2573 } 2574 } 2575 if (d->plr && d->plr->cbm > 0) 2576 used_b |= d->plr->cbm; 2577 unused_b = used_b ^ (BIT_MASK(r->cache.cbm_len) - 1); 2578 unused_b &= BIT_MASK(r->cache.cbm_len) - 1; 2579 d->new_ctrl |= unused_b; 2580 /* 2581 * Force the initial CBM to be valid, user can 2582 * modify the CBM based on system availability. 2583 */ 2584 cbm_ensure_valid(&d->new_ctrl, r); 2585 /* 2586 * Assign the u32 CBM to an unsigned long to ensure 2587 * that bitmap_weight() does not access out-of-bound 2588 * memory. 2589 */ 2590 tmp_cbm = d->new_ctrl; 2591 if (bitmap_weight(&tmp_cbm, r->cache.cbm_len) < 2592 r->cache.min_cbm_bits) { 2593 rdt_last_cmd_printf("No space on %s:%d\n", 2594 r->name, d->id); 2595 return -ENOSPC; 2596 } 2597 d->have_new_ctrl = true; 2598 } 2599 } 2600 2601 for_each_alloc_enabled_rdt_resource(r) { 2602 /* 2603 * Only initialize default allocations for CBM cache 2604 * resources 2605 */ 2606 if (r->rid == RDT_RESOURCE_MBA) 2607 continue; 2608 ret = update_domains(r, rdtgrp->closid); 2609 if (ret < 0) { 2610 rdt_last_cmd_puts("Failed to initialize allocations\n"); 2611 return ret; 2612 } 2613 rdtgrp->mode = RDT_MODE_SHAREABLE; 2614 } 2615 2616 return 0; 2617 } 2618 2619 static int mkdir_rdt_prepare(struct kernfs_node *parent_kn, 2620 struct kernfs_node *prgrp_kn, 2621 const char *name, umode_t mode, 2622 enum rdt_group_type rtype, struct rdtgroup **r) 2623 { 2624 struct rdtgroup *prdtgrp, *rdtgrp; 2625 struct kernfs_node *kn; 2626 uint files = 0; 2627 int ret; 2628 2629 prdtgrp = rdtgroup_kn_lock_live(prgrp_kn); 2630 rdt_last_cmd_clear(); 2631 if (!prdtgrp) { 2632 ret = -ENODEV; 2633 rdt_last_cmd_puts("Directory was removed\n"); 2634 goto out_unlock; 2635 } 2636 2637 if (rtype == RDTMON_GROUP && 2638 (prdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP || 2639 prdtgrp->mode == RDT_MODE_PSEUDO_LOCKED)) { 2640 ret = -EINVAL; 2641 rdt_last_cmd_puts("Pseudo-locking in progress\n"); 2642 goto out_unlock; 2643 } 2644 2645 /* allocate the rdtgroup. */ 2646 rdtgrp = kzalloc(sizeof(*rdtgrp), GFP_KERNEL); 2647 if (!rdtgrp) { 2648 ret = -ENOSPC; 2649 rdt_last_cmd_puts("Kernel out of memory\n"); 2650 goto out_unlock; 2651 } 2652 *r = rdtgrp; 2653 rdtgrp->mon.parent = prdtgrp; 2654 rdtgrp->type = rtype; 2655 INIT_LIST_HEAD(&rdtgrp->mon.crdtgrp_list); 2656 2657 /* kernfs creates the directory for rdtgrp */ 2658 kn = kernfs_create_dir(parent_kn, name, mode, rdtgrp); 2659 if (IS_ERR(kn)) { 2660 ret = PTR_ERR(kn); 2661 rdt_last_cmd_puts("kernfs create error\n"); 2662 goto out_free_rgrp; 2663 } 2664 rdtgrp->kn = kn; 2665 2666 /* 2667 * kernfs_remove() will drop the reference count on "kn" which 2668 * will free it. But we still need it to stick around for the 2669 * rdtgroup_kn_unlock(kn} call below. Take one extra reference 2670 * here, which will be dropped inside rdtgroup_kn_unlock(). 2671 */ 2672 kernfs_get(kn); 2673 2674 ret = rdtgroup_kn_set_ugid(kn); 2675 if (ret) { 2676 rdt_last_cmd_puts("kernfs perm error\n"); 2677 goto out_destroy; 2678 } 2679 2680 files = RFTYPE_BASE | BIT(RF_CTRLSHIFT + rtype); 2681 ret = rdtgroup_add_files(kn, files); 2682 if (ret) { 2683 rdt_last_cmd_puts("kernfs fill error\n"); 2684 goto out_destroy; 2685 } 2686 2687 if (rdt_mon_capable) { 2688 ret = alloc_rmid(); 2689 if (ret < 0) { 2690 rdt_last_cmd_puts("Out of RMIDs\n"); 2691 goto out_destroy; 2692 } 2693 rdtgrp->mon.rmid = ret; 2694 2695 ret = mkdir_mondata_all(kn, rdtgrp, &rdtgrp->mon.mon_data_kn); 2696 if (ret) { 2697 rdt_last_cmd_puts("kernfs subdir error\n"); 2698 goto out_idfree; 2699 } 2700 } 2701 kernfs_activate(kn); 2702 2703 /* 2704 * The caller unlocks the prgrp_kn upon success. 2705 */ 2706 return 0; 2707 2708 out_idfree: 2709 free_rmid(rdtgrp->mon.rmid); 2710 out_destroy: 2711 kernfs_remove(rdtgrp->kn); 2712 out_free_rgrp: 2713 kfree(rdtgrp); 2714 out_unlock: 2715 rdtgroup_kn_unlock(prgrp_kn); 2716 return ret; 2717 } 2718 2719 static void mkdir_rdt_prepare_clean(struct rdtgroup *rgrp) 2720 { 2721 kernfs_remove(rgrp->kn); 2722 free_rmid(rgrp->mon.rmid); 2723 kfree(rgrp); 2724 } 2725 2726 /* 2727 * Create a monitor group under "mon_groups" directory of a control 2728 * and monitor group(ctrl_mon). This is a resource group 2729 * to monitor a subset of tasks and cpus in its parent ctrl_mon group. 2730 */ 2731 static int rdtgroup_mkdir_mon(struct kernfs_node *parent_kn, 2732 struct kernfs_node *prgrp_kn, 2733 const char *name, 2734 umode_t mode) 2735 { 2736 struct rdtgroup *rdtgrp, *prgrp; 2737 int ret; 2738 2739 ret = mkdir_rdt_prepare(parent_kn, prgrp_kn, name, mode, RDTMON_GROUP, 2740 &rdtgrp); 2741 if (ret) 2742 return ret; 2743 2744 prgrp = rdtgrp->mon.parent; 2745 rdtgrp->closid = prgrp->closid; 2746 2747 /* 2748 * Add the rdtgrp to the list of rdtgrps the parent 2749 * ctrl_mon group has to track. 2750 */ 2751 list_add_tail(&rdtgrp->mon.crdtgrp_list, &prgrp->mon.crdtgrp_list); 2752 2753 rdtgroup_kn_unlock(prgrp_kn); 2754 return ret; 2755 } 2756 2757 /* 2758 * These are rdtgroups created under the root directory. Can be used 2759 * to allocate and monitor resources. 2760 */ 2761 static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn, 2762 struct kernfs_node *prgrp_kn, 2763 const char *name, umode_t mode) 2764 { 2765 struct rdtgroup *rdtgrp; 2766 struct kernfs_node *kn; 2767 u32 closid; 2768 int ret; 2769 2770 ret = mkdir_rdt_prepare(parent_kn, prgrp_kn, name, mode, RDTCTRL_GROUP, 2771 &rdtgrp); 2772 if (ret) 2773 return ret; 2774 2775 kn = rdtgrp->kn; 2776 ret = closid_alloc(); 2777 if (ret < 0) { 2778 rdt_last_cmd_puts("Out of CLOSIDs\n"); 2779 goto out_common_fail; 2780 } 2781 closid = ret; 2782 ret = 0; 2783 2784 rdtgrp->closid = closid; 2785 ret = rdtgroup_init_alloc(rdtgrp); 2786 if (ret < 0) 2787 goto out_id_free; 2788 2789 list_add(&rdtgrp->rdtgroup_list, &rdt_all_groups); 2790 2791 if (rdt_mon_capable) { 2792 /* 2793 * Create an empty mon_groups directory to hold the subset 2794 * of tasks and cpus to monitor. 2795 */ 2796 ret = mongroup_create_dir(kn, NULL, "mon_groups", NULL); 2797 if (ret) { 2798 rdt_last_cmd_puts("kernfs subdir error\n"); 2799 goto out_del_list; 2800 } 2801 } 2802 2803 goto out_unlock; 2804 2805 out_del_list: 2806 list_del(&rdtgrp->rdtgroup_list); 2807 out_id_free: 2808 closid_free(closid); 2809 out_common_fail: 2810 mkdir_rdt_prepare_clean(rdtgrp); 2811 out_unlock: 2812 rdtgroup_kn_unlock(prgrp_kn); 2813 return ret; 2814 } 2815 2816 /* 2817 * We allow creating mon groups only with in a directory called "mon_groups" 2818 * which is present in every ctrl_mon group. Check if this is a valid 2819 * "mon_groups" directory. 2820 * 2821 * 1. The directory should be named "mon_groups". 2822 * 2. The mon group itself should "not" be named "mon_groups". 2823 * This makes sure "mon_groups" directory always has a ctrl_mon group 2824 * as parent. 2825 */ 2826 static bool is_mon_groups(struct kernfs_node *kn, const char *name) 2827 { 2828 return (!strcmp(kn->name, "mon_groups") && 2829 strcmp(name, "mon_groups")); 2830 } 2831 2832 static int rdtgroup_mkdir(struct kernfs_node *parent_kn, const char *name, 2833 umode_t mode) 2834 { 2835 /* Do not accept '\n' to avoid unparsable situation. */ 2836 if (strchr(name, '\n')) 2837 return -EINVAL; 2838 2839 /* 2840 * If the parent directory is the root directory and RDT 2841 * allocation is supported, add a control and monitoring 2842 * subdirectory 2843 */ 2844 if (rdt_alloc_capable && parent_kn == rdtgroup_default.kn) 2845 return rdtgroup_mkdir_ctrl_mon(parent_kn, parent_kn, name, mode); 2846 2847 /* 2848 * If RDT monitoring is supported and the parent directory is a valid 2849 * "mon_groups" directory, add a monitoring subdirectory. 2850 */ 2851 if (rdt_mon_capable && is_mon_groups(parent_kn, name)) 2852 return rdtgroup_mkdir_mon(parent_kn, parent_kn->parent, name, mode); 2853 2854 return -EPERM; 2855 } 2856 2857 static int rdtgroup_rmdir_mon(struct kernfs_node *kn, struct rdtgroup *rdtgrp, 2858 cpumask_var_t tmpmask) 2859 { 2860 struct rdtgroup *prdtgrp = rdtgrp->mon.parent; 2861 int cpu; 2862 2863 /* Give any tasks back to the parent group */ 2864 rdt_move_group_tasks(rdtgrp, prdtgrp, tmpmask); 2865 2866 /* Update per cpu rmid of the moved CPUs first */ 2867 for_each_cpu(cpu, &rdtgrp->cpu_mask) 2868 per_cpu(pqr_state.default_rmid, cpu) = prdtgrp->mon.rmid; 2869 /* 2870 * Update the MSR on moved CPUs and CPUs which have moved 2871 * task running on them. 2872 */ 2873 cpumask_or(tmpmask, tmpmask, &rdtgrp->cpu_mask); 2874 update_closid_rmid(tmpmask, NULL); 2875 2876 rdtgrp->flags = RDT_DELETED; 2877 free_rmid(rdtgrp->mon.rmid); 2878 2879 /* 2880 * Remove the rdtgrp from the parent ctrl_mon group's list 2881 */ 2882 WARN_ON(list_empty(&prdtgrp->mon.crdtgrp_list)); 2883 list_del(&rdtgrp->mon.crdtgrp_list); 2884 2885 /* 2886 * one extra hold on this, will drop when we kfree(rdtgrp) 2887 * in rdtgroup_kn_unlock() 2888 */ 2889 kernfs_get(kn); 2890 kernfs_remove(rdtgrp->kn); 2891 2892 return 0; 2893 } 2894 2895 static int rdtgroup_ctrl_remove(struct kernfs_node *kn, 2896 struct rdtgroup *rdtgrp) 2897 { 2898 rdtgrp->flags = RDT_DELETED; 2899 list_del(&rdtgrp->rdtgroup_list); 2900 2901 /* 2902 * one extra hold on this, will drop when we kfree(rdtgrp) 2903 * in rdtgroup_kn_unlock() 2904 */ 2905 kernfs_get(kn); 2906 kernfs_remove(rdtgrp->kn); 2907 return 0; 2908 } 2909 2910 static int rdtgroup_rmdir_ctrl(struct kernfs_node *kn, struct rdtgroup *rdtgrp, 2911 cpumask_var_t tmpmask) 2912 { 2913 int cpu; 2914 2915 /* Give any tasks back to the default group */ 2916 rdt_move_group_tasks(rdtgrp, &rdtgroup_default, tmpmask); 2917 2918 /* Give any CPUs back to the default group */ 2919 cpumask_or(&rdtgroup_default.cpu_mask, 2920 &rdtgroup_default.cpu_mask, &rdtgrp->cpu_mask); 2921 2922 /* Update per cpu closid and rmid of the moved CPUs first */ 2923 for_each_cpu(cpu, &rdtgrp->cpu_mask) { 2924 per_cpu(pqr_state.default_closid, cpu) = rdtgroup_default.closid; 2925 per_cpu(pqr_state.default_rmid, cpu) = rdtgroup_default.mon.rmid; 2926 } 2927 2928 /* 2929 * Update the MSR on moved CPUs and CPUs which have moved 2930 * task running on them. 2931 */ 2932 cpumask_or(tmpmask, tmpmask, &rdtgrp->cpu_mask); 2933 update_closid_rmid(tmpmask, NULL); 2934 2935 closid_free(rdtgrp->closid); 2936 free_rmid(rdtgrp->mon.rmid); 2937 2938 /* 2939 * Free all the child monitor group rmids. 2940 */ 2941 free_all_child_rdtgrp(rdtgrp); 2942 2943 rdtgroup_ctrl_remove(kn, rdtgrp); 2944 2945 return 0; 2946 } 2947 2948 static int rdtgroup_rmdir(struct kernfs_node *kn) 2949 { 2950 struct kernfs_node *parent_kn = kn->parent; 2951 struct rdtgroup *rdtgrp; 2952 cpumask_var_t tmpmask; 2953 int ret = 0; 2954 2955 if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL)) 2956 return -ENOMEM; 2957 2958 rdtgrp = rdtgroup_kn_lock_live(kn); 2959 if (!rdtgrp) { 2960 ret = -EPERM; 2961 goto out; 2962 } 2963 2964 /* 2965 * If the rdtgroup is a ctrl_mon group and parent directory 2966 * is the root directory, remove the ctrl_mon group. 2967 * 2968 * If the rdtgroup is a mon group and parent directory 2969 * is a valid "mon_groups" directory, remove the mon group. 2970 */ 2971 if (rdtgrp->type == RDTCTRL_GROUP && parent_kn == rdtgroup_default.kn) { 2972 if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP || 2973 rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) { 2974 ret = rdtgroup_ctrl_remove(kn, rdtgrp); 2975 } else { 2976 ret = rdtgroup_rmdir_ctrl(kn, rdtgrp, tmpmask); 2977 } 2978 } else if (rdtgrp->type == RDTMON_GROUP && 2979 is_mon_groups(parent_kn, kn->name)) { 2980 ret = rdtgroup_rmdir_mon(kn, rdtgrp, tmpmask); 2981 } else { 2982 ret = -EPERM; 2983 } 2984 2985 out: 2986 rdtgroup_kn_unlock(kn); 2987 free_cpumask_var(tmpmask); 2988 return ret; 2989 } 2990 2991 static int rdtgroup_show_options(struct seq_file *seq, struct kernfs_root *kf) 2992 { 2993 if (rdt_resources_all[RDT_RESOURCE_L3DATA].alloc_enabled) 2994 seq_puts(seq, ",cdp"); 2995 2996 if (rdt_resources_all[RDT_RESOURCE_L2DATA].alloc_enabled) 2997 seq_puts(seq, ",cdpl2"); 2998 2999 if (is_mba_sc(&rdt_resources_all[RDT_RESOURCE_MBA])) 3000 seq_puts(seq, ",mba_MBps"); 3001 3002 return 0; 3003 } 3004 3005 static struct kernfs_syscall_ops rdtgroup_kf_syscall_ops = { 3006 .mkdir = rdtgroup_mkdir, 3007 .rmdir = rdtgroup_rmdir, 3008 .show_options = rdtgroup_show_options, 3009 }; 3010 3011 static int __init rdtgroup_setup_root(void) 3012 { 3013 int ret; 3014 3015 rdt_root = kernfs_create_root(&rdtgroup_kf_syscall_ops, 3016 KERNFS_ROOT_CREATE_DEACTIVATED | 3017 KERNFS_ROOT_EXTRA_OPEN_PERM_CHECK, 3018 &rdtgroup_default); 3019 if (IS_ERR(rdt_root)) 3020 return PTR_ERR(rdt_root); 3021 3022 mutex_lock(&rdtgroup_mutex); 3023 3024 rdtgroup_default.closid = 0; 3025 rdtgroup_default.mon.rmid = 0; 3026 rdtgroup_default.type = RDTCTRL_GROUP; 3027 INIT_LIST_HEAD(&rdtgroup_default.mon.crdtgrp_list); 3028 3029 list_add(&rdtgroup_default.rdtgroup_list, &rdt_all_groups); 3030 3031 ret = rdtgroup_add_files(rdt_root->kn, RF_CTRL_BASE); 3032 if (ret) { 3033 kernfs_destroy_root(rdt_root); 3034 goto out; 3035 } 3036 3037 rdtgroup_default.kn = rdt_root->kn; 3038 kernfs_activate(rdtgroup_default.kn); 3039 3040 out: 3041 mutex_unlock(&rdtgroup_mutex); 3042 3043 return ret; 3044 } 3045 3046 /* 3047 * rdtgroup_init - rdtgroup initialization 3048 * 3049 * Setup resctrl file system including set up root, create mount point, 3050 * register rdtgroup filesystem, and initialize files under root directory. 3051 * 3052 * Return: 0 on success or -errno 3053 */ 3054 int __init rdtgroup_init(void) 3055 { 3056 int ret = 0; 3057 3058 seq_buf_init(&last_cmd_status, last_cmd_status_buf, 3059 sizeof(last_cmd_status_buf)); 3060 3061 ret = rdtgroup_setup_root(); 3062 if (ret) 3063 return ret; 3064 3065 ret = sysfs_create_mount_point(fs_kobj, "resctrl"); 3066 if (ret) 3067 goto cleanup_root; 3068 3069 ret = register_filesystem(&rdt_fs_type); 3070 if (ret) 3071 goto cleanup_mountpoint; 3072 3073 /* 3074 * Adding the resctrl debugfs directory here may not be ideal since 3075 * it would let the resctrl debugfs directory appear on the debugfs 3076 * filesystem before the resctrl filesystem is mounted. 3077 * It may also be ok since that would enable debugging of RDT before 3078 * resctrl is mounted. 3079 * The reason why the debugfs directory is created here and not in 3080 * rdt_mount() is because rdt_mount() takes rdtgroup_mutex and 3081 * during the debugfs directory creation also &sb->s_type->i_mutex_key 3082 * (the lockdep class of inode->i_rwsem). Other filesystem 3083 * interactions (eg. SyS_getdents) have the lock ordering: 3084 * &sb->s_type->i_mutex_key --> &mm->mmap_sem 3085 * During mmap(), called with &mm->mmap_sem, the rdtgroup_mutex 3086 * is taken, thus creating dependency: 3087 * &mm->mmap_sem --> rdtgroup_mutex for the latter that can cause 3088 * issues considering the other two lock dependencies. 3089 * By creating the debugfs directory here we avoid a dependency 3090 * that may cause deadlock (even though file operations cannot 3091 * occur until the filesystem is mounted, but I do not know how to 3092 * tell lockdep that). 3093 */ 3094 debugfs_resctrl = debugfs_create_dir("resctrl", NULL); 3095 3096 return 0; 3097 3098 cleanup_mountpoint: 3099 sysfs_remove_mount_point(fs_kobj, "resctrl"); 3100 cleanup_root: 3101 kernfs_destroy_root(rdt_root); 3102 3103 return ret; 3104 } 3105 3106 void __exit rdtgroup_exit(void) 3107 { 3108 debugfs_remove_recursive(debugfs_resctrl); 3109 unregister_filesystem(&rdt_fs_type); 3110 sysfs_remove_mount_point(fs_kobj, "resctrl"); 3111 kernfs_destroy_root(rdt_root); 3112 } 3113