// SPDX-License-Identifier: GPL-2.0-only
/*
 * Resource Director Technology (RDT)
 * - Cache Allocation code.
 *
 * Copyright (C) 2016 Intel Corporation
 *
 * Authors:
 *    Fenghua Yu <fenghua.yu@intel.com>
 *    Tony Luck <tony.luck@intel.com>
 *    Vikas Shivappa <vikas.shivappa@intel.com>
 *
 * More information about RDT can be found in the Intel(R) x86 Architecture
 * Software Developer Manual June 2016, volume 3, section 17.17.
 */

#define pr_fmt(fmt)	"resctrl: " fmt

#include <linux/slab.h>
#include <linux/err.h>
#include <linux/cacheinfo.h>
#include <linux/cpuhotplug.h>

#include <asm/intel-family.h>
#include <asm/resctrl.h>
#include "internal.h"

/* Mutex to protect rdtgroup access. */
DEFINE_MUTEX(rdtgroup_mutex);

/*
 * The cached resctrl_pqr_state is strictly per CPU and can never be
 * updated from a remote CPU. Functions which modify the state
 * are called with interrupts disabled and no preemption, which
 * is sufficient for the protection.
 */
DEFINE_PER_CPU(struct resctrl_pqr_state, pqr_state);

/*
 * Used to store the max resource name width and max resource data width
 * to display the schemata in a tabular format.
 */
int max_name_width, max_data_width;

/*
 * Global boolean for rdt_alloc which is true if any
 * resource allocation is enabled.
 */
bool rdt_alloc_capable;

static void
mba_wrmsr_intel(struct rdt_domain *d, struct msr_param *m,
		struct rdt_resource *r);
static void
cat_wrmsr(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r);
static void
mba_wrmsr_amd(struct rdt_domain *d, struct msr_param *m,
	      struct rdt_resource *r);

#define domain_init(id) LIST_HEAD_INIT(rdt_resources_all[id].r_resctrl.domains)

struct rdt_hw_resource rdt_resources_all[] = {
	[RDT_RESOURCE_L3] =
	{
		.r_resctrl = {
			.rid		= RDT_RESOURCE_L3,
			.name		= "L3",
			.cache_level	= 3,
			.domains	= domain_init(RDT_RESOURCE_L3),
			.parse_ctrlval	= parse_cbm,
			.format_str	= "%d=%0*x",
			.fflags		= RFTYPE_RES_CACHE,
		},
		.msr_base	= MSR_IA32_L3_CBM_BASE,
		.msr_update	= cat_wrmsr,
	},
	[RDT_RESOURCE_L2] =
	{
		.r_resctrl = {
			.rid		= RDT_RESOURCE_L2,
			.name		= "L2",
			.cache_level	= 2,
			.domains	= domain_init(RDT_RESOURCE_L2),
			.parse_ctrlval	= parse_cbm,
			.format_str	= "%d=%0*x",
			.fflags		= RFTYPE_RES_CACHE,
		},
		.msr_base	= MSR_IA32_L2_CBM_BASE,
		.msr_update	= cat_wrmsr,
	},
	[RDT_RESOURCE_MBA] =
	{
		.r_resctrl = {
			.rid		= RDT_RESOURCE_MBA,
			.name		= "MB",
			.cache_level	= 3,
			.domains	= domain_init(RDT_RESOURCE_MBA),
			.parse_ctrlval	= parse_bw,
			.format_str	= "%d=%*u",
			.fflags		= RFTYPE_RES_MB,
		},
	},
	[RDT_RESOURCE_SMBA] =
	{
		.r_resctrl = {
			.rid		= RDT_RESOURCE_SMBA,
			.name		= "SMBA",
			.cache_level	= 3,
			.domains	= domain_init(RDT_RESOURCE_SMBA),
			.parse_ctrlval	= parse_bw,
			.format_str	= "%d=%*u",
			.fflags		= RFTYPE_RES_MB,
		},
	},
};
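
/*
 * Note: the format_str fields above determine how one domain is rendered in
 * a schemata line. Purely as an illustration (values made up, not from any
 * particular system), a schemata file might contain:
 *
 *	L3:0=fffff;1=fffff
 *	MB:0=100;1=100
 *
 * where the cache bitmasks are printed with data_width hex digits and the
 * memory bandwidth values as decimal percentages.
 */
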
/*
 * cache_alloc_hsw_probe() - Have to probe for Intel Haswell server CPUs
 * as they do not have CPUID enumeration support for Cache allocation.
 * The check for Vendor/Family/Model is not enough to guarantee that
 * the MSRs won't #GP fault because only the following SKUs support
 * CAT:
 *	Intel(R) Xeon(R) CPU E5-2658  v3 @ 2.20GHz
 *	Intel(R) Xeon(R) CPU E5-2648L v3 @ 1.80GHz
 *	Intel(R) Xeon(R) CPU E5-2628L v3 @ 2.00GHz
 *	Intel(R) Xeon(R) CPU E5-2618L v3 @ 2.30GHz
 *	Intel(R) Xeon(R) CPU E5-2608L v3 @ 2.00GHz
 *	Intel(R) Xeon(R) CPU E5-2658A v3 @ 2.20GHz
 *
 * Probe by trying to write the first of the L3 cache mask registers
 * and checking that the bits stick. Max CLOSids is always 4 and max cbm length
 * is always 20 on hsw server parts. The minimum cache bitmask length
 * allowed for HSW server is always 2 bits. Hardcode all of them.
 */
static inline void cache_alloc_hsw_probe(void)
{
	struct rdt_hw_resource *hw_res = &rdt_resources_all[RDT_RESOURCE_L3];
	struct rdt_resource *r = &hw_res->r_resctrl;
	u32 l, h, max_cbm = BIT_MASK(20) - 1;

	if (wrmsr_safe(MSR_IA32_L3_CBM_BASE, max_cbm, 0))
		return;

	rdmsr(MSR_IA32_L3_CBM_BASE, l, h);

	/* If not all the bits were set in the MSR, the write did not stick */
	if (l != max_cbm)
		return;

	hw_res->num_closid = 4;
	r->default_ctrl = max_cbm;
	r->cache.cbm_len = 20;
	r->cache.shareable_bits = 0xc0000;
	r->cache.min_cbm_bits = 2;
	r->alloc_capable = true;

	rdt_alloc_capable = true;
}
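
/*
 * is_mba_sc() - Query whether the MBA software controller is in use.
 * @r:	Resource to check, or NULL to check the global MBA resource.
 *
 * Returns true when the "mba_MBps" mount option selected the software
 * controller, in which case MBA schemata values are bandwidths in MBps
 * rather than raw percentage delay values.
 */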
bool is_mba_sc(struct rdt_resource *r)
{
	if (!r)
		return rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl.membw.mba_sc;

	/*
	 * The software controller support is only applicable to MBA resource.
	 * Make sure to check for resource type.
	 */
	if (r->rid != RDT_RESOURCE_MBA)
		return false;

	return r->membw.mba_sc;
}

/*
 * rdt_get_mb_table() - Get a mapping between the bandwidth (b/w) percentage
 * values exposed to the user interface and the h/w understandable delay
 * values.
 *
 * The non-linear delay values have a power-of-two granularity and the h/w
 * does not guarantee a curve of configured delay values vs. the actual b/w
 * enforced. Hence a pre-calibrated mapping is needed so that the user can
 * express the memory b/w as a percentage value.
 */
static inline bool rdt_get_mb_table(struct rdt_resource *r)
{
	/*
	 * No Intel SKUs currently support non-linear delay values.
	 */
	pr_info("MBA b/w map not implemented for cpu:%d, model:%d",
		boot_cpu_data.x86, boot_cpu_data.x86_model);

	return false;
}

static __init bool __get_mem_config_intel(struct rdt_resource *r)
{
	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
	union cpuid_0x10_3_eax eax;
	union cpuid_0x10_x_edx edx;
	u32 ebx, ecx, max_delay;

	cpuid_count(0x00000010, 3, &eax.full, &ebx, &ecx, &edx.full);
	hw_res->num_closid = edx.split.cos_max + 1;
	max_delay = eax.split.max_delay + 1;
	r->default_ctrl = MAX_MBA_BW;
	r->membw.arch_needs_linear = true;
	if (ecx & MBA_IS_LINEAR) {
		r->membw.delay_linear = true;
		r->membw.min_bw = MAX_MBA_BW - max_delay;
		r->membw.bw_gran = MAX_MBA_BW - max_delay;
	} else {
		if (!rdt_get_mb_table(r))
			return false;
		r->membw.arch_needs_linear = false;
	}
	r->data_width = 3;

	if (boot_cpu_has(X86_FEATURE_PER_THREAD_MBA))
		r->membw.throttle_mode = THREAD_THROTTLE_PER_THREAD;
	else
		r->membw.throttle_mode = THREAD_THROTTLE_MAX;
	thread_throttle_mode_init();

	r->alloc_capable = true;

	return true;
}

static __init bool __rdt_get_mem_config_amd(struct rdt_resource *r)
{
	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
	u32 eax, ebx, ecx, edx, subleaf;

	/*
	 * Query CPUID_Fn80000020_EDX_x01 for MBA and
	 * CPUID_Fn80000020_EDX_x02 for SMBA
	 */
	subleaf = (r->rid == RDT_RESOURCE_SMBA) ? 2 : 1;

	cpuid_count(0x80000020, subleaf, &eax, &ebx, &ecx, &edx);
	hw_res->num_closid = edx + 1;
	r->default_ctrl = 1 << eax;

	/* AMD does not use delay */
	r->membw.delay_linear = false;
	r->membw.arch_needs_linear = false;

	/*
	 * AMD does not use the memory delay throttle model that Intel uses
	 * to control the allocation.
	 */
	r->membw.throttle_mode = THREAD_THROTTLE_UNDEFINED;
	r->membw.min_bw = 0;
	r->membw.bw_gran = 1;
	/* Max value is 2048, so the data width should be 4 decimal digits. */
	r->data_width = 4;

	r->alloc_capable = true;

	return true;
}

static void rdt_get_cache_alloc_cfg(int idx, struct rdt_resource *r)
{
	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
	union cpuid_0x10_1_eax eax;
	union cpuid_0x10_x_edx edx;
	u32 ebx, ecx;

	cpuid_count(0x00000010, idx, &eax.full, &ebx, &ecx, &edx.full);
	hw_res->num_closid = edx.split.cos_max + 1;
	r->cache.cbm_len = eax.split.cbm_len + 1;
	r->default_ctrl = BIT_MASK(eax.split.cbm_len + 1) - 1;
	r->cache.shareable_bits = ebx & r->default_ctrl;
	r->data_width = (r->cache.cbm_len + 3) / 4;
	r->alloc_capable = true;
}
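
/*
 * Worked example for rdt_get_cache_alloc_cfg() (illustrative numbers, not
 * taken from any specific part): a CPUID.0x10 subleaf reporting
 * cbm_len = 19 describes a 20 bit capacity bitmask, so default_ctrl
 * becomes 0xfffff and data_width is (20 + 3) / 4 = 5 hex digits in the
 * schemata file.
 */
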
static void rdt_get_cdp_config(int level)
{
	/*
	 * By default, CDP is disabled. CDP can be enabled by mount parameter
	 * "cdp" during resctrl file system mount time.
	 */
	rdt_resources_all[level].cdp_enabled = false;
	rdt_resources_all[level].r_resctrl.cdp_capable = true;
}

static void rdt_get_cdp_l3_config(void)
{
	rdt_get_cdp_config(RDT_RESOURCE_L3);
}

static void rdt_get_cdp_l2_config(void)
{
	rdt_get_cdp_config(RDT_RESOURCE_L2);
}

static void
mba_wrmsr_amd(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r)
{
	unsigned int i;
	struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d);
	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);

	for (i = m->low; i < m->high; i++)
		wrmsrl(hw_res->msr_base + i, hw_dom->ctrl_val[i]);
}

/*
 * Map the memory b/w percentage value to delay values
 * that can be written to QOS_MSRs.
 * There are currently no SKUs which support non-linear delay values.
 */
static u32 delay_bw_map(unsigned long bw, struct rdt_resource *r)
{
	if (r->membw.delay_linear)
		return MAX_MBA_BW - bw;

	pr_warn_once("Non Linear delay-bw map not supported but queried\n");
	return r->default_ctrl;
}
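
/*
 * Worked example for the linear case above (assuming MAX_MBA_BW is 100):
 * a user request of 70% bandwidth is written to the throttle MSR as a
 * delay value of 100 - 70 = 30 by mba_wrmsr_intel() below.
 */
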
static void
mba_wrmsr_intel(struct rdt_domain *d, struct msr_param *m,
		struct rdt_resource *r)
{
	unsigned int i;
	struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d);
	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);

	/* Write the delay values for mba. */
	for (i = m->low; i < m->high; i++)
		wrmsrl(hw_res->msr_base + i, delay_bw_map(hw_dom->ctrl_val[i], r));
}

static void
cat_wrmsr(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r)
{
	unsigned int i;
	struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d);
	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);

	for (i = m->low; i < m->high; i++)
		wrmsrl(hw_res->msr_base + i, hw_dom->ctrl_val[i]);
}

struct rdt_domain *get_domain_from_cpu(int cpu, struct rdt_resource *r)
{
	struct rdt_domain *d;

	list_for_each_entry(d, &r->domains, list) {
		/* Find the domain that contains this CPU */
		if (cpumask_test_cpu(cpu, &d->cpu_mask))
			return d;
	}

	return NULL;
}

u32 resctrl_arch_get_num_closid(struct rdt_resource *r)
{
	return resctrl_to_arch_res(r)->num_closid;
}
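
/*
 * rdt_ctrl_update() - Intended to run via an SMP call/IPI on a CPU that
 * belongs to the target domain: look up the domain for the current CPU and
 * write the updated control values to its MSRs via the vendor specific
 * msr_update() helper.
 */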
void rdt_ctrl_update(void *arg)
{
	struct msr_param *m = arg;
	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(m->res);
	struct rdt_resource *r = m->res;
	int cpu = smp_processor_id();
	struct rdt_domain *d;

	d = get_domain_from_cpu(cpu, r);
	if (d) {
		hw_res->msr_update(d, m, r);
		return;
	}
	pr_warn_once("cpu %d not found in any domain for resource %s\n",
		     cpu, r->name);
}

/*
 * rdt_find_domain - Find a domain in a resource that matches input resource id
 *
 * Search resource r's domain list to find the resource id. If the resource
 * id is found in a domain, return the domain. Otherwise, if requested by
 * caller, return the first domain whose id is bigger than the input id.
 * The domain list is sorted by id in ascending order.
 */
struct rdt_domain *rdt_find_domain(struct rdt_resource *r, int id,
				   struct list_head **pos)
{
	struct rdt_domain *d;
	struct list_head *l;

	if (id < 0)
		return ERR_PTR(-ENODEV);

	list_for_each(l, &r->domains) {
		d = list_entry(l, struct rdt_domain, list);
		/* When id is found, return its domain. */
		if (id == d->id)
			return d;
		/* Stop searching when finding id's position in sorted list. */
		if (id < d->id)
			break;
	}

	if (pos)
		*pos = l;

	return NULL;
}

static void setup_default_ctrlval(struct rdt_resource *r, u32 *dc)
{
	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
	int i;

	/*
	 * Initialize the Control MSRs to having no control.
	 * For Cache Allocation: Set all bits in cbm
	 * For Memory Allocation: Set b/w requested to 100%
	 */
	for (i = 0; i < hw_res->num_closid; i++, dc++)
		*dc = r->default_ctrl;
}

static void domain_free(struct rdt_hw_domain *hw_dom)
{
	kfree(hw_dom->arch_mbm_total);
	kfree(hw_dom->arch_mbm_local);
	kfree(hw_dom->ctrl_val);
	kfree(hw_dom);
}

static int domain_setup_ctrlval(struct rdt_resource *r, struct rdt_domain *d)
{
	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
	struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d);
	struct msr_param m;
	u32 *dc;

	dc = kmalloc_array(hw_res->num_closid, sizeof(*hw_dom->ctrl_val),
			   GFP_KERNEL);
	if (!dc)
		return -ENOMEM;

	hw_dom->ctrl_val = dc;
	setup_default_ctrlval(r, dc);

	m.low = 0;
	m.high = hw_res->num_closid;
	hw_res->msr_update(d, &m, r);
	return 0;
}

/**
 * arch_domain_mbm_alloc() - Allocate arch private storage for the MBM counters
 * @num_rmid:	The size of the MBM counter array
 * @hw_dom:	The domain that owns the allocated arrays
 */
static int arch_domain_mbm_alloc(u32 num_rmid, struct rdt_hw_domain *hw_dom)
{
	size_t tsize;

	if (is_mbm_total_enabled()) {
		tsize = sizeof(*hw_dom->arch_mbm_total);
		hw_dom->arch_mbm_total = kcalloc(num_rmid, tsize, GFP_KERNEL);
		if (!hw_dom->arch_mbm_total)
			return -ENOMEM;
	}
	if (is_mbm_local_enabled()) {
		tsize = sizeof(*hw_dom->arch_mbm_local);
		hw_dom->arch_mbm_local = kcalloc(num_rmid, tsize, GFP_KERNEL);
		if (!hw_dom->arch_mbm_local) {
			kfree(hw_dom->arch_mbm_total);
			hw_dom->arch_mbm_total = NULL;
			return -ENOMEM;
		}
	}

	return 0;
}
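
/*
 * The domain online/offline helpers below are invoked from the CPU hotplug
 * callbacks with rdtgroup_mutex held, which serializes them against resctrl
 * file system operations that take the same lock.
 */
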
/*
 * domain_add_cpu - Add a CPU to a resource's domain list.
 *
 * If an existing domain in the resource r's domain list matches the cpu's
 * resource id, add the CPU to the domain.
 *
 * Otherwise, a new domain is allocated and inserted into the right position
 * in the domain list sorted by id in ascending order.
 *
 * The order in the domain list is visible to users when we print entries
 * in the schemata file and schemata input is validated to have the same order
 * as this list.
 */
static void domain_add_cpu(int cpu, struct rdt_resource *r)
{
	int id = get_cpu_cacheinfo_id(cpu, r->cache_level);
	struct list_head *add_pos = NULL;
	struct rdt_hw_domain *hw_dom;
	struct rdt_domain *d;
	int err;

	d = rdt_find_domain(r, id, &add_pos);
	if (IS_ERR(d)) {
		pr_warn("Couldn't find cache id for CPU %d\n", cpu);
		return;
	}

	if (d) {
		cpumask_set_cpu(cpu, &d->cpu_mask);
		if (r->cache.arch_has_per_cpu_cfg)
			rdt_domain_reconfigure_cdp(r);
		return;
	}

	hw_dom = kzalloc_node(sizeof(*hw_dom), GFP_KERNEL, cpu_to_node(cpu));
	if (!hw_dom)
		return;

	d = &hw_dom->d_resctrl;
	d->id = id;
	cpumask_set_cpu(cpu, &d->cpu_mask);

	rdt_domain_reconfigure_cdp(r);

	if (r->alloc_capable && domain_setup_ctrlval(r, d)) {
		domain_free(hw_dom);
		return;
	}

	if (r->mon_capable && arch_domain_mbm_alloc(r->num_rmid, hw_dom)) {
		domain_free(hw_dom);
		return;
	}

	list_add_tail(&d->list, add_pos);

	err = resctrl_online_domain(r, d);
	if (err) {
		list_del(&d->list);
		domain_free(hw_dom);
	}
}

static void domain_remove_cpu(int cpu, struct rdt_resource *r)
{
	int id = get_cpu_cacheinfo_id(cpu, r->cache_level);
	struct rdt_hw_domain *hw_dom;
	struct rdt_domain *d;

	d = rdt_find_domain(r, id, NULL);
	if (IS_ERR_OR_NULL(d)) {
		pr_warn("Couldn't find cache id for CPU %d\n", cpu);
		return;
	}
	hw_dom = resctrl_to_arch_dom(d);

	cpumask_clear_cpu(cpu, &d->cpu_mask);
	if (cpumask_empty(&d->cpu_mask)) {
		resctrl_offline_domain(r, d);
		list_del(&d->list);

		/*
		 * rdt_domain "d" is going to be freed below, so clear
		 * its pointer from pseudo_lock_region struct.
		 */
		if (d->plr)
			d->plr->d = NULL;
		domain_free(hw_dom);

		return;
	}

	if (r == &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl) {
		if (is_mbm_enabled() && cpu == d->mbm_work_cpu) {
			cancel_delayed_work(&d->mbm_over);
			mbm_setup_overflow_handler(d, 0);
		}
		if (is_llc_occupancy_enabled() && cpu == d->cqm_work_cpu &&
		    has_busy_rmid(r, d)) {
			cancel_delayed_work(&d->cqm_limbo);
			cqm_setup_limbo_handler(d, 0);
		}
	}
}

static void clear_closid_rmid(int cpu)
{
	struct resctrl_pqr_state *state = this_cpu_ptr(&pqr_state);

	state->default_closid = 0;
	state->default_rmid = 0;
	state->cur_closid = 0;
	state->cur_rmid = 0;
	wrmsr(MSR_IA32_PQR_ASSOC, 0, 0);
}

static int resctrl_online_cpu(unsigned int cpu)
{
	struct rdt_resource *r;

	mutex_lock(&rdtgroup_mutex);
	for_each_capable_rdt_resource(r)
		domain_add_cpu(cpu, r);
	/* The CPU is set in the default rdtgroup after online. */
	cpumask_set_cpu(cpu, &rdtgroup_default.cpu_mask);
	clear_closid_rmid(cpu);
	mutex_unlock(&rdtgroup_mutex);

	return 0;
}

static void clear_childcpus(struct rdtgroup *r, unsigned int cpu)
{
	struct rdtgroup *cr;

	list_for_each_entry(cr, &r->mon.crdtgrp_list, mon.crdtgrp_list) {
		if (cpumask_test_and_clear_cpu(cpu, &cr->cpu_mask)) {
			break;
		}
	}
}

static int resctrl_offline_cpu(unsigned int cpu)
{
	struct rdtgroup *rdtgrp;
	struct rdt_resource *r;

	mutex_lock(&rdtgroup_mutex);
	for_each_capable_rdt_resource(r)
		domain_remove_cpu(cpu, r);
	list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) {
		if (cpumask_test_and_clear_cpu(cpu, &rdtgrp->cpu_mask)) {
			clear_childcpus(rdtgrp, cpu);
			break;
		}
	}
	clear_closid_rmid(cpu);
	mutex_unlock(&rdtgroup_mutex);

	return 0;
}

/*
 * Choose a width for the resource name and resource data based on the
 * resource that has the widest name and cbm.
 */
static __init void rdt_init_padding(void)
{
	struct rdt_resource *r;

	for_each_alloc_capable_rdt_resource(r) {
		if (r->data_width > max_data_width)
			max_data_width = r->data_width;
	}
}

enum {
	RDT_FLAG_CMT,
	RDT_FLAG_MBM_TOTAL,
	RDT_FLAG_MBM_LOCAL,
	RDT_FLAG_L3_CAT,
	RDT_FLAG_L3_CDP,
	RDT_FLAG_L2_CAT,
	RDT_FLAG_L2_CDP,
	RDT_FLAG_MBA,
	RDT_FLAG_SMBA,
	RDT_FLAG_BMEC,
};

#define RDT_OPT(idx, n, f)	\
[idx] = {			\
	.name = n,		\
	.flag = f		\
}

struct rdt_options {
	char	*name;
	int	flag;
	bool	force_off, force_on;
};

static struct rdt_options rdt_options[] __initdata = {
	RDT_OPT(RDT_FLAG_CMT,	    "cmt",	X86_FEATURE_CQM_OCCUP_LLC),
	RDT_OPT(RDT_FLAG_MBM_TOTAL, "mbmtotal", X86_FEATURE_CQM_MBM_TOTAL),
	RDT_OPT(RDT_FLAG_MBM_LOCAL, "mbmlocal", X86_FEATURE_CQM_MBM_LOCAL),
	RDT_OPT(RDT_FLAG_L3_CAT,    "l3cat",	X86_FEATURE_CAT_L3),
	RDT_OPT(RDT_FLAG_L3_CDP,    "l3cdp",	X86_FEATURE_CDP_L3),
	RDT_OPT(RDT_FLAG_L2_CAT,    "l2cat",	X86_FEATURE_CAT_L2),
	RDT_OPT(RDT_FLAG_L2_CDP,    "l2cdp",	X86_FEATURE_CDP_L2),
	RDT_OPT(RDT_FLAG_MBA,	    "mba",	X86_FEATURE_MBA),
	RDT_OPT(RDT_FLAG_SMBA,	    "smba",	X86_FEATURE_SMBA),
	RDT_OPT(RDT_FLAG_BMEC,	    "bmec",	X86_FEATURE_BMEC),
};
#define NUM_RDT_OPTIONS ARRAY_SIZE(rdt_options)
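
/*
 * Parse the "rdt=" kernel command line option. Each comma separated token
 * names one of the entries in rdt_options[]; a token sets force_on for that
 * feature and a token prefixed with '!' sets force_off. rdt_cpu_has() below
 * consults these overrides when probing for RDT features.
 * Example (illustrative): "rdt=!l3cat,cmt".
 */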
static int __init set_rdt_options(char *str)
{
	struct rdt_options *o;
	bool force_off;
	char *tok;

	if (*str == '=')
		str++;
	while ((tok = strsep(&str, ",")) != NULL) {
		force_off = *tok == '!';
		if (force_off)
			tok++;
		for (o = rdt_options; o < &rdt_options[NUM_RDT_OPTIONS]; o++) {
			if (strcmp(tok, o->name) == 0) {
				if (force_off)
					o->force_off = true;
				else
					o->force_on = true;
				break;
			}
		}
	}
	return 1;
}
__setup("rdt", set_rdt_options);
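
/*
 * rdt_cpu_has() - Check whether the boot CPU has RDT feature @flag, taking
 * any "rdt=" command line overrides recorded in rdt_options[] into account.
 */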
bool __init rdt_cpu_has(int flag)
{
	bool ret = boot_cpu_has(flag);
	struct rdt_options *o;

	if (!ret)
		return ret;

	for (o = rdt_options; o < &rdt_options[NUM_RDT_OPTIONS]; o++) {
		if (flag == o->flag) {
			if (o->force_off)
				ret = false;
			if (o->force_on)
				ret = true;
			break;
		}
	}
	return ret;
}

static __init bool get_mem_config(void)
{
	struct rdt_hw_resource *hw_res = &rdt_resources_all[RDT_RESOURCE_MBA];

	if (!rdt_cpu_has(X86_FEATURE_MBA))
		return false;

	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
		return __get_mem_config_intel(&hw_res->r_resctrl);
	else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
		return __rdt_get_mem_config_amd(&hw_res->r_resctrl);

	return false;
}

static __init bool get_slow_mem_config(void)
{
	struct rdt_hw_resource *hw_res = &rdt_resources_all[RDT_RESOURCE_SMBA];

	if (!rdt_cpu_has(X86_FEATURE_SMBA))
		return false;

	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
		return __rdt_get_mem_config_amd(&hw_res->r_resctrl);

	return false;
}

static __init bool get_rdt_alloc_resources(void)
{
	struct rdt_resource *r;
	bool ret = false;

	if (rdt_alloc_capable)
		return true;

	if (!boot_cpu_has(X86_FEATURE_RDT_A))
		return false;

	if (rdt_cpu_has(X86_FEATURE_CAT_L3)) {
		r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
		rdt_get_cache_alloc_cfg(1, r);
		if (rdt_cpu_has(X86_FEATURE_CDP_L3))
			rdt_get_cdp_l3_config();
		ret = true;
	}
	if (rdt_cpu_has(X86_FEATURE_CAT_L2)) {
		/* CPUID 0x10.2 fields are the same format as 0x10.1 */
		r = &rdt_resources_all[RDT_RESOURCE_L2].r_resctrl;
		rdt_get_cache_alloc_cfg(2, r);
		if (rdt_cpu_has(X86_FEATURE_CDP_L2))
			rdt_get_cdp_l2_config();
		ret = true;
	}

	if (get_mem_config())
		ret = true;

	if (get_slow_mem_config())
		ret = true;

	return ret;
}

static __init bool get_rdt_mon_resources(void)
{
	struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;

	if (rdt_cpu_has(X86_FEATURE_CQM_OCCUP_LLC))
		rdt_mon_features |= (1 << QOS_L3_OCCUP_EVENT_ID);
	if (rdt_cpu_has(X86_FEATURE_CQM_MBM_TOTAL))
		rdt_mon_features |= (1 << QOS_L3_MBM_TOTAL_EVENT_ID);
	if (rdt_cpu_has(X86_FEATURE_CQM_MBM_LOCAL))
		rdt_mon_features |= (1 << QOS_L3_MBM_LOCAL_EVENT_ID);

	if (!rdt_mon_features)
		return false;

	return !rdt_get_mon_l3_config(r);
}

static __init void __check_quirks_intel(void)
{
	switch (boot_cpu_data.x86_model) {
	case INTEL_FAM6_HASWELL_X:
		if (!rdt_options[RDT_FLAG_L3_CAT].force_off)
			cache_alloc_hsw_probe();
		break;
	case INTEL_FAM6_SKYLAKE_X:
		if (boot_cpu_data.x86_stepping <= 4)
			set_rdt_options("!cmt,!mbmtotal,!mbmlocal,!l3cat");
		else
			set_rdt_options("!l3cat");
		fallthrough;
	case INTEL_FAM6_BROADWELL_X:
		intel_rdt_mbm_apply_quirk();
		break;
	}
}

static __init void check_quirks(void)
{
	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
		__check_quirks_intel();
}

static __init bool get_rdt_resources(void)
{
	rdt_alloc_capable = get_rdt_alloc_resources();
	rdt_mon_capable = get_rdt_mon_resources();

	return (rdt_mon_capable || rdt_alloc_capable);
}

static __init void rdt_init_res_defs_intel(void)
{
	struct rdt_hw_resource *hw_res;
	struct rdt_resource *r;

	for_each_rdt_resource(r) {
		hw_res = resctrl_to_arch_res(r);

		if (r->rid == RDT_RESOURCE_L3 ||
		    r->rid == RDT_RESOURCE_L2) {
			r->cache.arch_has_sparse_bitmaps = false;
			r->cache.arch_has_per_cpu_cfg = false;
			r->cache.min_cbm_bits = 1;
		} else if (r->rid == RDT_RESOURCE_MBA) {
			hw_res->msr_base = MSR_IA32_MBA_THRTL_BASE;
			hw_res->msr_update = mba_wrmsr_intel;
		}
	}
}

static __init void rdt_init_res_defs_amd(void)
{
	struct rdt_hw_resource *hw_res;
	struct rdt_resource *r;

	for_each_rdt_resource(r) {
		hw_res = resctrl_to_arch_res(r);

		if (r->rid == RDT_RESOURCE_L3 ||
		    r->rid == RDT_RESOURCE_L2) {
			r->cache.arch_has_sparse_bitmaps = true;
			r->cache.arch_has_per_cpu_cfg = true;
			r->cache.min_cbm_bits = 0;
		} else if (r->rid == RDT_RESOURCE_MBA) {
			hw_res->msr_base = MSR_IA32_MBA_BW_BASE;
			hw_res->msr_update = mba_wrmsr_amd;
		} else if (r->rid == RDT_RESOURCE_SMBA) {
			hw_res->msr_base = MSR_IA32_SMBA_BW_BASE;
			hw_res->msr_update = mba_wrmsr_amd;
		}
	}
}

static __init void rdt_init_res_defs(void)
{
	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
		rdt_init_res_defs_intel();
	else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
		rdt_init_res_defs_amd();
}

static enum cpuhp_state rdt_online;

/* Runs once on the BSP during boot. */
void resctrl_cpu_detect(struct cpuinfo_x86 *c)
{
	if (!cpu_has(c, X86_FEATURE_CQM_LLC)) {
		c->x86_cache_max_rmid = -1;
		c->x86_cache_occ_scale = -1;
		c->x86_cache_mbm_width_offset = -1;
		return;
	}

	/* will be overridden if occupancy monitoring exists */
	c->x86_cache_max_rmid = cpuid_ebx(0xf);

	if (cpu_has(c, X86_FEATURE_CQM_OCCUP_LLC) ||
	    cpu_has(c, X86_FEATURE_CQM_MBM_TOTAL) ||
	    cpu_has(c, X86_FEATURE_CQM_MBM_LOCAL)) {
		u32 eax, ebx, ecx, edx;

		/* QoS sub-leaf, EAX=0Fh, ECX=1 */
		cpuid_count(0xf, 1, &eax, &ebx, &ecx, &edx);

		c->x86_cache_max_rmid = ecx;
		c->x86_cache_occ_scale = ebx;
		c->x86_cache_mbm_width_offset = eax & 0xff;

		if (c->x86_vendor == X86_VENDOR_AMD && !c->x86_cache_mbm_width_offset)
			c->x86_cache_mbm_width_offset = MBM_CNTR_WIDTH_OFFSET_AMD;
	}
}

static int __init resctrl_late_init(void)
{
	struct rdt_resource *r;
	int state, ret;

	/*
	 * Initialize functions (or definitions) that are different
	 * between vendors here.
	 */
	rdt_init_res_defs();

	check_quirks();

	if (!get_rdt_resources())
		return -ENODEV;

	rdt_init_padding();

	state = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
				  "x86/resctrl/cat:online:",
				  resctrl_online_cpu, resctrl_offline_cpu);
	if (state < 0)
		return state;

	ret = rdtgroup_init();
	if (ret) {
		cpuhp_remove_state(state);
		return ret;
	}
	rdt_online = state;

	for_each_alloc_capable_rdt_resource(r)
		pr_info("%s allocation detected\n", r->name);

	for_each_mon_capable_rdt_resource(r)
		pr_info("%s monitoring detected\n", r->name);

	return 0;
}

late_initcall(resctrl_late_init);

static void __exit resctrl_exit(void)
{
	cpuhp_remove_state(rdt_online);
	rdtgroup_exit();
}

__exitcall(resctrl_exit);