/*
 * Copyright IBM Corp. 2007, 2011
 * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
 */

#define KMSG_COMPONENT "cpu"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/workqueue.h>
#include <linux/cpuset.h>
#include <linux/device.h>
#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/smp.h>
#include <linux/mm.h>
#include <linux/nodemask.h>
#include <linux/node.h>
#include <asm/sysinfo.h>
#include <asm/numa.h>

#define PTF_HORIZONTAL	(0UL)
#define PTF_VERTICAL	(1UL)
#define PTF_CHECK	(2UL)

struct mask_info {
	struct mask_info *next;
	unsigned char id;
	cpumask_t mask;
};

static void set_topology_timer(void);
static void topology_work_fn(struct work_struct *work);
static struct sysinfo_15_1_x *tl_info;

static int topology_enabled = 1;
static DECLARE_WORK(topology_work, topology_work_fn);

/*
 * Socket/Book linked lists and per_cpu(cpu_topology) updates are
 * protected by "sched_domains_mutex".
 */
static struct mask_info socket_info;
static struct mask_info book_info;

DEFINE_PER_CPU(struct cpu_topology_s390, cpu_topology);
EXPORT_PER_CPU_SYMBOL_GPL(cpu_topology);

static cpumask_t cpu_group_map(struct mask_info *info, unsigned int cpu)
{
	cpumask_t mask;

	cpumask_copy(&mask, cpumask_of(cpu));
	if (!topology_enabled || !MACHINE_HAS_TOPOLOGY)
		return mask;
	for (; info; info = info->next) {
		if (cpumask_test_cpu(cpu, &info->mask))
			return info->mask;
	}
	return mask;
}

static cpumask_t cpu_thread_map(unsigned int cpu)
{
	cpumask_t mask;
	int i;

	cpumask_copy(&mask, cpumask_of(cpu));
	if (!topology_enabled || !MACHINE_HAS_TOPOLOGY)
		return mask;
	cpu -= cpu % (smp_cpu_mtid + 1);
	for (i = 0; i <= smp_cpu_mtid; i++)
		if (cpu_present(cpu + i))
			cpumask_set_cpu(cpu + i, &mask);
	return mask;
}

static struct mask_info *add_cpus_to_mask(struct topology_core *tl_core,
					  struct mask_info *book,
					  struct mask_info *socket,
					  int one_socket_per_cpu)
{
	struct cpu_topology_s390 *topo;
	unsigned int core;

	for_each_set_bit(core, &tl_core->mask[0], TOPOLOGY_CORE_BITS) {
		unsigned int rcore;
		int lcpu, i;

		rcore = TOPOLOGY_CORE_BITS - 1 - core + tl_core->origin;
		lcpu = smp_find_processor_id(rcore << smp_cpu_mt_shift);
		if (lcpu < 0)
			continue;
		for (i = 0; i <= smp_cpu_mtid; i++) {
			topo = &per_cpu(cpu_topology, lcpu + i);
			topo->book_id = book->id;
			topo->core_id = rcore;
			topo->thread_id = lcpu + i;
			cpumask_set_cpu(lcpu + i, &book->mask);
			cpumask_set_cpu(lcpu + i, &socket->mask);
			if (one_socket_per_cpu)
				topo->socket_id = rcore;
			else
				topo->socket_id = socket->id;
			smp_cpu_set_polarization(lcpu + i, tl_core->pp);
		}
		if (one_socket_per_cpu)
			socket = socket->next;
	}
	return socket;
}

static void clear_masks(void)
{
	struct mask_info *info;

	info = &socket_info;
	while (info) {
		cpumask_clear(&info->mask);
		info = info->next;
	}
	info = &book_info;
	while (info) {
		cpumask_clear(&info->mask);
		info = info->next;
	}
}

static union topology_entry *next_tle(union topology_entry *tle)
{
	if (!tle->nl)
		return (union topology_entry *)((struct topology_core *)tle + 1);
	return (union topology_entry *)((struct topology_container *)tle + 1);
}
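
/*
 * The SYSIB 15.1.x topology list is a flat sequence of entries ordered
 * top-down by nesting level: a container entry (book or socket) is
 * followed by the entries nested below it, down to the nl == 0 core
 * entries. The two walkers below rebuild the socket/book mask lists
 * from such a list; they differ only in which nesting levels the
 * machine reports.
 */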

static void __tl_to_masks_generic(struct sysinfo_15_1_x *info)
{
	struct mask_info *socket = &socket_info;
	struct mask_info *book = &book_info;
	union topology_entry *tle, *end;

	tle = info->tle;
	end = (union topology_entry *)((unsigned long)info + info->length);
	while (tle < end) {
		switch (tle->nl) {
		case 2:
			book = book->next;
			book->id = tle->container.id;
			break;
		case 1:
			socket = socket->next;
			socket->id = tle->container.id;
			break;
		case 0:
			add_cpus_to_mask(&tle->cpu, book, socket, 0);
			break;
		default:
			clear_masks();
			return;
		}
		tle = next_tle(tle);
	}
}

static void __tl_to_masks_z10(struct sysinfo_15_1_x *info)
{
	struct mask_info *socket = &socket_info;
	struct mask_info *book = &book_info;
	union topology_entry *tle, *end;

	tle = info->tle;
	end = (union topology_entry *)((unsigned long)info + info->length);
	while (tle < end) {
		switch (tle->nl) {
		case 1:
			book = book->next;
			book->id = tle->container.id;
			break;
		case 0:
			socket = add_cpus_to_mask(&tle->cpu, book, socket, 1);
			break;
		default:
			clear_masks();
			return;
		}
		tle = next_tle(tle);
	}
}

static void tl_to_masks(struct sysinfo_15_1_x *info)
{
	struct cpuid cpu_id;

	get_cpu_id(&cpu_id);
	clear_masks();
	switch (cpu_id.machine) {
	case 0x2097:
	case 0x2098:
		__tl_to_masks_z10(info);
		break;
	default:
		__tl_to_masks_generic(info);
	}
}

static void topology_update_polarization_simple(void)
{
	int cpu;

	mutex_lock(&smp_cpu_state_mutex);
	for_each_possible_cpu(cpu)
		smp_cpu_set_polarization(cpu, POLARIZATION_HRZ);
	mutex_unlock(&smp_cpu_state_mutex);
}

static int ptf(unsigned long fc)
{
	int rc;

	asm volatile(
		"	.insn	rre,0xb9a20000,%1,%1\n"
		"	ipm	%0\n"
		"	srl	%0,28\n"
		: "=d" (rc)
		: "d" (fc) : "cc");
	return rc;
}

int topology_set_cpu_management(int fc)
{
	int cpu, rc;

	if (!MACHINE_HAS_TOPOLOGY)
		return -EOPNOTSUPP;
	if (fc)
		rc = ptf(PTF_VERTICAL);
	else
		rc = ptf(PTF_HORIZONTAL);
	if (rc)
		return -EBUSY;
	for_each_possible_cpu(cpu)
		smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN);
	return rc;
}

static void update_cpu_masks(void)
{
	struct cpu_topology_s390 *topo;
	int cpu;

	for_each_possible_cpu(cpu) {
		topo = &per_cpu(cpu_topology, cpu);
		topo->thread_mask = cpu_thread_map(cpu);
		topo->core_mask = cpu_group_map(&socket_info, cpu);
		topo->book_mask = cpu_group_map(&book_info, cpu);
		if (!MACHINE_HAS_TOPOLOGY) {
			topo->thread_id = cpu;
			topo->core_id = cpu;
			topo->socket_id = cpu;
			topo->book_id = cpu;
		}
	}
	numa_update_cpu_topology();
}

void store_topology(struct sysinfo_15_1_x *info)
{
	if (topology_max_mnest >= 3)
		stsi(info, 15, 1, 3);
	else
		stsi(info, 15, 1, 2);
}

int arch_update_cpu_topology(void)
{
	struct sysinfo_15_1_x *info = tl_info;
	struct device *dev;
	int cpu, rc = 0;

	if (MACHINE_HAS_TOPOLOGY) {
		rc = 1;
		store_topology(info);
		tl_to_masks(info);
	}
	update_cpu_masks();
	if (!MACHINE_HAS_TOPOLOGY)
		topology_update_polarization_simple();
	for_each_online_cpu(cpu) {
		dev = get_cpu_device(cpu);
		kobject_uevent(&dev->kobj, KOBJ_CHANGE);
	}
	return rc;
}
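
/*
 * Rebuilding the scheduling domains is driven from a deferrable timer:
 * topology_timer_fn() polls the hardware via ptf(PTF_CHECK) and, if a
 * topology change is pending, schedules topology_work. The work item
 * calls rebuild_sched_domains(), which in turn ends up in
 * arch_update_cpu_topology() above with "sched_domains_mutex" held.
 */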

static void topology_work_fn(struct work_struct *work)
{
	rebuild_sched_domains();
}

void topology_schedule_update(void)
{
	schedule_work(&topology_work);
}

static void topology_timer_fn(unsigned long ignored)
{
	if (ptf(PTF_CHECK))
		topology_schedule_update();
	set_topology_timer();
}

static struct timer_list topology_timer =
	TIMER_DEFERRED_INITIALIZER(topology_timer_fn, 0, 0);

static atomic_t topology_poll = ATOMIC_INIT(0);

static void set_topology_timer(void)
{
	if (atomic_add_unless(&topology_poll, -1, 0))
		mod_timer(&topology_timer, jiffies + HZ / 10);
	else
		mod_timer(&topology_timer, jiffies + HZ * 60);
}

void topology_expect_change(void)
{
	if (!MACHINE_HAS_TOPOLOGY)
		return;
	/* This is racy, but it doesn't matter since it is just a heuristic.
	 * Worst case is that we poll in a higher frequency for a bit longer.
	 */
	if (atomic_read(&topology_poll) > 60)
		return;
	atomic_add(60, &topology_poll);
	set_topology_timer();
}

static int cpu_management;

static ssize_t dispatching_show(struct device *dev,
				struct device_attribute *attr,
				char *buf)
{
	ssize_t count;

	mutex_lock(&smp_cpu_state_mutex);
	count = sprintf(buf, "%d\n", cpu_management);
	mutex_unlock(&smp_cpu_state_mutex);
	return count;
}

static ssize_t dispatching_store(struct device *dev,
				 struct device_attribute *attr,
				 const char *buf,
				 size_t count)
{
	int val, rc;
	char delim;

	if (sscanf(buf, "%d %c", &val, &delim) != 1)
		return -EINVAL;
	if (val != 0 && val != 1)
		return -EINVAL;
	rc = 0;
	get_online_cpus();
	mutex_lock(&smp_cpu_state_mutex);
	if (cpu_management == val)
		goto out;
	rc = topology_set_cpu_management(val);
	if (rc)
		goto out;
	cpu_management = val;
	topology_expect_change();
out:
	mutex_unlock(&smp_cpu_state_mutex);
	put_online_cpus();
	return rc ? rc : count;
}
static DEVICE_ATTR(dispatching, 0644, dispatching_show,
		   dispatching_store);
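
/*
 * Example usage from user space (a sketch; the paths assume the
 * standard sysfs layout created by topology_init() and
 * topology_cpu_init() below):
 *
 *   echo 1 > /sys/devices/system/cpu/dispatching    # request vertical
 *   echo 0 > /sys/devices/system/cpu/dispatching    # back to horizontal
 *   cat /sys/devices/system/cpu/cpu0/polarization   # e.g. "vertical:high"
 */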

static ssize_t cpu_polarization_show(struct device *dev,
				     struct device_attribute *attr, char *buf)
{
	int cpu = dev->id;
	ssize_t count;

	mutex_lock(&smp_cpu_state_mutex);
	switch (smp_cpu_get_polarization(cpu)) {
	case POLARIZATION_HRZ:
		count = sprintf(buf, "horizontal\n");
		break;
	case POLARIZATION_VL:
		count = sprintf(buf, "vertical:low\n");
		break;
	case POLARIZATION_VM:
		count = sprintf(buf, "vertical:medium\n");
		break;
	case POLARIZATION_VH:
		count = sprintf(buf, "vertical:high\n");
		break;
	default:
		count = sprintf(buf, "unknown\n");
		break;
	}
	mutex_unlock(&smp_cpu_state_mutex);
	return count;
}
static DEVICE_ATTR(polarization, 0444, cpu_polarization_show, NULL);

static struct attribute *topology_cpu_attrs[] = {
	&dev_attr_polarization.attr,
	NULL,
};

static struct attribute_group topology_cpu_attr_group = {
	.attrs = topology_cpu_attrs,
};

int topology_cpu_init(struct cpu *cpu)
{
	return sysfs_create_group(&cpu->dev.kobj, &topology_cpu_attr_group);
}

static const struct cpumask *cpu_thread_mask(int cpu)
{
	return &per_cpu(cpu_topology, cpu).thread_mask;
}

const struct cpumask *cpu_coregroup_mask(int cpu)
{
	return &per_cpu(cpu_topology, cpu).core_mask;
}

static const struct cpumask *cpu_book_mask(int cpu)
{
	return &per_cpu(cpu_topology, cpu).book_mask;
}

static int __init early_parse_topology(char *p)
{
	if (strncmp(p, "off", 3))
		return 0;
	topology_enabled = 0;
	return 0;
}
early_param("topology", early_parse_topology);

static struct sched_domain_topology_level s390_topology[] = {
	{ cpu_thread_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
	{ cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
	{ cpu_book_mask, SD_INIT_NAME(BOOK) },
	{ cpu_cpu_mask, SD_INIT_NAME(DIE) },
	{ NULL, },
};

static void __init alloc_masks(struct sysinfo_15_1_x *info,
			       struct mask_info *mask, int offset)
{
	int i, nr_masks;

	nr_masks = info->mag[TOPOLOGY_NR_MAG - offset];
	for (i = 0; i < info->mnest - offset; i++)
		nr_masks *= info->mag[TOPOLOGY_NR_MAG - offset - 1 - i];
	nr_masks = max(nr_masks, 1);
	for (i = 0; i < nr_masks; i++) {
		mask->next = kzalloc(sizeof(*mask->next), GFP_KERNEL);
		mask = mask->next;
	}
}

static int __init s390_topology_init(void)
{
	struct sysinfo_15_1_x *info;
	int i;

	if (!MACHINE_HAS_TOPOLOGY)
		return 0;
	tl_info = (struct sysinfo_15_1_x *)__get_free_page(GFP_KERNEL);
	info = tl_info;
	store_topology(info);
	pr_info("The CPU configuration topology of the machine is:");
	for (i = 0; i < TOPOLOGY_NR_MAG; i++)
		printk(KERN_CONT " %d", info->mag[i]);
	printk(KERN_CONT " / %d\n", info->mnest);
	alloc_masks(info, &socket_info, 1);
	alloc_masks(info, &book_info, 2);
	set_sched_topology(s390_topology);
	return 0;
}
early_initcall(s390_topology_init);

static int __init topology_init(void)
{
	if (MACHINE_HAS_TOPOLOGY)
		set_topology_timer();
	else
		topology_update_polarization_simple();
	return device_create_file(cpu_subsys.dev_root, &dev_attr_dispatching);
}
device_initcall(topology_init);
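
/*
 * Note on initcall ordering: s390_topology_init() runs as an
 * early_initcall so that the mask lists are allocated and
 * set_sched_topology() is called before the scheduler builds its
 * domains, while topology_init() runs later as a device_initcall,
 * once the cpu subsystem exists, to register the "dispatching"
 * attribute and start the polling timer.
 */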