/*
 * Copyright IBM Corp. 2007, 2011
 * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
 */

#define KMSG_COMPONENT "cpu"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/workqueue.h>
#include <linux/bootmem.h>
#include <linux/cpuset.h>
#include <linux/device.h>
#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/sched/topology.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/smp.h>
#include <linux/mm.h>
#include <linux/nodemask.h>
#include <linux/node.h>
#include <asm/sysinfo.h>
#include <asm/numa.h>

#define PTF_HORIZONTAL	(0UL)
#define PTF_VERTICAL	(1UL)
#define PTF_CHECK	(2UL)

struct mask_info {
	struct mask_info *next;
	unsigned char id;
	cpumask_t mask;
};

static void set_topology_timer(void);
static void topology_work_fn(struct work_struct *work);
static struct sysinfo_15_1_x *tl_info;

static DECLARE_WORK(topology_work, topology_work_fn);

/*
 * Socket/Book linked lists and cpu_topology updates are
 * protected by "sched_domains_mutex".
 */
static struct mask_info socket_info;
static struct mask_info book_info;
static struct mask_info drawer_info;

struct cpu_topology_s390 cpu_topology[NR_CPUS];
EXPORT_SYMBOL_GPL(cpu_topology);

cpumask_t cpus_with_topology;

/*
 * Return the mask of the container (socket/book/drawer list starting
 * at @info) that contains @cpu, or a mask containing only @cpu if no
 * topology information is available.
 */
static cpumask_t cpu_group_map(struct mask_info *info, unsigned int cpu)
{
	cpumask_t mask;

	cpumask_copy(&mask, cpumask_of(cpu));
	if (!MACHINE_HAS_TOPOLOGY)
		return mask;
	for (; info; info = info->next) {
		if (cpumask_test_cpu(cpu, &info->mask))
			return info->mask;
	}
	return mask;
}

static cpumask_t cpu_thread_map(unsigned int cpu)
{
	cpumask_t mask;
	int i;

	cpumask_copy(&mask, cpumask_of(cpu));
	if (!MACHINE_HAS_TOPOLOGY)
		return mask;
	cpu -= cpu % (smp_cpu_mtid + 1);
	for (i = 0; i <= smp_cpu_mtid; i++)
		if (cpu_present(cpu + i))
			cpumask_set_cpu(cpu + i, &mask);
	return mask;
}

#define TOPOLOGY_CORE_BITS	64

/*
 * Add the logical CPUs of all cores reported in a topology core entry
 * to the current drawer/book/socket masks and record their placement.
 */
static void add_cpus_to_mask(struct topology_core *tl_core,
			     struct mask_info *drawer,
			     struct mask_info *book,
			     struct mask_info *socket)
{
	struct cpu_topology_s390 *topo;
	unsigned int core;

	for_each_set_bit(core, &tl_core->mask, TOPOLOGY_CORE_BITS) {
		unsigned int rcore;
		int lcpu, i;

		rcore = TOPOLOGY_CORE_BITS - 1 - core + tl_core->origin;
		lcpu = smp_find_processor_id(rcore << smp_cpu_mt_shift);
		if (lcpu < 0)
			continue;
		for (i = 0; i <= smp_cpu_mtid; i++) {
			topo = &cpu_topology[lcpu + i];
			topo->drawer_id = drawer->id;
			topo->book_id = book->id;
			topo->socket_id = socket->id;
			topo->core_id = rcore;
			topo->thread_id = lcpu + i;
			cpumask_set_cpu(lcpu + i, &drawer->mask);
			cpumask_set_cpu(lcpu + i, &book->mask);
			cpumask_set_cpu(lcpu + i, &socket->mask);
			cpumask_set_cpu(lcpu + i, &cpus_with_topology);
			smp_cpu_set_polarization(lcpu + i, tl_core->pp);
		}
	}
}

static void clear_masks(void)
{
	struct mask_info *info;

	info = &socket_info;
	while (info) {
		cpumask_clear(&info->mask);
		info = info->next;
	}
	info = &book_info;
	while (info) {
		cpumask_clear(&info->mask);
		info = info->next;
	}
	info = &drawer_info;
	while (info) {
		cpumask_clear(&info->mask);
		info = info->next;
	}
}
static union topology_entry *next_tle(union topology_entry *tle)
{
	if (!tle->nl)
		return (union topology_entry *)((struct topology_core *)tle + 1);
	return (union topology_entry *)((struct topology_container *)tle + 1);
}

static void tl_to_masks(struct sysinfo_15_1_x *info)
{
	struct mask_info *socket = &socket_info;
	struct mask_info *book = &book_info;
	struct mask_info *drawer = &drawer_info;
	union topology_entry *tle, *end;

	clear_masks();
	tle = info->tle;
	end = (union topology_entry *)((unsigned long)info + info->length);
	while (tle < end) {
		switch (tle->nl) {
		case 3:
			drawer = drawer->next;
			drawer->id = tle->container.id;
			break;
		case 2:
			book = book->next;
			book->id = tle->container.id;
			break;
		case 1:
			socket = socket->next;
			socket->id = tle->container.id;
			break;
		case 0:
			add_cpus_to_mask(&tle->cpu, drawer, book, socket);
			break;
		default:
			clear_masks();
			return;
		}
		tle = next_tle(tle);
	}
}

static void topology_update_polarization_simple(void)
{
	int cpu;

	mutex_lock(&smp_cpu_state_mutex);
	for_each_possible_cpu(cpu)
		smp_cpu_set_polarization(cpu, POLARIZATION_HRZ);
	mutex_unlock(&smp_cpu_state_mutex);
}

/*
 * Issue the Perform Topology Function (PTF) instruction with function
 * code @fc and return its condition code.
 */
static int ptf(unsigned long fc)
{
	int rc;

	asm volatile(
		"	.insn	rre,0xb9a20000,%1,%1\n"
		"	ipm	%0\n"
		"	srl	%0,28\n"
		: "=d" (rc)
		: "d" (fc) : "cc");
	return rc;
}

int topology_set_cpu_management(int fc)
{
	int cpu, rc;

	if (!MACHINE_HAS_TOPOLOGY)
		return -EOPNOTSUPP;
	if (fc)
		rc = ptf(PTF_VERTICAL);
	else
		rc = ptf(PTF_HORIZONTAL);
	if (rc)
		return -EBUSY;
	for_each_possible_cpu(cpu)
		smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN);
	return rc;
}

static void update_cpu_masks(void)
{
	struct cpu_topology_s390 *topo;
	int cpu;

	for_each_possible_cpu(cpu) {
		topo = &cpu_topology[cpu];
		topo->thread_mask = cpu_thread_map(cpu);
		topo->core_mask = cpu_group_map(&socket_info, cpu);
		topo->book_mask = cpu_group_map(&book_info, cpu);
		topo->drawer_mask = cpu_group_map(&drawer_info, cpu);
		if (!MACHINE_HAS_TOPOLOGY) {
			topo->thread_id = cpu;
			topo->core_id = cpu;
			topo->socket_id = cpu;
			topo->book_id = cpu;
			topo->drawer_id = cpu;
			if (cpu_present(cpu))
				cpumask_set_cpu(cpu, &cpus_with_topology);
		}
	}
	numa_update_cpu_topology();
}

void store_topology(struct sysinfo_15_1_x *info)
{
	stsi(info, 15, 1, topology_mnest_limit());
}

static int __arch_update_cpu_topology(void)
{
	struct sysinfo_15_1_x *info = tl_info;
	int rc = 0;

	cpumask_clear(&cpus_with_topology);
	if (MACHINE_HAS_TOPOLOGY) {
		rc = 1;
		store_topology(info);
		tl_to_masks(info);
	}
	update_cpu_masks();
	if (!MACHINE_HAS_TOPOLOGY)
		topology_update_polarization_simple();
	return rc;
}

int arch_update_cpu_topology(void)
{
	struct device *dev;
	int cpu, rc;

	rc = __arch_update_cpu_topology();
	for_each_online_cpu(cpu) {
		dev = get_cpu_device(cpu);
		kobject_uevent(&dev->kobj, KOBJ_CHANGE);
	}
	return rc;
}

static void topology_work_fn(struct work_struct *work)
{
	rebuild_sched_domains();
}

void topology_schedule_update(void)
{
	schedule_work(&topology_work);
}
static void topology_timer_fn(unsigned long ignored)
{
	if (ptf(PTF_CHECK))
		topology_schedule_update();
	set_topology_timer();
}

static struct timer_list topology_timer =
	TIMER_DEFERRED_INITIALIZER(topology_timer_fn, 0, 0);

static atomic_t topology_poll = ATOMIC_INIT(0);

static void set_topology_timer(void)
{
	if (atomic_add_unless(&topology_poll, -1, 0))
		mod_timer(&topology_timer, jiffies + HZ / 10);
	else
		mod_timer(&topology_timer, jiffies + HZ * 60);
}

void topology_expect_change(void)
{
	if (!MACHINE_HAS_TOPOLOGY)
		return;
	/* This is racy, but it doesn't matter since it is just a heuristic.
	 * Worst case is that we poll in a higher frequency for a bit longer.
	 */
	if (atomic_read(&topology_poll) > 60)
		return;
	atomic_add(60, &topology_poll);
	set_topology_timer();
}

static int cpu_management;

static ssize_t dispatching_show(struct device *dev,
				struct device_attribute *attr,
				char *buf)
{
	ssize_t count;

	mutex_lock(&smp_cpu_state_mutex);
	count = sprintf(buf, "%d\n", cpu_management);
	mutex_unlock(&smp_cpu_state_mutex);
	return count;
}

static ssize_t dispatching_store(struct device *dev,
				 struct device_attribute *attr,
				 const char *buf,
				 size_t count)
{
	int val, rc;
	char delim;

	if (sscanf(buf, "%d %c", &val, &delim) != 1)
		return -EINVAL;
	if (val != 0 && val != 1)
		return -EINVAL;
	rc = 0;
	get_online_cpus();
	mutex_lock(&smp_cpu_state_mutex);
	if (cpu_management == val)
		goto out;
	rc = topology_set_cpu_management(val);
	if (rc)
		goto out;
	cpu_management = val;
	topology_expect_change();
out:
	mutex_unlock(&smp_cpu_state_mutex);
	put_online_cpus();
	return rc ? rc : count;
}
static DEVICE_ATTR(dispatching, 0644, dispatching_show,
		   dispatching_store);

static ssize_t cpu_polarization_show(struct device *dev,
				     struct device_attribute *attr, char *buf)
{
	int cpu = dev->id;
	ssize_t count;

	mutex_lock(&smp_cpu_state_mutex);
	switch (smp_cpu_get_polarization(cpu)) {
	case POLARIZATION_HRZ:
		count = sprintf(buf, "horizontal\n");
		break;
	case POLARIZATION_VL:
		count = sprintf(buf, "vertical:low\n");
		break;
	case POLARIZATION_VM:
		count = sprintf(buf, "vertical:medium\n");
		break;
	case POLARIZATION_VH:
		count = sprintf(buf, "vertical:high\n");
		break;
	default:
		count = sprintf(buf, "unknown\n");
		break;
	}
	mutex_unlock(&smp_cpu_state_mutex);
	return count;
}
static DEVICE_ATTR(polarization, 0444, cpu_polarization_show, NULL);

static struct attribute *topology_cpu_attrs[] = {
	&dev_attr_polarization.attr,
	NULL,
};

static struct attribute_group topology_cpu_attr_group = {
	.attrs = topology_cpu_attrs,
};

int topology_cpu_init(struct cpu *cpu)
{
	return sysfs_create_group(&cpu->dev.kobj, &topology_cpu_attr_group);
}

static const struct cpumask *cpu_thread_mask(int cpu)
{
	return &cpu_topology[cpu].thread_mask;
}

const struct cpumask *cpu_coregroup_mask(int cpu)
{
	return &cpu_topology[cpu].core_mask;
}

static const struct cpumask *cpu_book_mask(int cpu)
{
	return &cpu_topology[cpu].book_mask;
}

static const struct cpumask *cpu_drawer_mask(int cpu)
{
	return &cpu_topology[cpu].drawer_mask;
}

static struct sched_domain_topology_level s390_topology[] = {
	{ cpu_thread_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
	{ cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
	{ cpu_book_mask, SD_INIT_NAME(BOOK) },
	{ cpu_drawer_mask, SD_INIT_NAME(DRAWER) },
	{ cpu_cpu_mask, SD_INIT_NAME(DIE) },
	{ NULL, },
};

static void __init alloc_masks(struct sysinfo_15_1_x *info,
			       struct mask_info *mask, int offset)
{
	int i, nr_masks;

	nr_masks = info->mag[TOPOLOGY_NR_MAG - offset];
	for (i = 0; i < info->mnest - offset; i++)
		nr_masks *= info->mag[TOPOLOGY_NR_MAG - offset - 1 - i];
	nr_masks = max(nr_masks, 1);
	for (i = 0; i < nr_masks; i++) {
		mask->next = memblock_virt_alloc(sizeof(*mask->next), 8);
		mask = mask->next;
	}
}

void __init topology_init_early(void)
{
	struct sysinfo_15_1_x *info;

	set_sched_topology(s390_topology);
	if (!MACHINE_HAS_TOPOLOGY)
		goto out;
	tl_info = memblock_virt_alloc(PAGE_SIZE, PAGE_SIZE);
	info = tl_info;
	store_topology(info);
	pr_info("The CPU configuration topology of the machine is: %d %d %d %d %d %d / %d\n",
		info->mag[0], info->mag[1], info->mag[2], info->mag[3],
		info->mag[4], info->mag[5], info->mnest);
	alloc_masks(info, &socket_info, 1);
	alloc_masks(info, &book_info, 2);
	alloc_masks(info, &drawer_info, 3);
out:
	__arch_update_cpu_topology();
}

static int __init topology_init(void)
{
	if (MACHINE_HAS_TOPOLOGY)
		set_topology_timer();
	else
		topology_update_polarization_simple();
	return device_create_file(cpu_subsys.dev_root, &dev_attr_dispatching);
}
device_initcall(topology_init);