/*
 * Copyright IBM Corp. 2007, 2011
 * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
 */

#define KMSG_COMPONENT "cpu"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/workqueue.h>
#include <linux/cpuset.h>
#include <linux/device.h>
#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/smp.h>
#include <linux/mm.h>
#include <linux/nodemask.h>
#include <linux/node.h>
#include <asm/sysinfo.h>
#include <asm/numa.h>

#define PTF_HORIZONTAL	(0UL)
#define PTF_VERTICAL	(1UL)
#define PTF_CHECK	(2UL)

struct mask_info {
	struct mask_info *next;
	unsigned char id;
	cpumask_t mask;
};

static void set_topology_timer(void);
static void topology_work_fn(struct work_struct *work);
static struct sysinfo_15_1_x *tl_info;

static bool topology_enabled = true;
static DECLARE_WORK(topology_work, topology_work_fn);

/*
 * Socket/Book linked lists and per_cpu(cpu_topology) updates are
 * protected by "sched_domains_mutex".
 */
static struct mask_info socket_info;
static struct mask_info book_info;
static struct mask_info drawer_info;

DEFINE_PER_CPU(struct cpu_topology_s390, cpu_topology);
EXPORT_PER_CPU_SYMBOL_GPL(cpu_topology);

static cpumask_t cpu_group_map(struct mask_info *info, unsigned int cpu)
{
	cpumask_t mask;

	cpumask_copy(&mask, cpumask_of(cpu));
	if (!topology_enabled || !MACHINE_HAS_TOPOLOGY)
		return mask;
	for (; info; info = info->next) {
		if (cpumask_test_cpu(cpu, &info->mask))
			return info->mask;
	}
	return mask;
}

static cpumask_t cpu_thread_map(unsigned int cpu)
{
	cpumask_t mask;
	int i;

	cpumask_copy(&mask, cpumask_of(cpu));
	if (!topology_enabled || !MACHINE_HAS_TOPOLOGY)
		return mask;
	cpu -= cpu % (smp_cpu_mtid + 1);
	for (i = 0; i <= smp_cpu_mtid; i++)
		if (cpu_present(cpu + i))
			cpumask_set_cpu(cpu + i, &mask);
	return mask;
}

static void add_cpus_to_mask(struct topology_core *tl_core,
			     struct mask_info *drawer,
			     struct mask_info *book,
			     struct mask_info *socket)
{
	struct cpu_topology_s390 *topo;
	unsigned int core;

	for_each_set_bit(core, &tl_core->mask[0], TOPOLOGY_CORE_BITS) {
		unsigned int rcore;
		int lcpu, i;

		rcore = TOPOLOGY_CORE_BITS - 1 - core + tl_core->origin;
		lcpu = smp_find_processor_id(rcore << smp_cpu_mt_shift);
		if (lcpu < 0)
			continue;
		for (i = 0; i <= smp_cpu_mtid; i++) {
			topo = &per_cpu(cpu_topology, lcpu + i);
			topo->drawer_id = drawer->id;
			topo->book_id = book->id;
			topo->socket_id = socket->id;
			topo->core_id = rcore;
			topo->thread_id = lcpu + i;
			cpumask_set_cpu(lcpu + i, &drawer->mask);
			cpumask_set_cpu(lcpu + i, &book->mask);
			cpumask_set_cpu(lcpu + i, &socket->mask);
			smp_cpu_set_polarization(lcpu + i, tl_core->pp);
		}
	}
}

static void clear_masks(void)
{
	struct mask_info *info;

	info = &socket_info;
	while (info) {
		cpumask_clear(&info->mask);
		info = info->next;
	}
	info = &book_info;
	while (info) {
		cpumask_clear(&info->mask);
		info = info->next;
	}
	info = &drawer_info;
	while (info) {
		cpumask_clear(&info->mask);
		info = info->next;
	}
}

static union topology_entry *next_tle(union topology_entry *tle)
{
	if (!tle->nl)
		return (union topology_entry *)((struct topology_core *)tle + 1);
	return (union topology_entry *)((struct topology_container *)tle + 1);
}
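
/*
 * Illustrative note, not from the original source: STSI 15.1.x returns a
 * flat stream of variable-sized topology list entries (TLEs), ordered from
 * the outermost container down to the cores it contains, for example:
 *
 *	nl=3 drawer | nl=2 book | nl=1 socket | nl=0 cores | nl=1 socket | ...
 *
 * Container entries (nl > 0) are struct topology_container, while core
 * entries (nl == 0) are the larger struct topology_core, which is why
 * next_tle() above must pick the stride that matches the current entry.
 */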
static void tl_to_masks(struct sysinfo_15_1_x *info)
{
	struct mask_info *socket = &socket_info;
	struct mask_info *book = &book_info;
	struct mask_info *drawer = &drawer_info;
	union topology_entry *tle, *end;

	clear_masks();
	tle = info->tle;
	end = (union topology_entry *)((unsigned long)info + info->length);
	while (tle < end) {
		switch (tle->nl) {
		case 3:
			drawer = drawer->next;
			drawer->id = tle->container.id;
			break;
		case 2:
			book = book->next;
			book->id = tle->container.id;
			break;
		case 1:
			socket = socket->next;
			socket->id = tle->container.id;
			break;
		case 0:
			add_cpus_to_mask(&tle->cpu, drawer, book, socket);
			break;
		default:
			clear_masks();
			return;
		}
		tle = next_tle(tle);
	}
}

static void topology_update_polarization_simple(void)
{
	int cpu;

	mutex_lock(&smp_cpu_state_mutex);
	for_each_possible_cpu(cpu)
		smp_cpu_set_polarization(cpu, POLARIZATION_HRZ);
	mutex_unlock(&smp_cpu_state_mutex);
}

static int ptf(unsigned long fc)
{
	int rc;

	asm volatile(
		"	.insn	rre,0xb9a20000,%1,%1\n"
		"	ipm	%0\n"
		"	srl	%0,28\n"
		: "=d" (rc)
		: "d" (fc) : "cc");
	return rc;
}

int topology_set_cpu_management(int fc)
{
	int cpu, rc;

	if (!MACHINE_HAS_TOPOLOGY)
		return -EOPNOTSUPP;
	if (fc)
		rc = ptf(PTF_VERTICAL);
	else
		rc = ptf(PTF_HORIZONTAL);
	if (rc)
		return -EBUSY;
	for_each_possible_cpu(cpu)
		smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN);
	return rc;
}

static void update_cpu_masks(void)
{
	struct cpu_topology_s390 *topo;
	int cpu;

	for_each_possible_cpu(cpu) {
		topo = &per_cpu(cpu_topology, cpu);
		topo->thread_mask = cpu_thread_map(cpu);
		topo->core_mask = cpu_group_map(&socket_info, cpu);
		topo->book_mask = cpu_group_map(&book_info, cpu);
		topo->drawer_mask = cpu_group_map(&drawer_info, cpu);
		if (!MACHINE_HAS_TOPOLOGY) {
			topo->thread_id = cpu;
			topo->core_id = cpu;
			topo->socket_id = cpu;
			topo->book_id = cpu;
			topo->drawer_id = cpu;
		}
	}
	numa_update_cpu_topology();
}

void store_topology(struct sysinfo_15_1_x *info)
{
	stsi(info, 15, 1, min(topology_max_mnest, 4));
}

int arch_update_cpu_topology(void)
{
	struct sysinfo_15_1_x *info = tl_info;
	struct device *dev;
	int cpu, rc = 0;

	if (MACHINE_HAS_TOPOLOGY) {
		rc = 1;
		store_topology(info);
		tl_to_masks(info);
	}
	update_cpu_masks();
	if (!MACHINE_HAS_TOPOLOGY)
		topology_update_polarization_simple();
	for_each_online_cpu(cpu) {
		dev = get_cpu_device(cpu);
		kobject_uevent(&dev->kobj, KOBJ_CHANGE);
	}
	return rc;
}

static void topology_work_fn(struct work_struct *work)
{
	rebuild_sched_domains();
}

void topology_schedule_update(void)
{
	schedule_work(&topology_work);
}

static void topology_timer_fn(unsigned long ignored)
{
	if (ptf(PTF_CHECK))
		topology_schedule_update();
	set_topology_timer();
}

static struct timer_list topology_timer =
	TIMER_DEFERRED_INITIALIZER(topology_timer_fn, 0, 0);

static atomic_t topology_poll = ATOMIC_INIT(0);

static void set_topology_timer(void)
{
	if (atomic_add_unless(&topology_poll, -1, 0))
		mod_timer(&topology_timer, jiffies + HZ / 10);
	else
		mod_timer(&topology_timer, jiffies + HZ * 60);
}
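
/*
 * Reading aid, not from the original source: topology_poll is a countdown
 * of fast polls.  While it is non-zero, each timer expiry consumes one unit
 * and re-arms the timer at HZ / 10, i.e. every 100ms; once it has drained
 * to zero, set_topology_timer() falls back to the slow one-minute interval.
 * topology_expect_change() below tops the counter up to at least 60 units,
 * which buys roughly six seconds of fast polling after an expected change.
 */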
void topology_expect_change(void)
{
	if (!MACHINE_HAS_TOPOLOGY)
		return;
	/* This is racy, but it doesn't matter since it is just a heuristic.
	 * Worst case is that we poll in a higher frequency for a bit longer.
	 */
	if (atomic_read(&topology_poll) > 60)
		return;
	atomic_add(60, &topology_poll);
	set_topology_timer();
}

static int cpu_management;

static ssize_t dispatching_show(struct device *dev,
				struct device_attribute *attr,
				char *buf)
{
	ssize_t count;

	mutex_lock(&smp_cpu_state_mutex);
	count = sprintf(buf, "%d\n", cpu_management);
	mutex_unlock(&smp_cpu_state_mutex);
	return count;
}

static ssize_t dispatching_store(struct device *dev,
				 struct device_attribute *attr,
				 const char *buf,
				 size_t count)
{
	int val, rc;
	char delim;

	if (sscanf(buf, "%d %c", &val, &delim) != 1)
		return -EINVAL;
	if (val != 0 && val != 1)
		return -EINVAL;
	rc = 0;
	get_online_cpus();
	mutex_lock(&smp_cpu_state_mutex);
	if (cpu_management == val)
		goto out;
	rc = topology_set_cpu_management(val);
	if (rc)
		goto out;
	cpu_management = val;
	topology_expect_change();
out:
	mutex_unlock(&smp_cpu_state_mutex);
	put_online_cpus();
	return rc ? rc : count;
}
static DEVICE_ATTR(dispatching, 0644, dispatching_show,
		   dispatching_store);

static ssize_t cpu_polarization_show(struct device *dev,
				     struct device_attribute *attr, char *buf)
{
	int cpu = dev->id;
	ssize_t count;

	mutex_lock(&smp_cpu_state_mutex);
	switch (smp_cpu_get_polarization(cpu)) {
	case POLARIZATION_HRZ:
		count = sprintf(buf, "horizontal\n");
		break;
	case POLARIZATION_VL:
		count = sprintf(buf, "vertical:low\n");
		break;
	case POLARIZATION_VM:
		count = sprintf(buf, "vertical:medium\n");
		break;
	case POLARIZATION_VH:
		count = sprintf(buf, "vertical:high\n");
		break;
	default:
		count = sprintf(buf, "unknown\n");
		break;
	}
	mutex_unlock(&smp_cpu_state_mutex);
	return count;
}
static DEVICE_ATTR(polarization, 0444, cpu_polarization_show, NULL);

static struct attribute *topology_cpu_attrs[] = {
	&dev_attr_polarization.attr,
	NULL,
};

static struct attribute_group topology_cpu_attr_group = {
	.attrs = topology_cpu_attrs,
};

int topology_cpu_init(struct cpu *cpu)
{
	return sysfs_create_group(&cpu->dev.kobj, &topology_cpu_attr_group);
}

static const struct cpumask *cpu_thread_mask(int cpu)
{
	return &per_cpu(cpu_topology, cpu).thread_mask;
}

const struct cpumask *cpu_coregroup_mask(int cpu)
{
	return &per_cpu(cpu_topology, cpu).core_mask;
}

static const struct cpumask *cpu_book_mask(int cpu)
{
	return &per_cpu(cpu_topology, cpu).book_mask;
}

static const struct cpumask *cpu_drawer_mask(int cpu)
{
	return &per_cpu(cpu_topology, cpu).drawer_mask;
}

static int __init early_parse_topology(char *p)
{
	return kstrtobool(p, &topology_enabled);
}
early_param("topology", early_parse_topology);
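
/*
 * Reading aid, not from the original source: the scheduling domain levels
 * below nest from SMT (hardware threads of one core) through MC (cores of
 * one socket) and the s390-specific packaging levels BOOK and DRAWER up to
 * DIE, which covers all CPUs of the (possibly emulated) NUMA node.  The
 * BOOK and DRAWER entries pass no flags callback, so those domains run
 * with the default flags.
 */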
static struct sched_domain_topology_level s390_topology[] = {
	{ cpu_thread_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
	{ cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
	{ cpu_book_mask, SD_INIT_NAME(BOOK) },
	{ cpu_drawer_mask, SD_INIT_NAME(DRAWER) },
	{ cpu_cpu_mask, SD_INIT_NAME(DIE) },
	{ NULL, },
};

/*
 * Derive an upper bound for the number of containers that can exist at
 * the requested nesting level from the machine's magnitude fields and
 * allocate one mask_info per possible container.
 */
static void __init alloc_masks(struct sysinfo_15_1_x *info,
			       struct mask_info *mask, int offset)
{
	int i, nr_masks;

	nr_masks = info->mag[TOPOLOGY_NR_MAG - offset];
	for (i = 0; i < info->mnest - offset; i++)
		nr_masks *= info->mag[TOPOLOGY_NR_MAG - offset - 1 - i];
	nr_masks = max(nr_masks, 1);
	for (i = 0; i < nr_masks; i++) {
		mask->next = kzalloc(sizeof(*mask->next), GFP_KERNEL);
		mask = mask->next;
	}
}

static int __init s390_topology_init(void)
{
	struct sysinfo_15_1_x *info;
	int i;

	if (!MACHINE_HAS_TOPOLOGY)
		return 0;
	tl_info = (struct sysinfo_15_1_x *)__get_free_page(GFP_KERNEL);
	info = tl_info;
	store_topology(info);
	pr_info("The CPU configuration topology of the machine is:");
	for (i = 0; i < TOPOLOGY_NR_MAG; i++)
		printk(KERN_CONT " %d", info->mag[i]);
	printk(KERN_CONT " / %d\n", info->mnest);
	alloc_masks(info, &socket_info, 1);
	alloc_masks(info, &book_info, 2);
	alloc_masks(info, &drawer_info, 3);
	set_sched_topology(s390_topology);
	return 0;
}
early_initcall(s390_topology_init);

static int __init topology_init(void)
{
	if (MACHINE_HAS_TOPOLOGY)
		set_topology_timer();
	else
		topology_update_polarization_simple();
	return device_create_file(cpu_subsys.dev_root, &dev_attr_dispatching);
}
device_initcall(topology_init);
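
/*
 * Usage sketch, not part of the original source; the paths follow from the
 * attributes created above, the values shown are illustrative only:
 *
 *	$ cat /sys/devices/system/cpu/dispatching
 *	0			<- horizontal polarization
 *	$ echo 1 > /sys/devices/system/cpu/dispatching
 *	$ cat /sys/devices/system/cpu/cpu0/polarization
 *	vertical:high
 *
 * Writing to "dispatching" fails with -EOPNOTSUPP on machines without
 * topology support.
 */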