1 /* $Id: bbc_envctrl.c,v 1.4 2001/04/06 16:48:08 davem Exp $ 2 * bbc_envctrl.c: UltraSPARC-III environment control driver. 3 * 4 * Copyright (C) 2001 David S. Miller (davem@redhat.com) 5 */ 6 7 #include <linux/kernel.h> 8 #include <linux/sched.h> 9 #include <linux/slab.h> 10 #include <linux/delay.h> 11 #include <asm/oplib.h> 12 #include <asm/ebus.h> 13 #define __KERNEL_SYSCALLS__ 14 static int errno; 15 #include <asm/unistd.h> 16 17 #include "bbc_i2c.h" 18 #include "max1617.h" 19 20 #undef ENVCTRL_TRACE 21 22 /* WARNING: Making changes to this driver is very dangerous. 23 * If you misprogram the sensor chips they can 24 * cut the power on you instantly. 25 */ 26 27 /* Two temperature sensors exist in the SunBLADE-1000 enclosure. 28 * Both are implemented using max1617 i2c devices. Each max1617 29 * monitors 2 temperatures, one for one of the cpu dies and the other 30 * for the ambient temperature. 31 * 32 * The max1617 is capable of being programmed with power-off 33 * temperature values, one low limit and one high limit. These 34 * can be controlled independently for the cpu or ambient temperature. 35 * If a limit is violated, the power is simply shut off. The frequency 36 * with which the max1617 does temperature sampling can be controlled 37 * as well. 38 * 39 * Three fans exist inside the machine, all three are controlled with 40 * an i2c digital to analog converter. There is a fan directed at the 41 * two processor slots, another for the rest of the enclosure, and the 42 * third is for the power supply. The first two fans may be speed 43 * controlled by changing the voltage fed to them. The third fan may 44 * only be completely off or on. The third fan is meant to only be 45 * disabled/enabled when entering/exiting the lowest power-saving 46 * mode of the machine. 47 * 48 * An environmental control kernel thread periodically monitors all 49 * temperature sensors. Based upon the samples it will adjust the 50 * fan speeds to try and keep the system within a certain temperature 51 * range (the goal being to make the fans as quiet as possible without 52 * allowing the system to get too hot). 53 * 54 * If the temperature begins to rise/fall outside of the acceptable 55 * operating range, a periodic warning will be sent to the kernel log. 56 * The fans will be put on full blast to attempt to deal with this 57 * situation. After exceeding the acceptable operating range by a 58 * certain threshold, the kernel thread will shut down the system. 59 * Here, the thread is attempting to shut the machine down cleanly 60 * before the hardware based power-off event is triggered. 61 */ 62 63 /* These settings are in Celsius. We use these defaults only 64 * if we cannot interrogate the cpu-fru SEEPROM. 65 */ 66 struct temp_limits { 67 s8 high_pwroff, high_shutdown, high_warn; 68 s8 low_warn, low_shutdown, low_pwroff; 69 }; 70 71 static struct temp_limits cpu_temp_limits[2] = { 72 { 100, 85, 80, 5, -5, -10 }, 73 { 100, 85, 80, 5, -5, -10 }, 74 }; 75 76 static struct temp_limits amb_temp_limits[2] = { 77 { 65, 55, 40, 5, -5, -10 }, 78 { 65, 55, 40, 5, -5, -10 }, 79 }; 80 81 enum fan_action { FAN_SLOWER, FAN_SAME, FAN_FASTER, FAN_FULLBLAST, FAN_STATE_MAX }; 82 83 struct bbc_cpu_temperature { 84 struct bbc_cpu_temperature *next; 85 86 struct bbc_i2c_client *client; 87 int index; 88 89 /* Current readings, and history. */ 90 s8 curr_cpu_temp; 91 s8 curr_amb_temp; 92 s8 prev_cpu_temp; 93 s8 prev_amb_temp; 94 s8 avg_cpu_temp; 95 s8 avg_amb_temp; 96 97 int sample_tick; 98 99 enum fan_action fan_todo[2]; 100 #define FAN_AMBIENT 0 101 #define FAN_CPU 1 102 }; 103 104 struct bbc_cpu_temperature *all_bbc_temps; 105 106 struct bbc_fan_control { 107 struct bbc_fan_control *next; 108 109 struct bbc_i2c_client *client; 110 int index; 111 112 int psupply_fan_on; 113 int cpu_fan_speed; 114 int system_fan_speed; 115 }; 116 117 struct bbc_fan_control *all_bbc_fans; 118 119 #define CPU_FAN_REG 0xf0 120 #define SYS_FAN_REG 0xf2 121 #define PSUPPLY_FAN_REG 0xf4 122 123 #define FAN_SPEED_MIN 0x0c 124 #define FAN_SPEED_MAX 0x3f 125 126 #define PSUPPLY_FAN_ON 0x1f 127 #define PSUPPLY_FAN_OFF 0x00 128 129 static void set_fan_speeds(struct bbc_fan_control *fp) 130 { 131 /* Put temperatures into range so we don't mis-program 132 * the hardware. 133 */ 134 if (fp->cpu_fan_speed < FAN_SPEED_MIN) 135 fp->cpu_fan_speed = FAN_SPEED_MIN; 136 if (fp->cpu_fan_speed > FAN_SPEED_MAX) 137 fp->cpu_fan_speed = FAN_SPEED_MAX; 138 if (fp->system_fan_speed < FAN_SPEED_MIN) 139 fp->system_fan_speed = FAN_SPEED_MIN; 140 if (fp->system_fan_speed > FAN_SPEED_MAX) 141 fp->system_fan_speed = FAN_SPEED_MAX; 142 #ifdef ENVCTRL_TRACE 143 printk("fan%d: Changed fan speed to cpu(%02x) sys(%02x)\n", 144 fp->index, 145 fp->cpu_fan_speed, fp->system_fan_speed); 146 #endif 147 148 bbc_i2c_writeb(fp->client, fp->cpu_fan_speed, CPU_FAN_REG); 149 bbc_i2c_writeb(fp->client, fp->system_fan_speed, SYS_FAN_REG); 150 bbc_i2c_writeb(fp->client, 151 (fp->psupply_fan_on ? 152 PSUPPLY_FAN_ON : PSUPPLY_FAN_OFF), 153 PSUPPLY_FAN_REG); 154 } 155 156 static void get_current_temps(struct bbc_cpu_temperature *tp) 157 { 158 tp->prev_amb_temp = tp->curr_amb_temp; 159 bbc_i2c_readb(tp->client, 160 (unsigned char *) &tp->curr_amb_temp, 161 MAX1617_AMB_TEMP); 162 tp->prev_cpu_temp = tp->curr_cpu_temp; 163 bbc_i2c_readb(tp->client, 164 (unsigned char *) &tp->curr_cpu_temp, 165 MAX1617_CPU_TEMP); 166 #ifdef ENVCTRL_TRACE 167 printk("temp%d: cpu(%d C) amb(%d C)\n", 168 tp->index, 169 (int) tp->curr_cpu_temp, (int) tp->curr_amb_temp); 170 #endif 171 } 172 173 174 static void do_envctrl_shutdown(struct bbc_cpu_temperature *tp) 175 { 176 static int shutting_down = 0; 177 static char *envp[] = { "HOME=/", "TERM=linux", "PATH=/sbin:/usr/sbin:/bin:/usr/bin", NULL }; 178 char *argv[] = { "/sbin/shutdown", "-h", "now", NULL }; 179 char *type = "???"; 180 s8 val = -1; 181 182 if (shutting_down != 0) 183 return; 184 185 if (tp->curr_amb_temp >= amb_temp_limits[tp->index].high_shutdown || 186 tp->curr_amb_temp < amb_temp_limits[tp->index].low_shutdown) { 187 type = "ambient"; 188 val = tp->curr_amb_temp; 189 } else if (tp->curr_cpu_temp >= cpu_temp_limits[tp->index].high_shutdown || 190 tp->curr_cpu_temp < cpu_temp_limits[tp->index].low_shutdown) { 191 type = "CPU"; 192 val = tp->curr_cpu_temp; 193 } 194 195 printk(KERN_CRIT "temp%d: Outside of safe %s " 196 "operating temperature, %d C.\n", 197 tp->index, type, val); 198 199 printk(KERN_CRIT "kenvctrld: Shutting down the system now.\n"); 200 201 shutting_down = 1; 202 if (execve("/sbin/shutdown", argv, envp) < 0) 203 printk(KERN_CRIT "envctrl: shutdown execution failed\n"); 204 } 205 206 #define WARN_INTERVAL (30 * HZ) 207 208 static void analyze_ambient_temp(struct bbc_cpu_temperature *tp, unsigned long *last_warn, int tick) 209 { 210 int ret = 0; 211 212 if (time_after(jiffies, (*last_warn + WARN_INTERVAL))) { 213 if (tp->curr_amb_temp >= 214 amb_temp_limits[tp->index].high_warn) { 215 printk(KERN_WARNING "temp%d: " 216 "Above safe ambient operating temperature, %d C.\n", 217 tp->index, (int) tp->curr_amb_temp); 218 ret = 1; 219 } else if (tp->curr_amb_temp < 220 amb_temp_limits[tp->index].low_warn) { 221 printk(KERN_WARNING "temp%d: " 222 "Below safe ambient operating temperature, %d C.\n", 223 tp->index, (int) tp->curr_amb_temp); 224 ret = 1; 225 } 226 if (ret) 227 *last_warn = jiffies; 228 } else if (tp->curr_amb_temp >= amb_temp_limits[tp->index].high_warn || 229 tp->curr_amb_temp < amb_temp_limits[tp->index].low_warn) 230 ret = 1; 231 232 /* Now check the shutdown limits. */ 233 if (tp->curr_amb_temp >= amb_temp_limits[tp->index].high_shutdown || 234 tp->curr_amb_temp < amb_temp_limits[tp->index].low_shutdown) { 235 do_envctrl_shutdown(tp); 236 ret = 1; 237 } 238 239 if (ret) { 240 tp->fan_todo[FAN_AMBIENT] = FAN_FULLBLAST; 241 } else if ((tick & (8 - 1)) == 0) { 242 s8 amb_goal_hi = amb_temp_limits[tp->index].high_warn - 10; 243 s8 amb_goal_lo; 244 245 amb_goal_lo = amb_goal_hi - 3; 246 247 /* We do not try to avoid 'too cold' events. Basically we 248 * only try to deal with over-heating and fan noise reduction. 249 */ 250 if (tp->avg_amb_temp < amb_goal_hi) { 251 if (tp->avg_amb_temp >= amb_goal_lo) 252 tp->fan_todo[FAN_AMBIENT] = FAN_SAME; 253 else 254 tp->fan_todo[FAN_AMBIENT] = FAN_SLOWER; 255 } else { 256 tp->fan_todo[FAN_AMBIENT] = FAN_FASTER; 257 } 258 } else { 259 tp->fan_todo[FAN_AMBIENT] = FAN_SAME; 260 } 261 } 262 263 static void analyze_cpu_temp(struct bbc_cpu_temperature *tp, unsigned long *last_warn, int tick) 264 { 265 int ret = 0; 266 267 if (time_after(jiffies, (*last_warn + WARN_INTERVAL))) { 268 if (tp->curr_cpu_temp >= 269 cpu_temp_limits[tp->index].high_warn) { 270 printk(KERN_WARNING "temp%d: " 271 "Above safe CPU operating temperature, %d C.\n", 272 tp->index, (int) tp->curr_cpu_temp); 273 ret = 1; 274 } else if (tp->curr_cpu_temp < 275 cpu_temp_limits[tp->index].low_warn) { 276 printk(KERN_WARNING "temp%d: " 277 "Below safe CPU operating temperature, %d C.\n", 278 tp->index, (int) tp->curr_cpu_temp); 279 ret = 1; 280 } 281 if (ret) 282 *last_warn = jiffies; 283 } else if (tp->curr_cpu_temp >= cpu_temp_limits[tp->index].high_warn || 284 tp->curr_cpu_temp < cpu_temp_limits[tp->index].low_warn) 285 ret = 1; 286 287 /* Now check the shutdown limits. */ 288 if (tp->curr_cpu_temp >= cpu_temp_limits[tp->index].high_shutdown || 289 tp->curr_cpu_temp < cpu_temp_limits[tp->index].low_shutdown) { 290 do_envctrl_shutdown(tp); 291 ret = 1; 292 } 293 294 if (ret) { 295 tp->fan_todo[FAN_CPU] = FAN_FULLBLAST; 296 } else if ((tick & (8 - 1)) == 0) { 297 s8 cpu_goal_hi = cpu_temp_limits[tp->index].high_warn - 10; 298 s8 cpu_goal_lo; 299 300 cpu_goal_lo = cpu_goal_hi - 3; 301 302 /* We do not try to avoid 'too cold' events. Basically we 303 * only try to deal with over-heating and fan noise reduction. 304 */ 305 if (tp->avg_cpu_temp < cpu_goal_hi) { 306 if (tp->avg_cpu_temp >= cpu_goal_lo) 307 tp->fan_todo[FAN_CPU] = FAN_SAME; 308 else 309 tp->fan_todo[FAN_CPU] = FAN_SLOWER; 310 } else { 311 tp->fan_todo[FAN_CPU] = FAN_FASTER; 312 } 313 } else { 314 tp->fan_todo[FAN_CPU] = FAN_SAME; 315 } 316 } 317 318 static void analyze_temps(struct bbc_cpu_temperature *tp, unsigned long *last_warn) 319 { 320 tp->avg_amb_temp = (s8)((int)((int)tp->avg_amb_temp + (int)tp->curr_amb_temp) / 2); 321 tp->avg_cpu_temp = (s8)((int)((int)tp->avg_cpu_temp + (int)tp->curr_cpu_temp) / 2); 322 323 analyze_ambient_temp(tp, last_warn, tp->sample_tick); 324 analyze_cpu_temp(tp, last_warn, tp->sample_tick); 325 326 tp->sample_tick++; 327 } 328 329 static enum fan_action prioritize_fan_action(int which_fan) 330 { 331 struct bbc_cpu_temperature *tp; 332 enum fan_action decision = FAN_STATE_MAX; 333 334 /* Basically, prioritize what the temperature sensors 335 * recommend we do, and perform that action on all the 336 * fans. 337 */ 338 for (tp = all_bbc_temps; tp; tp = tp->next) { 339 if (tp->fan_todo[which_fan] == FAN_FULLBLAST) { 340 decision = FAN_FULLBLAST; 341 break; 342 } 343 if (tp->fan_todo[which_fan] == FAN_SAME && 344 decision != FAN_FASTER) 345 decision = FAN_SAME; 346 else if (tp->fan_todo[which_fan] == FAN_FASTER) 347 decision = FAN_FASTER; 348 else if (decision != FAN_FASTER && 349 decision != FAN_SAME && 350 tp->fan_todo[which_fan] == FAN_SLOWER) 351 decision = FAN_SLOWER; 352 } 353 if (decision == FAN_STATE_MAX) 354 decision = FAN_SAME; 355 356 return decision; 357 } 358 359 static int maybe_new_ambient_fan_speed(struct bbc_fan_control *fp) 360 { 361 enum fan_action decision = prioritize_fan_action(FAN_AMBIENT); 362 int ret; 363 364 if (decision == FAN_SAME) 365 return 0; 366 367 ret = 1; 368 if (decision == FAN_FULLBLAST) { 369 if (fp->system_fan_speed >= FAN_SPEED_MAX) 370 ret = 0; 371 else 372 fp->system_fan_speed = FAN_SPEED_MAX; 373 } else { 374 if (decision == FAN_FASTER) { 375 if (fp->system_fan_speed >= FAN_SPEED_MAX) 376 ret = 0; 377 else 378 fp->system_fan_speed += 2; 379 } else { 380 int orig_speed = fp->system_fan_speed; 381 382 if (orig_speed <= FAN_SPEED_MIN || 383 orig_speed <= (fp->cpu_fan_speed - 3)) 384 ret = 0; 385 else 386 fp->system_fan_speed -= 1; 387 } 388 } 389 390 return ret; 391 } 392 393 static int maybe_new_cpu_fan_speed(struct bbc_fan_control *fp) 394 { 395 enum fan_action decision = prioritize_fan_action(FAN_CPU); 396 int ret; 397 398 if (decision == FAN_SAME) 399 return 0; 400 401 ret = 1; 402 if (decision == FAN_FULLBLAST) { 403 if (fp->cpu_fan_speed >= FAN_SPEED_MAX) 404 ret = 0; 405 else 406 fp->cpu_fan_speed = FAN_SPEED_MAX; 407 } else { 408 if (decision == FAN_FASTER) { 409 if (fp->cpu_fan_speed >= FAN_SPEED_MAX) 410 ret = 0; 411 else { 412 fp->cpu_fan_speed += 2; 413 if (fp->system_fan_speed < 414 (fp->cpu_fan_speed - 3)) 415 fp->system_fan_speed = 416 fp->cpu_fan_speed - 3; 417 } 418 } else { 419 if (fp->cpu_fan_speed <= FAN_SPEED_MIN) 420 ret = 0; 421 else 422 fp->cpu_fan_speed -= 1; 423 } 424 } 425 426 return ret; 427 } 428 429 static void maybe_new_fan_speeds(struct bbc_fan_control *fp) 430 { 431 int new; 432 433 new = maybe_new_ambient_fan_speed(fp); 434 new |= maybe_new_cpu_fan_speed(fp); 435 436 if (new) 437 set_fan_speeds(fp); 438 } 439 440 static void fans_full_blast(void) 441 { 442 struct bbc_fan_control *fp; 443 444 /* Since we will not be monitoring things anymore, put 445 * the fans on full blast. 446 */ 447 for (fp = all_bbc_fans; fp; fp = fp->next) { 448 fp->cpu_fan_speed = FAN_SPEED_MAX; 449 fp->system_fan_speed = FAN_SPEED_MAX; 450 fp->psupply_fan_on = 1; 451 set_fan_speeds(fp); 452 } 453 } 454 455 #define POLL_INTERVAL (5 * 1000) 456 static unsigned long last_warning_jiffies; 457 static struct task_struct *kenvctrld_task; 458 459 static int kenvctrld(void *__unused) 460 { 461 daemonize("kenvctrld"); 462 allow_signal(SIGKILL); 463 kenvctrld_task = current; 464 465 printk(KERN_INFO "bbc_envctrl: kenvctrld starting...\n"); 466 last_warning_jiffies = jiffies - WARN_INTERVAL; 467 for (;;) { 468 struct bbc_cpu_temperature *tp; 469 struct bbc_fan_control *fp; 470 471 msleep_interruptible(POLL_INTERVAL); 472 if (signal_pending(current)) 473 break; 474 475 for (tp = all_bbc_temps; tp; tp = tp->next) { 476 get_current_temps(tp); 477 analyze_temps(tp, &last_warning_jiffies); 478 } 479 for (fp = all_bbc_fans; fp; fp = fp->next) 480 maybe_new_fan_speeds(fp); 481 } 482 printk(KERN_INFO "bbc_envctrl: kenvctrld exiting...\n"); 483 484 fans_full_blast(); 485 486 return 0; 487 } 488 489 static void attach_one_temp(struct linux_ebus_child *echild, int temp_idx) 490 { 491 struct bbc_cpu_temperature *tp = kmalloc(sizeof(*tp), GFP_KERNEL); 492 493 if (!tp) 494 return; 495 memset(tp, 0, sizeof(*tp)); 496 tp->client = bbc_i2c_attach(echild); 497 if (!tp->client) { 498 kfree(tp); 499 return; 500 } 501 502 tp->index = temp_idx; 503 { 504 struct bbc_cpu_temperature **tpp = &all_bbc_temps; 505 while (*tpp) 506 tpp = &((*tpp)->next); 507 tp->next = NULL; 508 *tpp = tp; 509 } 510 511 /* Tell it to convert once every 5 seconds, clear all cfg 512 * bits. 513 */ 514 bbc_i2c_writeb(tp->client, 0x00, MAX1617_WR_CFG_BYTE); 515 bbc_i2c_writeb(tp->client, 0x02, MAX1617_WR_CVRATE_BYTE); 516 517 /* Program the hard temperature limits into the chip. */ 518 bbc_i2c_writeb(tp->client, amb_temp_limits[tp->index].high_pwroff, 519 MAX1617_WR_AMB_HIGHLIM); 520 bbc_i2c_writeb(tp->client, amb_temp_limits[tp->index].low_pwroff, 521 MAX1617_WR_AMB_LOWLIM); 522 bbc_i2c_writeb(tp->client, cpu_temp_limits[tp->index].high_pwroff, 523 MAX1617_WR_CPU_HIGHLIM); 524 bbc_i2c_writeb(tp->client, cpu_temp_limits[tp->index].low_pwroff, 525 MAX1617_WR_CPU_LOWLIM); 526 527 get_current_temps(tp); 528 tp->prev_cpu_temp = tp->avg_cpu_temp = tp->curr_cpu_temp; 529 tp->prev_amb_temp = tp->avg_amb_temp = tp->curr_amb_temp; 530 531 tp->fan_todo[FAN_AMBIENT] = FAN_SAME; 532 tp->fan_todo[FAN_CPU] = FAN_SAME; 533 } 534 535 static void attach_one_fan(struct linux_ebus_child *echild, int fan_idx) 536 { 537 struct bbc_fan_control *fp = kmalloc(sizeof(*fp), GFP_KERNEL); 538 539 if (!fp) 540 return; 541 memset(fp, 0, sizeof(*fp)); 542 fp->client = bbc_i2c_attach(echild); 543 if (!fp->client) { 544 kfree(fp); 545 return; 546 } 547 548 fp->index = fan_idx; 549 550 { 551 struct bbc_fan_control **fpp = &all_bbc_fans; 552 while (*fpp) 553 fpp = &((*fpp)->next); 554 fp->next = NULL; 555 *fpp = fp; 556 } 557 558 /* The i2c device controlling the fans is write-only. 559 * So the only way to keep track of the current power 560 * level fed to the fans is via software. Choose half 561 * power for cpu/system and 'on' fo the powersupply fan 562 * and set it now. 563 */ 564 fp->psupply_fan_on = 1; 565 fp->cpu_fan_speed = (FAN_SPEED_MAX - FAN_SPEED_MIN) / 2; 566 fp->cpu_fan_speed += FAN_SPEED_MIN; 567 fp->system_fan_speed = (FAN_SPEED_MAX - FAN_SPEED_MIN) / 2; 568 fp->system_fan_speed += FAN_SPEED_MIN; 569 570 set_fan_speeds(fp); 571 } 572 573 int bbc_envctrl_init(void) 574 { 575 struct linux_ebus_child *echild; 576 int temp_index = 0; 577 int fan_index = 0; 578 int devidx = 0; 579 int err = 0; 580 581 while ((echild = bbc_i2c_getdev(devidx++)) != NULL) { 582 if (!strcmp(echild->prom_name, "temperature")) 583 attach_one_temp(echild, temp_index++); 584 if (!strcmp(echild->prom_name, "fan-control")) 585 attach_one_fan(echild, fan_index++); 586 } 587 if (temp_index != 0 && fan_index != 0) 588 err = kernel_thread(kenvctrld, NULL, CLONE_FS | CLONE_FILES); 589 return err; 590 } 591 592 static void destroy_one_temp(struct bbc_cpu_temperature *tp) 593 { 594 bbc_i2c_detach(tp->client); 595 kfree(tp); 596 } 597 598 static void destroy_one_fan(struct bbc_fan_control *fp) 599 { 600 bbc_i2c_detach(fp->client); 601 kfree(fp); 602 } 603 604 void bbc_envctrl_cleanup(void) 605 { 606 struct bbc_cpu_temperature *tp; 607 struct bbc_fan_control *fp; 608 609 if (kenvctrld_task != NULL) { 610 force_sig(SIGKILL, kenvctrld_task); 611 for (;;) { 612 struct task_struct *p; 613 int found = 0; 614 615 read_lock(&tasklist_lock); 616 for_each_process(p) { 617 if (p == kenvctrld_task) { 618 found = 1; 619 break; 620 } 621 } 622 read_unlock(&tasklist_lock); 623 if (!found) 624 break; 625 msleep(1000); 626 } 627 kenvctrld_task = NULL; 628 } 629 630 tp = all_bbc_temps; 631 while (tp != NULL) { 632 struct bbc_cpu_temperature *next = tp->next; 633 destroy_one_temp(tp); 634 tp = next; 635 } 636 all_bbc_temps = NULL; 637 638 fp = all_bbc_fans; 639 while (fp != NULL) { 640 struct bbc_fan_control *next = fp->next; 641 destroy_one_fan(fp); 642 fp = next; 643 } 644 all_bbc_fans = NULL; 645 } 646