1 /* $Id: bbc_envctrl.c,v 1.4 2001/04/06 16:48:08 davem Exp $ 2 * bbc_envctrl.c: UltraSPARC-III environment control driver. 3 * 4 * Copyright (C) 2001 David S. Miller (davem@redhat.com) 5 */ 6 7 #include <linux/kthread.h> 8 #include <linux/delay.h> 9 #include <linux/kmod.h> 10 #include <linux/reboot.h> 11 #include <asm/oplib.h> 12 #include <asm/ebus.h> 13 14 #include "bbc_i2c.h" 15 #include "max1617.h" 16 17 #undef ENVCTRL_TRACE 18 19 /* WARNING: Making changes to this driver is very dangerous. 20 * If you misprogram the sensor chips they can 21 * cut the power on you instantly. 22 */ 23 24 /* Two temperature sensors exist in the SunBLADE-1000 enclosure. 25 * Both are implemented using max1617 i2c devices. Each max1617 26 * monitors 2 temperatures, one for one of the cpu dies and the other 27 * for the ambient temperature. 28 * 29 * The max1617 is capable of being programmed with power-off 30 * temperature values, one low limit and one high limit. These 31 * can be controlled independently for the cpu or ambient temperature. 32 * If a limit is violated, the power is simply shut off. The frequency 33 * with which the max1617 does temperature sampling can be controlled 34 * as well. 35 * 36 * Three fans exist inside the machine, all three are controlled with 37 * an i2c digital to analog converter. There is a fan directed at the 38 * two processor slots, another for the rest of the enclosure, and the 39 * third is for the power supply. The first two fans may be speed 40 * controlled by changing the voltage fed to them. The third fan may 41 * only be completely off or on. The third fan is meant to only be 42 * disabled/enabled when entering/exiting the lowest power-saving 43 * mode of the machine. 44 * 45 * An environmental control kernel thread periodically monitors all 46 * temperature sensors. Based upon the samples it will adjust the 47 * fan speeds to try and keep the system within a certain temperature 48 * range (the goal being to make the fans as quiet as possible without 49 * allowing the system to get too hot). 50 * 51 * If the temperature begins to rise/fall outside of the acceptable 52 * operating range, a periodic warning will be sent to the kernel log. 53 * The fans will be put on full blast to attempt to deal with this 54 * situation. After exceeding the acceptable operating range by a 55 * certain threshold, the kernel thread will shut down the system. 56 * Here, the thread is attempting to shut the machine down cleanly 57 * before the hardware based power-off event is triggered. 58 */ 59 60 /* These settings are in Celsius. We use these defaults only 61 * if we cannot interrogate the cpu-fru SEEPROM. 62 */ 63 struct temp_limits { 64 s8 high_pwroff, high_shutdown, high_warn; 65 s8 low_warn, low_shutdown, low_pwroff; 66 }; 67 68 static struct temp_limits cpu_temp_limits[2] = { 69 { 100, 85, 80, 5, -5, -10 }, 70 { 100, 85, 80, 5, -5, -10 }, 71 }; 72 73 static struct temp_limits amb_temp_limits[2] = { 74 { 65, 55, 40, 5, -5, -10 }, 75 { 65, 55, 40, 5, -5, -10 }, 76 }; 77 78 enum fan_action { FAN_SLOWER, FAN_SAME, FAN_FASTER, FAN_FULLBLAST, FAN_STATE_MAX }; 79 80 struct bbc_cpu_temperature { 81 struct bbc_cpu_temperature *next; 82 83 struct bbc_i2c_client *client; 84 int index; 85 86 /* Current readings, and history. */ 87 s8 curr_cpu_temp; 88 s8 curr_amb_temp; 89 s8 prev_cpu_temp; 90 s8 prev_amb_temp; 91 s8 avg_cpu_temp; 92 s8 avg_amb_temp; 93 94 int sample_tick; 95 96 enum fan_action fan_todo[2]; 97 #define FAN_AMBIENT 0 98 #define FAN_CPU 1 99 }; 100 101 struct bbc_cpu_temperature *all_bbc_temps; 102 103 struct bbc_fan_control { 104 struct bbc_fan_control *next; 105 106 struct bbc_i2c_client *client; 107 int index; 108 109 int psupply_fan_on; 110 int cpu_fan_speed; 111 int system_fan_speed; 112 }; 113 114 struct bbc_fan_control *all_bbc_fans; 115 116 #define CPU_FAN_REG 0xf0 117 #define SYS_FAN_REG 0xf2 118 #define PSUPPLY_FAN_REG 0xf4 119 120 #define FAN_SPEED_MIN 0x0c 121 #define FAN_SPEED_MAX 0x3f 122 123 #define PSUPPLY_FAN_ON 0x1f 124 #define PSUPPLY_FAN_OFF 0x00 125 126 static void set_fan_speeds(struct bbc_fan_control *fp) 127 { 128 /* Put temperatures into range so we don't mis-program 129 * the hardware. 130 */ 131 if (fp->cpu_fan_speed < FAN_SPEED_MIN) 132 fp->cpu_fan_speed = FAN_SPEED_MIN; 133 if (fp->cpu_fan_speed > FAN_SPEED_MAX) 134 fp->cpu_fan_speed = FAN_SPEED_MAX; 135 if (fp->system_fan_speed < FAN_SPEED_MIN) 136 fp->system_fan_speed = FAN_SPEED_MIN; 137 if (fp->system_fan_speed > FAN_SPEED_MAX) 138 fp->system_fan_speed = FAN_SPEED_MAX; 139 #ifdef ENVCTRL_TRACE 140 printk("fan%d: Changed fan speed to cpu(%02x) sys(%02x)\n", 141 fp->index, 142 fp->cpu_fan_speed, fp->system_fan_speed); 143 #endif 144 145 bbc_i2c_writeb(fp->client, fp->cpu_fan_speed, CPU_FAN_REG); 146 bbc_i2c_writeb(fp->client, fp->system_fan_speed, SYS_FAN_REG); 147 bbc_i2c_writeb(fp->client, 148 (fp->psupply_fan_on ? 149 PSUPPLY_FAN_ON : PSUPPLY_FAN_OFF), 150 PSUPPLY_FAN_REG); 151 } 152 153 static void get_current_temps(struct bbc_cpu_temperature *tp) 154 { 155 tp->prev_amb_temp = tp->curr_amb_temp; 156 bbc_i2c_readb(tp->client, 157 (unsigned char *) &tp->curr_amb_temp, 158 MAX1617_AMB_TEMP); 159 tp->prev_cpu_temp = tp->curr_cpu_temp; 160 bbc_i2c_readb(tp->client, 161 (unsigned char *) &tp->curr_cpu_temp, 162 MAX1617_CPU_TEMP); 163 #ifdef ENVCTRL_TRACE 164 printk("temp%d: cpu(%d C) amb(%d C)\n", 165 tp->index, 166 (int) tp->curr_cpu_temp, (int) tp->curr_amb_temp); 167 #endif 168 } 169 170 171 static void do_envctrl_shutdown(struct bbc_cpu_temperature *tp) 172 { 173 static int shutting_down = 0; 174 char *type = "???"; 175 s8 val = -1; 176 177 if (shutting_down != 0) 178 return; 179 180 if (tp->curr_amb_temp >= amb_temp_limits[tp->index].high_shutdown || 181 tp->curr_amb_temp < amb_temp_limits[tp->index].low_shutdown) { 182 type = "ambient"; 183 val = tp->curr_amb_temp; 184 } else if (tp->curr_cpu_temp >= cpu_temp_limits[tp->index].high_shutdown || 185 tp->curr_cpu_temp < cpu_temp_limits[tp->index].low_shutdown) { 186 type = "CPU"; 187 val = tp->curr_cpu_temp; 188 } 189 190 printk(KERN_CRIT "temp%d: Outside of safe %s " 191 "operating temperature, %d C.\n", 192 tp->index, type, val); 193 194 printk(KERN_CRIT "kenvctrld: Shutting down the system now.\n"); 195 196 shutting_down = 1; 197 if (orderly_poweroff(true) < 0) 198 printk(KERN_CRIT "envctrl: shutdown execution failed\n"); 199 } 200 201 #define WARN_INTERVAL (30 * HZ) 202 203 static void analyze_ambient_temp(struct bbc_cpu_temperature *tp, unsigned long *last_warn, int tick) 204 { 205 int ret = 0; 206 207 if (time_after(jiffies, (*last_warn + WARN_INTERVAL))) { 208 if (tp->curr_amb_temp >= 209 amb_temp_limits[tp->index].high_warn) { 210 printk(KERN_WARNING "temp%d: " 211 "Above safe ambient operating temperature, %d C.\n", 212 tp->index, (int) tp->curr_amb_temp); 213 ret = 1; 214 } else if (tp->curr_amb_temp < 215 amb_temp_limits[tp->index].low_warn) { 216 printk(KERN_WARNING "temp%d: " 217 "Below safe ambient operating temperature, %d C.\n", 218 tp->index, (int) tp->curr_amb_temp); 219 ret = 1; 220 } 221 if (ret) 222 *last_warn = jiffies; 223 } else if (tp->curr_amb_temp >= amb_temp_limits[tp->index].high_warn || 224 tp->curr_amb_temp < amb_temp_limits[tp->index].low_warn) 225 ret = 1; 226 227 /* Now check the shutdown limits. */ 228 if (tp->curr_amb_temp >= amb_temp_limits[tp->index].high_shutdown || 229 tp->curr_amb_temp < amb_temp_limits[tp->index].low_shutdown) { 230 do_envctrl_shutdown(tp); 231 ret = 1; 232 } 233 234 if (ret) { 235 tp->fan_todo[FAN_AMBIENT] = FAN_FULLBLAST; 236 } else if ((tick & (8 - 1)) == 0) { 237 s8 amb_goal_hi = amb_temp_limits[tp->index].high_warn - 10; 238 s8 amb_goal_lo; 239 240 amb_goal_lo = amb_goal_hi - 3; 241 242 /* We do not try to avoid 'too cold' events. Basically we 243 * only try to deal with over-heating and fan noise reduction. 244 */ 245 if (tp->avg_amb_temp < amb_goal_hi) { 246 if (tp->avg_amb_temp >= amb_goal_lo) 247 tp->fan_todo[FAN_AMBIENT] = FAN_SAME; 248 else 249 tp->fan_todo[FAN_AMBIENT] = FAN_SLOWER; 250 } else { 251 tp->fan_todo[FAN_AMBIENT] = FAN_FASTER; 252 } 253 } else { 254 tp->fan_todo[FAN_AMBIENT] = FAN_SAME; 255 } 256 } 257 258 static void analyze_cpu_temp(struct bbc_cpu_temperature *tp, unsigned long *last_warn, int tick) 259 { 260 int ret = 0; 261 262 if (time_after(jiffies, (*last_warn + WARN_INTERVAL))) { 263 if (tp->curr_cpu_temp >= 264 cpu_temp_limits[tp->index].high_warn) { 265 printk(KERN_WARNING "temp%d: " 266 "Above safe CPU operating temperature, %d C.\n", 267 tp->index, (int) tp->curr_cpu_temp); 268 ret = 1; 269 } else if (tp->curr_cpu_temp < 270 cpu_temp_limits[tp->index].low_warn) { 271 printk(KERN_WARNING "temp%d: " 272 "Below safe CPU operating temperature, %d C.\n", 273 tp->index, (int) tp->curr_cpu_temp); 274 ret = 1; 275 } 276 if (ret) 277 *last_warn = jiffies; 278 } else if (tp->curr_cpu_temp >= cpu_temp_limits[tp->index].high_warn || 279 tp->curr_cpu_temp < cpu_temp_limits[tp->index].low_warn) 280 ret = 1; 281 282 /* Now check the shutdown limits. */ 283 if (tp->curr_cpu_temp >= cpu_temp_limits[tp->index].high_shutdown || 284 tp->curr_cpu_temp < cpu_temp_limits[tp->index].low_shutdown) { 285 do_envctrl_shutdown(tp); 286 ret = 1; 287 } 288 289 if (ret) { 290 tp->fan_todo[FAN_CPU] = FAN_FULLBLAST; 291 } else if ((tick & (8 - 1)) == 0) { 292 s8 cpu_goal_hi = cpu_temp_limits[tp->index].high_warn - 10; 293 s8 cpu_goal_lo; 294 295 cpu_goal_lo = cpu_goal_hi - 3; 296 297 /* We do not try to avoid 'too cold' events. Basically we 298 * only try to deal with over-heating and fan noise reduction. 299 */ 300 if (tp->avg_cpu_temp < cpu_goal_hi) { 301 if (tp->avg_cpu_temp >= cpu_goal_lo) 302 tp->fan_todo[FAN_CPU] = FAN_SAME; 303 else 304 tp->fan_todo[FAN_CPU] = FAN_SLOWER; 305 } else { 306 tp->fan_todo[FAN_CPU] = FAN_FASTER; 307 } 308 } else { 309 tp->fan_todo[FAN_CPU] = FAN_SAME; 310 } 311 } 312 313 static void analyze_temps(struct bbc_cpu_temperature *tp, unsigned long *last_warn) 314 { 315 tp->avg_amb_temp = (s8)((int)((int)tp->avg_amb_temp + (int)tp->curr_amb_temp) / 2); 316 tp->avg_cpu_temp = (s8)((int)((int)tp->avg_cpu_temp + (int)tp->curr_cpu_temp) / 2); 317 318 analyze_ambient_temp(tp, last_warn, tp->sample_tick); 319 analyze_cpu_temp(tp, last_warn, tp->sample_tick); 320 321 tp->sample_tick++; 322 } 323 324 static enum fan_action prioritize_fan_action(int which_fan) 325 { 326 struct bbc_cpu_temperature *tp; 327 enum fan_action decision = FAN_STATE_MAX; 328 329 /* Basically, prioritize what the temperature sensors 330 * recommend we do, and perform that action on all the 331 * fans. 332 */ 333 for (tp = all_bbc_temps; tp; tp = tp->next) { 334 if (tp->fan_todo[which_fan] == FAN_FULLBLAST) { 335 decision = FAN_FULLBLAST; 336 break; 337 } 338 if (tp->fan_todo[which_fan] == FAN_SAME && 339 decision != FAN_FASTER) 340 decision = FAN_SAME; 341 else if (tp->fan_todo[which_fan] == FAN_FASTER) 342 decision = FAN_FASTER; 343 else if (decision != FAN_FASTER && 344 decision != FAN_SAME && 345 tp->fan_todo[which_fan] == FAN_SLOWER) 346 decision = FAN_SLOWER; 347 } 348 if (decision == FAN_STATE_MAX) 349 decision = FAN_SAME; 350 351 return decision; 352 } 353 354 static int maybe_new_ambient_fan_speed(struct bbc_fan_control *fp) 355 { 356 enum fan_action decision = prioritize_fan_action(FAN_AMBIENT); 357 int ret; 358 359 if (decision == FAN_SAME) 360 return 0; 361 362 ret = 1; 363 if (decision == FAN_FULLBLAST) { 364 if (fp->system_fan_speed >= FAN_SPEED_MAX) 365 ret = 0; 366 else 367 fp->system_fan_speed = FAN_SPEED_MAX; 368 } else { 369 if (decision == FAN_FASTER) { 370 if (fp->system_fan_speed >= FAN_SPEED_MAX) 371 ret = 0; 372 else 373 fp->system_fan_speed += 2; 374 } else { 375 int orig_speed = fp->system_fan_speed; 376 377 if (orig_speed <= FAN_SPEED_MIN || 378 orig_speed <= (fp->cpu_fan_speed - 3)) 379 ret = 0; 380 else 381 fp->system_fan_speed -= 1; 382 } 383 } 384 385 return ret; 386 } 387 388 static int maybe_new_cpu_fan_speed(struct bbc_fan_control *fp) 389 { 390 enum fan_action decision = prioritize_fan_action(FAN_CPU); 391 int ret; 392 393 if (decision == FAN_SAME) 394 return 0; 395 396 ret = 1; 397 if (decision == FAN_FULLBLAST) { 398 if (fp->cpu_fan_speed >= FAN_SPEED_MAX) 399 ret = 0; 400 else 401 fp->cpu_fan_speed = FAN_SPEED_MAX; 402 } else { 403 if (decision == FAN_FASTER) { 404 if (fp->cpu_fan_speed >= FAN_SPEED_MAX) 405 ret = 0; 406 else { 407 fp->cpu_fan_speed += 2; 408 if (fp->system_fan_speed < 409 (fp->cpu_fan_speed - 3)) 410 fp->system_fan_speed = 411 fp->cpu_fan_speed - 3; 412 } 413 } else { 414 if (fp->cpu_fan_speed <= FAN_SPEED_MIN) 415 ret = 0; 416 else 417 fp->cpu_fan_speed -= 1; 418 } 419 } 420 421 return ret; 422 } 423 424 static void maybe_new_fan_speeds(struct bbc_fan_control *fp) 425 { 426 int new; 427 428 new = maybe_new_ambient_fan_speed(fp); 429 new |= maybe_new_cpu_fan_speed(fp); 430 431 if (new) 432 set_fan_speeds(fp); 433 } 434 435 static void fans_full_blast(void) 436 { 437 struct bbc_fan_control *fp; 438 439 /* Since we will not be monitoring things anymore, put 440 * the fans on full blast. 441 */ 442 for (fp = all_bbc_fans; fp; fp = fp->next) { 443 fp->cpu_fan_speed = FAN_SPEED_MAX; 444 fp->system_fan_speed = FAN_SPEED_MAX; 445 fp->psupply_fan_on = 1; 446 set_fan_speeds(fp); 447 } 448 } 449 450 #define POLL_INTERVAL (5 * 1000) 451 static unsigned long last_warning_jiffies; 452 static struct task_struct *kenvctrld_task; 453 454 static int kenvctrld(void *__unused) 455 { 456 printk(KERN_INFO "bbc_envctrl: kenvctrld starting...\n"); 457 last_warning_jiffies = jiffies - WARN_INTERVAL; 458 for (;;) { 459 struct bbc_cpu_temperature *tp; 460 struct bbc_fan_control *fp; 461 462 msleep_interruptible(POLL_INTERVAL); 463 if (kthread_should_stop()) 464 break; 465 466 for (tp = all_bbc_temps; tp; tp = tp->next) { 467 get_current_temps(tp); 468 analyze_temps(tp, &last_warning_jiffies); 469 } 470 for (fp = all_bbc_fans; fp; fp = fp->next) 471 maybe_new_fan_speeds(fp); 472 } 473 printk(KERN_INFO "bbc_envctrl: kenvctrld exiting...\n"); 474 475 fans_full_blast(); 476 477 return 0; 478 } 479 480 static void attach_one_temp(struct linux_ebus_child *echild, int temp_idx) 481 { 482 struct bbc_cpu_temperature *tp = kmalloc(sizeof(*tp), GFP_KERNEL); 483 484 if (!tp) 485 return; 486 memset(tp, 0, sizeof(*tp)); 487 tp->client = bbc_i2c_attach(echild); 488 if (!tp->client) { 489 kfree(tp); 490 return; 491 } 492 493 tp->index = temp_idx; 494 { 495 struct bbc_cpu_temperature **tpp = &all_bbc_temps; 496 while (*tpp) 497 tpp = &((*tpp)->next); 498 tp->next = NULL; 499 *tpp = tp; 500 } 501 502 /* Tell it to convert once every 5 seconds, clear all cfg 503 * bits. 504 */ 505 bbc_i2c_writeb(tp->client, 0x00, MAX1617_WR_CFG_BYTE); 506 bbc_i2c_writeb(tp->client, 0x02, MAX1617_WR_CVRATE_BYTE); 507 508 /* Program the hard temperature limits into the chip. */ 509 bbc_i2c_writeb(tp->client, amb_temp_limits[tp->index].high_pwroff, 510 MAX1617_WR_AMB_HIGHLIM); 511 bbc_i2c_writeb(tp->client, amb_temp_limits[tp->index].low_pwroff, 512 MAX1617_WR_AMB_LOWLIM); 513 bbc_i2c_writeb(tp->client, cpu_temp_limits[tp->index].high_pwroff, 514 MAX1617_WR_CPU_HIGHLIM); 515 bbc_i2c_writeb(tp->client, cpu_temp_limits[tp->index].low_pwroff, 516 MAX1617_WR_CPU_LOWLIM); 517 518 get_current_temps(tp); 519 tp->prev_cpu_temp = tp->avg_cpu_temp = tp->curr_cpu_temp; 520 tp->prev_amb_temp = tp->avg_amb_temp = tp->curr_amb_temp; 521 522 tp->fan_todo[FAN_AMBIENT] = FAN_SAME; 523 tp->fan_todo[FAN_CPU] = FAN_SAME; 524 } 525 526 static void attach_one_fan(struct linux_ebus_child *echild, int fan_idx) 527 { 528 struct bbc_fan_control *fp = kmalloc(sizeof(*fp), GFP_KERNEL); 529 530 if (!fp) 531 return; 532 memset(fp, 0, sizeof(*fp)); 533 fp->client = bbc_i2c_attach(echild); 534 if (!fp->client) { 535 kfree(fp); 536 return; 537 } 538 539 fp->index = fan_idx; 540 541 { 542 struct bbc_fan_control **fpp = &all_bbc_fans; 543 while (*fpp) 544 fpp = &((*fpp)->next); 545 fp->next = NULL; 546 *fpp = fp; 547 } 548 549 /* The i2c device controlling the fans is write-only. 550 * So the only way to keep track of the current power 551 * level fed to the fans is via software. Choose half 552 * power for cpu/system and 'on' fo the powersupply fan 553 * and set it now. 554 */ 555 fp->psupply_fan_on = 1; 556 fp->cpu_fan_speed = (FAN_SPEED_MAX - FAN_SPEED_MIN) / 2; 557 fp->cpu_fan_speed += FAN_SPEED_MIN; 558 fp->system_fan_speed = (FAN_SPEED_MAX - FAN_SPEED_MIN) / 2; 559 fp->system_fan_speed += FAN_SPEED_MIN; 560 561 set_fan_speeds(fp); 562 } 563 564 int bbc_envctrl_init(void) 565 { 566 struct linux_ebus_child *echild; 567 int temp_index = 0; 568 int fan_index = 0; 569 int devidx = 0; 570 571 while ((echild = bbc_i2c_getdev(devidx++)) != NULL) { 572 if (!strcmp(echild->prom_node->name, "temperature")) 573 attach_one_temp(echild, temp_index++); 574 if (!strcmp(echild->prom_node->name, "fan-control")) 575 attach_one_fan(echild, fan_index++); 576 } 577 if (temp_index != 0 && fan_index != 0) { 578 kenvctrld_task = kthread_run(kenvctrld, NULL, "kenvctrld"); 579 if (IS_ERR(kenvctrld_task)) 580 return PTR_ERR(kenvctrld_task); 581 } 582 583 return 0; 584 } 585 586 static void destroy_one_temp(struct bbc_cpu_temperature *tp) 587 { 588 bbc_i2c_detach(tp->client); 589 kfree(tp); 590 } 591 592 static void destroy_one_fan(struct bbc_fan_control *fp) 593 { 594 bbc_i2c_detach(fp->client); 595 kfree(fp); 596 } 597 598 void bbc_envctrl_cleanup(void) 599 { 600 struct bbc_cpu_temperature *tp; 601 struct bbc_fan_control *fp; 602 603 kthread_stop(kenvctrld_task); 604 605 tp = all_bbc_temps; 606 while (tp != NULL) { 607 struct bbc_cpu_temperature *next = tp->next; 608 destroy_one_temp(tp); 609 tp = next; 610 } 611 all_bbc_temps = NULL; 612 613 fp = all_bbc_fans; 614 while (fp != NULL) { 615 struct bbc_fan_control *next = fp->next; 616 destroy_one_fan(fp); 617 fp = next; 618 } 619 all_bbc_fans = NULL; 620 } 621