1 /* $Id: bbc_envctrl.c,v 1.4 2001/04/06 16:48:08 davem Exp $ 2 * bbc_envctrl.c: UltraSPARC-III environment control driver. 3 * 4 * Copyright (C) 2001 David S. Miller (davem@redhat.com) 5 */ 6 7 #define __KERNEL_SYSCALLS__ 8 static int errno; 9 10 #include <linux/kernel.h> 11 #include <linux/kthread.h> 12 #include <linux/sched.h> 13 #include <linux/slab.h> 14 #include <linux/delay.h> 15 #include <asm/oplib.h> 16 #include <asm/ebus.h> 17 18 #include "bbc_i2c.h" 19 #include "max1617.h" 20 21 #undef ENVCTRL_TRACE 22 23 /* WARNING: Making changes to this driver is very dangerous. 24 * If you misprogram the sensor chips they can 25 * cut the power on you instantly. 26 */ 27 28 /* Two temperature sensors exist in the SunBLADE-1000 enclosure. 29 * Both are implemented using max1617 i2c devices. Each max1617 30 * monitors 2 temperatures, one for one of the cpu dies and the other 31 * for the ambient temperature. 32 * 33 * The max1617 is capable of being programmed with power-off 34 * temperature values, one low limit and one high limit. These 35 * can be controlled independently for the cpu or ambient temperature. 36 * If a limit is violated, the power is simply shut off. The frequency 37 * with which the max1617 does temperature sampling can be controlled 38 * as well. 39 * 40 * Three fans exist inside the machine, all three are controlled with 41 * an i2c digital to analog converter. There is a fan directed at the 42 * two processor slots, another for the rest of the enclosure, and the 43 * third is for the power supply. The first two fans may be speed 44 * controlled by changing the voltage fed to them. The third fan may 45 * only be completely off or on. The third fan is meant to only be 46 * disabled/enabled when entering/exiting the lowest power-saving 47 * mode of the machine. 48 * 49 * An environmental control kernel thread periodically monitors all 50 * temperature sensors. Based upon the samples it will adjust the 51 * fan speeds to try and keep the system within a certain temperature 52 * range (the goal being to make the fans as quiet as possible without 53 * allowing the system to get too hot). 54 * 55 * If the temperature begins to rise/fall outside of the acceptable 56 * operating range, a periodic warning will be sent to the kernel log. 57 * The fans will be put on full blast to attempt to deal with this 58 * situation. After exceeding the acceptable operating range by a 59 * certain threshold, the kernel thread will shut down the system. 60 * Here, the thread is attempting to shut the machine down cleanly 61 * before the hardware based power-off event is triggered. 62 */ 63 64 /* These settings are in Celsius. We use these defaults only 65 * if we cannot interrogate the cpu-fru SEEPROM. 66 */ 67 struct temp_limits { 68 s8 high_pwroff, high_shutdown, high_warn; 69 s8 low_warn, low_shutdown, low_pwroff; 70 }; 71 72 static struct temp_limits cpu_temp_limits[2] = { 73 { 100, 85, 80, 5, -5, -10 }, 74 { 100, 85, 80, 5, -5, -10 }, 75 }; 76 77 static struct temp_limits amb_temp_limits[2] = { 78 { 65, 55, 40, 5, -5, -10 }, 79 { 65, 55, 40, 5, -5, -10 }, 80 }; 81 82 enum fan_action { FAN_SLOWER, FAN_SAME, FAN_FASTER, FAN_FULLBLAST, FAN_STATE_MAX }; 83 84 struct bbc_cpu_temperature { 85 struct bbc_cpu_temperature *next; 86 87 struct bbc_i2c_client *client; 88 int index; 89 90 /* Current readings, and history. */ 91 s8 curr_cpu_temp; 92 s8 curr_amb_temp; 93 s8 prev_cpu_temp; 94 s8 prev_amb_temp; 95 s8 avg_cpu_temp; 96 s8 avg_amb_temp; 97 98 int sample_tick; 99 100 enum fan_action fan_todo[2]; 101 #define FAN_AMBIENT 0 102 #define FAN_CPU 1 103 }; 104 105 struct bbc_cpu_temperature *all_bbc_temps; 106 107 struct bbc_fan_control { 108 struct bbc_fan_control *next; 109 110 struct bbc_i2c_client *client; 111 int index; 112 113 int psupply_fan_on; 114 int cpu_fan_speed; 115 int system_fan_speed; 116 }; 117 118 struct bbc_fan_control *all_bbc_fans; 119 120 #define CPU_FAN_REG 0xf0 121 #define SYS_FAN_REG 0xf2 122 #define PSUPPLY_FAN_REG 0xf4 123 124 #define FAN_SPEED_MIN 0x0c 125 #define FAN_SPEED_MAX 0x3f 126 127 #define PSUPPLY_FAN_ON 0x1f 128 #define PSUPPLY_FAN_OFF 0x00 129 130 static void set_fan_speeds(struct bbc_fan_control *fp) 131 { 132 /* Put temperatures into range so we don't mis-program 133 * the hardware. 134 */ 135 if (fp->cpu_fan_speed < FAN_SPEED_MIN) 136 fp->cpu_fan_speed = FAN_SPEED_MIN; 137 if (fp->cpu_fan_speed > FAN_SPEED_MAX) 138 fp->cpu_fan_speed = FAN_SPEED_MAX; 139 if (fp->system_fan_speed < FAN_SPEED_MIN) 140 fp->system_fan_speed = FAN_SPEED_MIN; 141 if (fp->system_fan_speed > FAN_SPEED_MAX) 142 fp->system_fan_speed = FAN_SPEED_MAX; 143 #ifdef ENVCTRL_TRACE 144 printk("fan%d: Changed fan speed to cpu(%02x) sys(%02x)\n", 145 fp->index, 146 fp->cpu_fan_speed, fp->system_fan_speed); 147 #endif 148 149 bbc_i2c_writeb(fp->client, fp->cpu_fan_speed, CPU_FAN_REG); 150 bbc_i2c_writeb(fp->client, fp->system_fan_speed, SYS_FAN_REG); 151 bbc_i2c_writeb(fp->client, 152 (fp->psupply_fan_on ? 153 PSUPPLY_FAN_ON : PSUPPLY_FAN_OFF), 154 PSUPPLY_FAN_REG); 155 } 156 157 static void get_current_temps(struct bbc_cpu_temperature *tp) 158 { 159 tp->prev_amb_temp = tp->curr_amb_temp; 160 bbc_i2c_readb(tp->client, 161 (unsigned char *) &tp->curr_amb_temp, 162 MAX1617_AMB_TEMP); 163 tp->prev_cpu_temp = tp->curr_cpu_temp; 164 bbc_i2c_readb(tp->client, 165 (unsigned char *) &tp->curr_cpu_temp, 166 MAX1617_CPU_TEMP); 167 #ifdef ENVCTRL_TRACE 168 printk("temp%d: cpu(%d C) amb(%d C)\n", 169 tp->index, 170 (int) tp->curr_cpu_temp, (int) tp->curr_amb_temp); 171 #endif 172 } 173 174 175 static void do_envctrl_shutdown(struct bbc_cpu_temperature *tp) 176 { 177 static int shutting_down = 0; 178 static char *envp[] = { "HOME=/", "TERM=linux", "PATH=/sbin:/usr/sbin:/bin:/usr/bin", NULL }; 179 char *argv[] = { "/sbin/shutdown", "-h", "now", NULL }; 180 char *type = "???"; 181 s8 val = -1; 182 183 if (shutting_down != 0) 184 return; 185 186 if (tp->curr_amb_temp >= amb_temp_limits[tp->index].high_shutdown || 187 tp->curr_amb_temp < amb_temp_limits[tp->index].low_shutdown) { 188 type = "ambient"; 189 val = tp->curr_amb_temp; 190 } else if (tp->curr_cpu_temp >= cpu_temp_limits[tp->index].high_shutdown || 191 tp->curr_cpu_temp < cpu_temp_limits[tp->index].low_shutdown) { 192 type = "CPU"; 193 val = tp->curr_cpu_temp; 194 } 195 196 printk(KERN_CRIT "temp%d: Outside of safe %s " 197 "operating temperature, %d C.\n", 198 tp->index, type, val); 199 200 printk(KERN_CRIT "kenvctrld: Shutting down the system now.\n"); 201 202 shutting_down = 1; 203 if (execve("/sbin/shutdown", argv, envp) < 0) 204 printk(KERN_CRIT "envctrl: shutdown execution failed\n"); 205 } 206 207 #define WARN_INTERVAL (30 * HZ) 208 209 static void analyze_ambient_temp(struct bbc_cpu_temperature *tp, unsigned long *last_warn, int tick) 210 { 211 int ret = 0; 212 213 if (time_after(jiffies, (*last_warn + WARN_INTERVAL))) { 214 if (tp->curr_amb_temp >= 215 amb_temp_limits[tp->index].high_warn) { 216 printk(KERN_WARNING "temp%d: " 217 "Above safe ambient operating temperature, %d C.\n", 218 tp->index, (int) tp->curr_amb_temp); 219 ret = 1; 220 } else if (tp->curr_amb_temp < 221 amb_temp_limits[tp->index].low_warn) { 222 printk(KERN_WARNING "temp%d: " 223 "Below safe ambient operating temperature, %d C.\n", 224 tp->index, (int) tp->curr_amb_temp); 225 ret = 1; 226 } 227 if (ret) 228 *last_warn = jiffies; 229 } else if (tp->curr_amb_temp >= amb_temp_limits[tp->index].high_warn || 230 tp->curr_amb_temp < amb_temp_limits[tp->index].low_warn) 231 ret = 1; 232 233 /* Now check the shutdown limits. */ 234 if (tp->curr_amb_temp >= amb_temp_limits[tp->index].high_shutdown || 235 tp->curr_amb_temp < amb_temp_limits[tp->index].low_shutdown) { 236 do_envctrl_shutdown(tp); 237 ret = 1; 238 } 239 240 if (ret) { 241 tp->fan_todo[FAN_AMBIENT] = FAN_FULLBLAST; 242 } else if ((tick & (8 - 1)) == 0) { 243 s8 amb_goal_hi = amb_temp_limits[tp->index].high_warn - 10; 244 s8 amb_goal_lo; 245 246 amb_goal_lo = amb_goal_hi - 3; 247 248 /* We do not try to avoid 'too cold' events. Basically we 249 * only try to deal with over-heating and fan noise reduction. 250 */ 251 if (tp->avg_amb_temp < amb_goal_hi) { 252 if (tp->avg_amb_temp >= amb_goal_lo) 253 tp->fan_todo[FAN_AMBIENT] = FAN_SAME; 254 else 255 tp->fan_todo[FAN_AMBIENT] = FAN_SLOWER; 256 } else { 257 tp->fan_todo[FAN_AMBIENT] = FAN_FASTER; 258 } 259 } else { 260 tp->fan_todo[FAN_AMBIENT] = FAN_SAME; 261 } 262 } 263 264 static void analyze_cpu_temp(struct bbc_cpu_temperature *tp, unsigned long *last_warn, int tick) 265 { 266 int ret = 0; 267 268 if (time_after(jiffies, (*last_warn + WARN_INTERVAL))) { 269 if (tp->curr_cpu_temp >= 270 cpu_temp_limits[tp->index].high_warn) { 271 printk(KERN_WARNING "temp%d: " 272 "Above safe CPU operating temperature, %d C.\n", 273 tp->index, (int) tp->curr_cpu_temp); 274 ret = 1; 275 } else if (tp->curr_cpu_temp < 276 cpu_temp_limits[tp->index].low_warn) { 277 printk(KERN_WARNING "temp%d: " 278 "Below safe CPU operating temperature, %d C.\n", 279 tp->index, (int) tp->curr_cpu_temp); 280 ret = 1; 281 } 282 if (ret) 283 *last_warn = jiffies; 284 } else if (tp->curr_cpu_temp >= cpu_temp_limits[tp->index].high_warn || 285 tp->curr_cpu_temp < cpu_temp_limits[tp->index].low_warn) 286 ret = 1; 287 288 /* Now check the shutdown limits. */ 289 if (tp->curr_cpu_temp >= cpu_temp_limits[tp->index].high_shutdown || 290 tp->curr_cpu_temp < cpu_temp_limits[tp->index].low_shutdown) { 291 do_envctrl_shutdown(tp); 292 ret = 1; 293 } 294 295 if (ret) { 296 tp->fan_todo[FAN_CPU] = FAN_FULLBLAST; 297 } else if ((tick & (8 - 1)) == 0) { 298 s8 cpu_goal_hi = cpu_temp_limits[tp->index].high_warn - 10; 299 s8 cpu_goal_lo; 300 301 cpu_goal_lo = cpu_goal_hi - 3; 302 303 /* We do not try to avoid 'too cold' events. Basically we 304 * only try to deal with over-heating and fan noise reduction. 305 */ 306 if (tp->avg_cpu_temp < cpu_goal_hi) { 307 if (tp->avg_cpu_temp >= cpu_goal_lo) 308 tp->fan_todo[FAN_CPU] = FAN_SAME; 309 else 310 tp->fan_todo[FAN_CPU] = FAN_SLOWER; 311 } else { 312 tp->fan_todo[FAN_CPU] = FAN_FASTER; 313 } 314 } else { 315 tp->fan_todo[FAN_CPU] = FAN_SAME; 316 } 317 } 318 319 static void analyze_temps(struct bbc_cpu_temperature *tp, unsigned long *last_warn) 320 { 321 tp->avg_amb_temp = (s8)((int)((int)tp->avg_amb_temp + (int)tp->curr_amb_temp) / 2); 322 tp->avg_cpu_temp = (s8)((int)((int)tp->avg_cpu_temp + (int)tp->curr_cpu_temp) / 2); 323 324 analyze_ambient_temp(tp, last_warn, tp->sample_tick); 325 analyze_cpu_temp(tp, last_warn, tp->sample_tick); 326 327 tp->sample_tick++; 328 } 329 330 static enum fan_action prioritize_fan_action(int which_fan) 331 { 332 struct bbc_cpu_temperature *tp; 333 enum fan_action decision = FAN_STATE_MAX; 334 335 /* Basically, prioritize what the temperature sensors 336 * recommend we do, and perform that action on all the 337 * fans. 338 */ 339 for (tp = all_bbc_temps; tp; tp = tp->next) { 340 if (tp->fan_todo[which_fan] == FAN_FULLBLAST) { 341 decision = FAN_FULLBLAST; 342 break; 343 } 344 if (tp->fan_todo[which_fan] == FAN_SAME && 345 decision != FAN_FASTER) 346 decision = FAN_SAME; 347 else if (tp->fan_todo[which_fan] == FAN_FASTER) 348 decision = FAN_FASTER; 349 else if (decision != FAN_FASTER && 350 decision != FAN_SAME && 351 tp->fan_todo[which_fan] == FAN_SLOWER) 352 decision = FAN_SLOWER; 353 } 354 if (decision == FAN_STATE_MAX) 355 decision = FAN_SAME; 356 357 return decision; 358 } 359 360 static int maybe_new_ambient_fan_speed(struct bbc_fan_control *fp) 361 { 362 enum fan_action decision = prioritize_fan_action(FAN_AMBIENT); 363 int ret; 364 365 if (decision == FAN_SAME) 366 return 0; 367 368 ret = 1; 369 if (decision == FAN_FULLBLAST) { 370 if (fp->system_fan_speed >= FAN_SPEED_MAX) 371 ret = 0; 372 else 373 fp->system_fan_speed = FAN_SPEED_MAX; 374 } else { 375 if (decision == FAN_FASTER) { 376 if (fp->system_fan_speed >= FAN_SPEED_MAX) 377 ret = 0; 378 else 379 fp->system_fan_speed += 2; 380 } else { 381 int orig_speed = fp->system_fan_speed; 382 383 if (orig_speed <= FAN_SPEED_MIN || 384 orig_speed <= (fp->cpu_fan_speed - 3)) 385 ret = 0; 386 else 387 fp->system_fan_speed -= 1; 388 } 389 } 390 391 return ret; 392 } 393 394 static int maybe_new_cpu_fan_speed(struct bbc_fan_control *fp) 395 { 396 enum fan_action decision = prioritize_fan_action(FAN_CPU); 397 int ret; 398 399 if (decision == FAN_SAME) 400 return 0; 401 402 ret = 1; 403 if (decision == FAN_FULLBLAST) { 404 if (fp->cpu_fan_speed >= FAN_SPEED_MAX) 405 ret = 0; 406 else 407 fp->cpu_fan_speed = FAN_SPEED_MAX; 408 } else { 409 if (decision == FAN_FASTER) { 410 if (fp->cpu_fan_speed >= FAN_SPEED_MAX) 411 ret = 0; 412 else { 413 fp->cpu_fan_speed += 2; 414 if (fp->system_fan_speed < 415 (fp->cpu_fan_speed - 3)) 416 fp->system_fan_speed = 417 fp->cpu_fan_speed - 3; 418 } 419 } else { 420 if (fp->cpu_fan_speed <= FAN_SPEED_MIN) 421 ret = 0; 422 else 423 fp->cpu_fan_speed -= 1; 424 } 425 } 426 427 return ret; 428 } 429 430 static void maybe_new_fan_speeds(struct bbc_fan_control *fp) 431 { 432 int new; 433 434 new = maybe_new_ambient_fan_speed(fp); 435 new |= maybe_new_cpu_fan_speed(fp); 436 437 if (new) 438 set_fan_speeds(fp); 439 } 440 441 static void fans_full_blast(void) 442 { 443 struct bbc_fan_control *fp; 444 445 /* Since we will not be monitoring things anymore, put 446 * the fans on full blast. 447 */ 448 for (fp = all_bbc_fans; fp; fp = fp->next) { 449 fp->cpu_fan_speed = FAN_SPEED_MAX; 450 fp->system_fan_speed = FAN_SPEED_MAX; 451 fp->psupply_fan_on = 1; 452 set_fan_speeds(fp); 453 } 454 } 455 456 #define POLL_INTERVAL (5 * 1000) 457 static unsigned long last_warning_jiffies; 458 static struct task_struct *kenvctrld_task; 459 460 static int kenvctrld(void *__unused) 461 { 462 printk(KERN_INFO "bbc_envctrl: kenvctrld starting...\n"); 463 last_warning_jiffies = jiffies - WARN_INTERVAL; 464 for (;;) { 465 struct bbc_cpu_temperature *tp; 466 struct bbc_fan_control *fp; 467 468 msleep_interruptible(POLL_INTERVAL); 469 if (kthread_should_stop()) 470 break; 471 472 for (tp = all_bbc_temps; tp; tp = tp->next) { 473 get_current_temps(tp); 474 analyze_temps(tp, &last_warning_jiffies); 475 } 476 for (fp = all_bbc_fans; fp; fp = fp->next) 477 maybe_new_fan_speeds(fp); 478 } 479 printk(KERN_INFO "bbc_envctrl: kenvctrld exiting...\n"); 480 481 fans_full_blast(); 482 483 return 0; 484 } 485 486 static void attach_one_temp(struct linux_ebus_child *echild, int temp_idx) 487 { 488 struct bbc_cpu_temperature *tp = kmalloc(sizeof(*tp), GFP_KERNEL); 489 490 if (!tp) 491 return; 492 memset(tp, 0, sizeof(*tp)); 493 tp->client = bbc_i2c_attach(echild); 494 if (!tp->client) { 495 kfree(tp); 496 return; 497 } 498 499 tp->index = temp_idx; 500 { 501 struct bbc_cpu_temperature **tpp = &all_bbc_temps; 502 while (*tpp) 503 tpp = &((*tpp)->next); 504 tp->next = NULL; 505 *tpp = tp; 506 } 507 508 /* Tell it to convert once every 5 seconds, clear all cfg 509 * bits. 510 */ 511 bbc_i2c_writeb(tp->client, 0x00, MAX1617_WR_CFG_BYTE); 512 bbc_i2c_writeb(tp->client, 0x02, MAX1617_WR_CVRATE_BYTE); 513 514 /* Program the hard temperature limits into the chip. */ 515 bbc_i2c_writeb(tp->client, amb_temp_limits[tp->index].high_pwroff, 516 MAX1617_WR_AMB_HIGHLIM); 517 bbc_i2c_writeb(tp->client, amb_temp_limits[tp->index].low_pwroff, 518 MAX1617_WR_AMB_LOWLIM); 519 bbc_i2c_writeb(tp->client, cpu_temp_limits[tp->index].high_pwroff, 520 MAX1617_WR_CPU_HIGHLIM); 521 bbc_i2c_writeb(tp->client, cpu_temp_limits[tp->index].low_pwroff, 522 MAX1617_WR_CPU_LOWLIM); 523 524 get_current_temps(tp); 525 tp->prev_cpu_temp = tp->avg_cpu_temp = tp->curr_cpu_temp; 526 tp->prev_amb_temp = tp->avg_amb_temp = tp->curr_amb_temp; 527 528 tp->fan_todo[FAN_AMBIENT] = FAN_SAME; 529 tp->fan_todo[FAN_CPU] = FAN_SAME; 530 } 531 532 static void attach_one_fan(struct linux_ebus_child *echild, int fan_idx) 533 { 534 struct bbc_fan_control *fp = kmalloc(sizeof(*fp), GFP_KERNEL); 535 536 if (!fp) 537 return; 538 memset(fp, 0, sizeof(*fp)); 539 fp->client = bbc_i2c_attach(echild); 540 if (!fp->client) { 541 kfree(fp); 542 return; 543 } 544 545 fp->index = fan_idx; 546 547 { 548 struct bbc_fan_control **fpp = &all_bbc_fans; 549 while (*fpp) 550 fpp = &((*fpp)->next); 551 fp->next = NULL; 552 *fpp = fp; 553 } 554 555 /* The i2c device controlling the fans is write-only. 556 * So the only way to keep track of the current power 557 * level fed to the fans is via software. Choose half 558 * power for cpu/system and 'on' fo the powersupply fan 559 * and set it now. 560 */ 561 fp->psupply_fan_on = 1; 562 fp->cpu_fan_speed = (FAN_SPEED_MAX - FAN_SPEED_MIN) / 2; 563 fp->cpu_fan_speed += FAN_SPEED_MIN; 564 fp->system_fan_speed = (FAN_SPEED_MAX - FAN_SPEED_MIN) / 2; 565 fp->system_fan_speed += FAN_SPEED_MIN; 566 567 set_fan_speeds(fp); 568 } 569 570 int bbc_envctrl_init(void) 571 { 572 struct linux_ebus_child *echild; 573 int temp_index = 0; 574 int fan_index = 0; 575 int devidx = 0; 576 577 while ((echild = bbc_i2c_getdev(devidx++)) != NULL) { 578 if (!strcmp(echild->prom_name, "temperature")) 579 attach_one_temp(echild, temp_index++); 580 if (!strcmp(echild->prom_name, "fan-control")) 581 attach_one_fan(echild, fan_index++); 582 } 583 if (temp_index != 0 && fan_index != 0) { 584 kenvctrld_task = kthread_run(kenvctrld, NULL, "kenvctrld"); 585 if (IS_ERR(kenvctrld_task)) 586 return PTR_ERR(kenvctrld_task); 587 } 588 589 return 0; 590 } 591 592 static void destroy_one_temp(struct bbc_cpu_temperature *tp) 593 { 594 bbc_i2c_detach(tp->client); 595 kfree(tp); 596 } 597 598 static void destroy_one_fan(struct bbc_fan_control *fp) 599 { 600 bbc_i2c_detach(fp->client); 601 kfree(fp); 602 } 603 604 void bbc_envctrl_cleanup(void) 605 { 606 struct bbc_cpu_temperature *tp; 607 struct bbc_fan_control *fp; 608 609 kthread_stop(kenvctrld_task); 610 611 tp = all_bbc_temps; 612 while (tp != NULL) { 613 struct bbc_cpu_temperature *next = tp->next; 614 destroy_one_temp(tp); 615 tp = next; 616 } 617 all_bbc_temps = NULL; 618 619 fp = all_bbc_fans; 620 while (fp != NULL) { 621 struct bbc_fan_control *next = fp->next; 622 destroy_one_fan(fp); 623 fp = next; 624 } 625 all_bbc_fans = NULL; 626 } 627