1 /* bbc_envctrl.c: UltraSPARC-III environment control driver. 2 * 3 * Copyright (C) 2001, 2008 David S. Miller (davem@davemloft.net) 4 */ 5 6 #include <linux/kthread.h> 7 #include <linux/delay.h> 8 #include <linux/kmod.h> 9 #include <linux/reboot.h> 10 #include <linux/of.h> 11 #include <linux/of_device.h> 12 #include <asm/oplib.h> 13 14 #include "bbc_i2c.h" 15 #include "max1617.h" 16 17 #undef ENVCTRL_TRACE 18 19 /* WARNING: Making changes to this driver is very dangerous. 20 * If you misprogram the sensor chips they can 21 * cut the power on you instantly. 22 */ 23 24 /* Two temperature sensors exist in the SunBLADE-1000 enclosure. 25 * Both are implemented using max1617 i2c devices. Each max1617 26 * monitors 2 temperatures, one for one of the cpu dies and the other 27 * for the ambient temperature. 28 * 29 * The max1617 is capable of being programmed with power-off 30 * temperature values, one low limit and one high limit. These 31 * can be controlled independently for the cpu or ambient temperature. 32 * If a limit is violated, the power is simply shut off. The frequency 33 * with which the max1617 does temperature sampling can be controlled 34 * as well. 35 * 36 * Three fans exist inside the machine, all three are controlled with 37 * an i2c digital to analog converter. There is a fan directed at the 38 * two processor slots, another for the rest of the enclosure, and the 39 * third is for the power supply. The first two fans may be speed 40 * controlled by changing the voltage fed to them. The third fan may 41 * only be completely off or on. The third fan is meant to only be 42 * disabled/enabled when entering/exiting the lowest power-saving 43 * mode of the machine. 44 * 45 * An environmental control kernel thread periodically monitors all 46 * temperature sensors. Based upon the samples it will adjust the 47 * fan speeds to try and keep the system within a certain temperature 48 * range (the goal being to make the fans as quiet as possible without 49 * allowing the system to get too hot). 50 * 51 * If the temperature begins to rise/fall outside of the acceptable 52 * operating range, a periodic warning will be sent to the kernel log. 53 * The fans will be put on full blast to attempt to deal with this 54 * situation. After exceeding the acceptable operating range by a 55 * certain threshold, the kernel thread will shut down the system. 56 * Here, the thread is attempting to shut the machine down cleanly 57 * before the hardware based power-off event is triggered. 58 */ 59 60 /* These settings are in Celsius. We use these defaults only 61 * if we cannot interrogate the cpu-fru SEEPROM. 62 */ 63 struct temp_limits { 64 s8 high_pwroff, high_shutdown, high_warn; 65 s8 low_warn, low_shutdown, low_pwroff; 66 }; 67 68 static struct temp_limits cpu_temp_limits[2] = { 69 { 100, 85, 80, 5, -5, -10 }, 70 { 100, 85, 80, 5, -5, -10 }, 71 }; 72 73 static struct temp_limits amb_temp_limits[2] = { 74 { 65, 55, 40, 5, -5, -10 }, 75 { 65, 55, 40, 5, -5, -10 }, 76 }; 77 78 static LIST_HEAD(all_temps); 79 static LIST_HEAD(all_fans); 80 81 #define CPU_FAN_REG 0xf0 82 #define SYS_FAN_REG 0xf2 83 #define PSUPPLY_FAN_REG 0xf4 84 85 #define FAN_SPEED_MIN 0x0c 86 #define FAN_SPEED_MAX 0x3f 87 88 #define PSUPPLY_FAN_ON 0x1f 89 #define PSUPPLY_FAN_OFF 0x00 90 91 static void set_fan_speeds(struct bbc_fan_control *fp) 92 { 93 /* Put temperatures into range so we don't mis-program 94 * the hardware. 95 */ 96 if (fp->cpu_fan_speed < FAN_SPEED_MIN) 97 fp->cpu_fan_speed = FAN_SPEED_MIN; 98 if (fp->cpu_fan_speed > FAN_SPEED_MAX) 99 fp->cpu_fan_speed = FAN_SPEED_MAX; 100 if (fp->system_fan_speed < FAN_SPEED_MIN) 101 fp->system_fan_speed = FAN_SPEED_MIN; 102 if (fp->system_fan_speed > FAN_SPEED_MAX) 103 fp->system_fan_speed = FAN_SPEED_MAX; 104 #ifdef ENVCTRL_TRACE 105 printk("fan%d: Changed fan speed to cpu(%02x) sys(%02x)\n", 106 fp->index, 107 fp->cpu_fan_speed, fp->system_fan_speed); 108 #endif 109 110 bbc_i2c_writeb(fp->client, fp->cpu_fan_speed, CPU_FAN_REG); 111 bbc_i2c_writeb(fp->client, fp->system_fan_speed, SYS_FAN_REG); 112 bbc_i2c_writeb(fp->client, 113 (fp->psupply_fan_on ? 114 PSUPPLY_FAN_ON : PSUPPLY_FAN_OFF), 115 PSUPPLY_FAN_REG); 116 } 117 118 static void get_current_temps(struct bbc_cpu_temperature *tp) 119 { 120 tp->prev_amb_temp = tp->curr_amb_temp; 121 bbc_i2c_readb(tp->client, 122 (unsigned char *) &tp->curr_amb_temp, 123 MAX1617_AMB_TEMP); 124 tp->prev_cpu_temp = tp->curr_cpu_temp; 125 bbc_i2c_readb(tp->client, 126 (unsigned char *) &tp->curr_cpu_temp, 127 MAX1617_CPU_TEMP); 128 #ifdef ENVCTRL_TRACE 129 printk("temp%d: cpu(%d C) amb(%d C)\n", 130 tp->index, 131 (int) tp->curr_cpu_temp, (int) tp->curr_amb_temp); 132 #endif 133 } 134 135 136 static void do_envctrl_shutdown(struct bbc_cpu_temperature *tp) 137 { 138 static int shutting_down = 0; 139 char *type = "???"; 140 s8 val = -1; 141 142 if (shutting_down != 0) 143 return; 144 145 if (tp->curr_amb_temp >= amb_temp_limits[tp->index].high_shutdown || 146 tp->curr_amb_temp < amb_temp_limits[tp->index].low_shutdown) { 147 type = "ambient"; 148 val = tp->curr_amb_temp; 149 } else if (tp->curr_cpu_temp >= cpu_temp_limits[tp->index].high_shutdown || 150 tp->curr_cpu_temp < cpu_temp_limits[tp->index].low_shutdown) { 151 type = "CPU"; 152 val = tp->curr_cpu_temp; 153 } 154 155 printk(KERN_CRIT "temp%d: Outside of safe %s " 156 "operating temperature, %d C.\n", 157 tp->index, type, val); 158 159 printk(KERN_CRIT "kenvctrld: Shutting down the system now.\n"); 160 161 shutting_down = 1; 162 if (orderly_poweroff(true) < 0) 163 printk(KERN_CRIT "envctrl: shutdown execution failed\n"); 164 } 165 166 #define WARN_INTERVAL (30 * HZ) 167 168 static void analyze_ambient_temp(struct bbc_cpu_temperature *tp, unsigned long *last_warn, int tick) 169 { 170 int ret = 0; 171 172 if (time_after(jiffies, (*last_warn + WARN_INTERVAL))) { 173 if (tp->curr_amb_temp >= 174 amb_temp_limits[tp->index].high_warn) { 175 printk(KERN_WARNING "temp%d: " 176 "Above safe ambient operating temperature, %d C.\n", 177 tp->index, (int) tp->curr_amb_temp); 178 ret = 1; 179 } else if (tp->curr_amb_temp < 180 amb_temp_limits[tp->index].low_warn) { 181 printk(KERN_WARNING "temp%d: " 182 "Below safe ambient operating temperature, %d C.\n", 183 tp->index, (int) tp->curr_amb_temp); 184 ret = 1; 185 } 186 if (ret) 187 *last_warn = jiffies; 188 } else if (tp->curr_amb_temp >= amb_temp_limits[tp->index].high_warn || 189 tp->curr_amb_temp < amb_temp_limits[tp->index].low_warn) 190 ret = 1; 191 192 /* Now check the shutdown limits. */ 193 if (tp->curr_amb_temp >= amb_temp_limits[tp->index].high_shutdown || 194 tp->curr_amb_temp < amb_temp_limits[tp->index].low_shutdown) { 195 do_envctrl_shutdown(tp); 196 ret = 1; 197 } 198 199 if (ret) { 200 tp->fan_todo[FAN_AMBIENT] = FAN_FULLBLAST; 201 } else if ((tick & (8 - 1)) == 0) { 202 s8 amb_goal_hi = amb_temp_limits[tp->index].high_warn - 10; 203 s8 amb_goal_lo; 204 205 amb_goal_lo = amb_goal_hi - 3; 206 207 /* We do not try to avoid 'too cold' events. Basically we 208 * only try to deal with over-heating and fan noise reduction. 209 */ 210 if (tp->avg_amb_temp < amb_goal_hi) { 211 if (tp->avg_amb_temp >= amb_goal_lo) 212 tp->fan_todo[FAN_AMBIENT] = FAN_SAME; 213 else 214 tp->fan_todo[FAN_AMBIENT] = FAN_SLOWER; 215 } else { 216 tp->fan_todo[FAN_AMBIENT] = FAN_FASTER; 217 } 218 } else { 219 tp->fan_todo[FAN_AMBIENT] = FAN_SAME; 220 } 221 } 222 223 static void analyze_cpu_temp(struct bbc_cpu_temperature *tp, unsigned long *last_warn, int tick) 224 { 225 int ret = 0; 226 227 if (time_after(jiffies, (*last_warn + WARN_INTERVAL))) { 228 if (tp->curr_cpu_temp >= 229 cpu_temp_limits[tp->index].high_warn) { 230 printk(KERN_WARNING "temp%d: " 231 "Above safe CPU operating temperature, %d C.\n", 232 tp->index, (int) tp->curr_cpu_temp); 233 ret = 1; 234 } else if (tp->curr_cpu_temp < 235 cpu_temp_limits[tp->index].low_warn) { 236 printk(KERN_WARNING "temp%d: " 237 "Below safe CPU operating temperature, %d C.\n", 238 tp->index, (int) tp->curr_cpu_temp); 239 ret = 1; 240 } 241 if (ret) 242 *last_warn = jiffies; 243 } else if (tp->curr_cpu_temp >= cpu_temp_limits[tp->index].high_warn || 244 tp->curr_cpu_temp < cpu_temp_limits[tp->index].low_warn) 245 ret = 1; 246 247 /* Now check the shutdown limits. */ 248 if (tp->curr_cpu_temp >= cpu_temp_limits[tp->index].high_shutdown || 249 tp->curr_cpu_temp < cpu_temp_limits[tp->index].low_shutdown) { 250 do_envctrl_shutdown(tp); 251 ret = 1; 252 } 253 254 if (ret) { 255 tp->fan_todo[FAN_CPU] = FAN_FULLBLAST; 256 } else if ((tick & (8 - 1)) == 0) { 257 s8 cpu_goal_hi = cpu_temp_limits[tp->index].high_warn - 10; 258 s8 cpu_goal_lo; 259 260 cpu_goal_lo = cpu_goal_hi - 3; 261 262 /* We do not try to avoid 'too cold' events. Basically we 263 * only try to deal with over-heating and fan noise reduction. 264 */ 265 if (tp->avg_cpu_temp < cpu_goal_hi) { 266 if (tp->avg_cpu_temp >= cpu_goal_lo) 267 tp->fan_todo[FAN_CPU] = FAN_SAME; 268 else 269 tp->fan_todo[FAN_CPU] = FAN_SLOWER; 270 } else { 271 tp->fan_todo[FAN_CPU] = FAN_FASTER; 272 } 273 } else { 274 tp->fan_todo[FAN_CPU] = FAN_SAME; 275 } 276 } 277 278 static void analyze_temps(struct bbc_cpu_temperature *tp, unsigned long *last_warn) 279 { 280 tp->avg_amb_temp = (s8)((int)((int)tp->avg_amb_temp + (int)tp->curr_amb_temp) / 2); 281 tp->avg_cpu_temp = (s8)((int)((int)tp->avg_cpu_temp + (int)tp->curr_cpu_temp) / 2); 282 283 analyze_ambient_temp(tp, last_warn, tp->sample_tick); 284 analyze_cpu_temp(tp, last_warn, tp->sample_tick); 285 286 tp->sample_tick++; 287 } 288 289 static enum fan_action prioritize_fan_action(int which_fan) 290 { 291 struct bbc_cpu_temperature *tp; 292 enum fan_action decision = FAN_STATE_MAX; 293 294 /* Basically, prioritize what the temperature sensors 295 * recommend we do, and perform that action on all the 296 * fans. 297 */ 298 list_for_each_entry(tp, &all_temps, glob_list) { 299 if (tp->fan_todo[which_fan] == FAN_FULLBLAST) { 300 decision = FAN_FULLBLAST; 301 break; 302 } 303 if (tp->fan_todo[which_fan] == FAN_SAME && 304 decision != FAN_FASTER) 305 decision = FAN_SAME; 306 else if (tp->fan_todo[which_fan] == FAN_FASTER) 307 decision = FAN_FASTER; 308 else if (decision != FAN_FASTER && 309 decision != FAN_SAME && 310 tp->fan_todo[which_fan] == FAN_SLOWER) 311 decision = FAN_SLOWER; 312 } 313 if (decision == FAN_STATE_MAX) 314 decision = FAN_SAME; 315 316 return decision; 317 } 318 319 static int maybe_new_ambient_fan_speed(struct bbc_fan_control *fp) 320 { 321 enum fan_action decision = prioritize_fan_action(FAN_AMBIENT); 322 int ret; 323 324 if (decision == FAN_SAME) 325 return 0; 326 327 ret = 1; 328 if (decision == FAN_FULLBLAST) { 329 if (fp->system_fan_speed >= FAN_SPEED_MAX) 330 ret = 0; 331 else 332 fp->system_fan_speed = FAN_SPEED_MAX; 333 } else { 334 if (decision == FAN_FASTER) { 335 if (fp->system_fan_speed >= FAN_SPEED_MAX) 336 ret = 0; 337 else 338 fp->system_fan_speed += 2; 339 } else { 340 int orig_speed = fp->system_fan_speed; 341 342 if (orig_speed <= FAN_SPEED_MIN || 343 orig_speed <= (fp->cpu_fan_speed - 3)) 344 ret = 0; 345 else 346 fp->system_fan_speed -= 1; 347 } 348 } 349 350 return ret; 351 } 352 353 static int maybe_new_cpu_fan_speed(struct bbc_fan_control *fp) 354 { 355 enum fan_action decision = prioritize_fan_action(FAN_CPU); 356 int ret; 357 358 if (decision == FAN_SAME) 359 return 0; 360 361 ret = 1; 362 if (decision == FAN_FULLBLAST) { 363 if (fp->cpu_fan_speed >= FAN_SPEED_MAX) 364 ret = 0; 365 else 366 fp->cpu_fan_speed = FAN_SPEED_MAX; 367 } else { 368 if (decision == FAN_FASTER) { 369 if (fp->cpu_fan_speed >= FAN_SPEED_MAX) 370 ret = 0; 371 else { 372 fp->cpu_fan_speed += 2; 373 if (fp->system_fan_speed < 374 (fp->cpu_fan_speed - 3)) 375 fp->system_fan_speed = 376 fp->cpu_fan_speed - 3; 377 } 378 } else { 379 if (fp->cpu_fan_speed <= FAN_SPEED_MIN) 380 ret = 0; 381 else 382 fp->cpu_fan_speed -= 1; 383 } 384 } 385 386 return ret; 387 } 388 389 static void maybe_new_fan_speeds(struct bbc_fan_control *fp) 390 { 391 int new; 392 393 new = maybe_new_ambient_fan_speed(fp); 394 new |= maybe_new_cpu_fan_speed(fp); 395 396 if (new) 397 set_fan_speeds(fp); 398 } 399 400 static void fans_full_blast(void) 401 { 402 struct bbc_fan_control *fp; 403 404 /* Since we will not be monitoring things anymore, put 405 * the fans on full blast. 406 */ 407 list_for_each_entry(fp, &all_fans, glob_list) { 408 fp->cpu_fan_speed = FAN_SPEED_MAX; 409 fp->system_fan_speed = FAN_SPEED_MAX; 410 fp->psupply_fan_on = 1; 411 set_fan_speeds(fp); 412 } 413 } 414 415 #define POLL_INTERVAL (5 * 1000) 416 static unsigned long last_warning_jiffies; 417 static struct task_struct *kenvctrld_task; 418 419 static int kenvctrld(void *__unused) 420 { 421 printk(KERN_INFO "bbc_envctrl: kenvctrld starting...\n"); 422 last_warning_jiffies = jiffies - WARN_INTERVAL; 423 for (;;) { 424 struct bbc_cpu_temperature *tp; 425 struct bbc_fan_control *fp; 426 427 msleep_interruptible(POLL_INTERVAL); 428 if (kthread_should_stop()) 429 break; 430 431 list_for_each_entry(tp, &all_temps, glob_list) { 432 get_current_temps(tp); 433 analyze_temps(tp, &last_warning_jiffies); 434 } 435 list_for_each_entry(fp, &all_fans, glob_list) 436 maybe_new_fan_speeds(fp); 437 } 438 printk(KERN_INFO "bbc_envctrl: kenvctrld exiting...\n"); 439 440 fans_full_blast(); 441 442 return 0; 443 } 444 445 static void attach_one_temp(struct bbc_i2c_bus *bp, struct of_device *op, 446 int temp_idx) 447 { 448 struct bbc_cpu_temperature *tp; 449 450 tp = kzalloc(sizeof(*tp), GFP_KERNEL); 451 if (!tp) 452 return; 453 454 tp->client = bbc_i2c_attach(bp, op); 455 if (!tp->client) { 456 kfree(tp); 457 return; 458 } 459 460 461 tp->index = temp_idx; 462 463 list_add(&tp->glob_list, &all_temps); 464 list_add(&tp->bp_list, &bp->temps); 465 466 /* Tell it to convert once every 5 seconds, clear all cfg 467 * bits. 468 */ 469 bbc_i2c_writeb(tp->client, 0x00, MAX1617_WR_CFG_BYTE); 470 bbc_i2c_writeb(tp->client, 0x02, MAX1617_WR_CVRATE_BYTE); 471 472 /* Program the hard temperature limits into the chip. */ 473 bbc_i2c_writeb(tp->client, amb_temp_limits[tp->index].high_pwroff, 474 MAX1617_WR_AMB_HIGHLIM); 475 bbc_i2c_writeb(tp->client, amb_temp_limits[tp->index].low_pwroff, 476 MAX1617_WR_AMB_LOWLIM); 477 bbc_i2c_writeb(tp->client, cpu_temp_limits[tp->index].high_pwroff, 478 MAX1617_WR_CPU_HIGHLIM); 479 bbc_i2c_writeb(tp->client, cpu_temp_limits[tp->index].low_pwroff, 480 MAX1617_WR_CPU_LOWLIM); 481 482 get_current_temps(tp); 483 tp->prev_cpu_temp = tp->avg_cpu_temp = tp->curr_cpu_temp; 484 tp->prev_amb_temp = tp->avg_amb_temp = tp->curr_amb_temp; 485 486 tp->fan_todo[FAN_AMBIENT] = FAN_SAME; 487 tp->fan_todo[FAN_CPU] = FAN_SAME; 488 } 489 490 static void attach_one_fan(struct bbc_i2c_bus *bp, struct of_device *op, 491 int fan_idx) 492 { 493 struct bbc_fan_control *fp; 494 495 fp = kzalloc(sizeof(*fp), GFP_KERNEL); 496 if (!fp) 497 return; 498 499 fp->client = bbc_i2c_attach(bp, op); 500 if (!fp->client) { 501 kfree(fp); 502 return; 503 } 504 505 fp->index = fan_idx; 506 507 list_add(&fp->glob_list, &all_fans); 508 list_add(&fp->bp_list, &bp->fans); 509 510 /* The i2c device controlling the fans is write-only. 511 * So the only way to keep track of the current power 512 * level fed to the fans is via software. Choose half 513 * power for cpu/system and 'on' fo the powersupply fan 514 * and set it now. 515 */ 516 fp->psupply_fan_on = 1; 517 fp->cpu_fan_speed = (FAN_SPEED_MAX - FAN_SPEED_MIN) / 2; 518 fp->cpu_fan_speed += FAN_SPEED_MIN; 519 fp->system_fan_speed = (FAN_SPEED_MAX - FAN_SPEED_MIN) / 2; 520 fp->system_fan_speed += FAN_SPEED_MIN; 521 522 set_fan_speeds(fp); 523 } 524 525 static void destroy_one_temp(struct bbc_cpu_temperature *tp) 526 { 527 bbc_i2c_detach(tp->client); 528 kfree(tp); 529 } 530 531 static void destroy_all_temps(struct bbc_i2c_bus *bp) 532 { 533 struct bbc_cpu_temperature *tp, *tpos; 534 535 list_for_each_entry_safe(tp, tpos, &bp->temps, bp_list) { 536 list_del(&tp->bp_list); 537 list_del(&tp->glob_list); 538 destroy_one_temp(tp); 539 } 540 } 541 542 static void destroy_one_fan(struct bbc_fan_control *fp) 543 { 544 bbc_i2c_detach(fp->client); 545 kfree(fp); 546 } 547 548 static void destroy_all_fans(struct bbc_i2c_bus *bp) 549 { 550 struct bbc_fan_control *fp, *fpos; 551 552 list_for_each_entry_safe(fp, fpos, &bp->fans, bp_list) { 553 list_del(&fp->bp_list); 554 list_del(&fp->glob_list); 555 destroy_one_fan(fp); 556 } 557 } 558 559 int bbc_envctrl_init(struct bbc_i2c_bus *bp) 560 { 561 struct of_device *op; 562 int temp_index = 0; 563 int fan_index = 0; 564 int devidx = 0; 565 566 while ((op = bbc_i2c_getdev(bp, devidx++)) != NULL) { 567 if (!strcmp(op->node->name, "temperature")) 568 attach_one_temp(bp, op, temp_index++); 569 if (!strcmp(op->node->name, "fan-control")) 570 attach_one_fan(bp, op, fan_index++); 571 } 572 if (temp_index != 0 && fan_index != 0) { 573 kenvctrld_task = kthread_run(kenvctrld, NULL, "kenvctrld"); 574 if (IS_ERR(kenvctrld_task)) { 575 int err = PTR_ERR(kenvctrld_task); 576 577 kenvctrld_task = NULL; 578 destroy_all_temps(bp); 579 destroy_all_fans(bp); 580 return err; 581 } 582 } 583 584 return 0; 585 } 586 587 void bbc_envctrl_cleanup(struct bbc_i2c_bus *bp) 588 { 589 if (kenvctrld_task) 590 kthread_stop(kenvctrld_task); 591 592 destroy_all_temps(bp); 593 destroy_all_fans(bp); 594 } 595