xref: /openbmc/linux/drivers/sbus/char/bbc_envctrl.c (revision 8569c914)
1 /* bbc_envctrl.c: UltraSPARC-III environment control driver.
2  *
3  * Copyright (C) 2001, 2008 David S. Miller (davem@davemloft.net)
4  */
5 
6 #include <linux/kthread.h>
7 #include <linux/delay.h>
8 #include <linux/kmod.h>
9 #include <linux/reboot.h>
10 #include <linux/of.h>
11 #include <linux/of_device.h>
12 #include <asm/oplib.h>
13 
14 #include "bbc_i2c.h"
15 #include "max1617.h"
16 
17 #undef ENVCTRL_TRACE
18 
19 /* WARNING: Making changes to this driver is very dangerous.
20  *          If you misprogram the sensor chips they can
21  *          cut the power on you instantly.
22  */
23 
24 /* Two temperature sensors exist in the SunBLADE-1000 enclosure.
25  * Both are implemented using max1617 i2c devices.  Each max1617
26  * monitors 2 temperatures, one for one of the cpu dies and the other
27  * for the ambient temperature.
28  *
29  * The max1617 is capable of being programmed with power-off
30  * temperature values, one low limit and one high limit.  These
31  * can be controlled independently for the cpu or ambient temperature.
32  * If a limit is violated, the power is simply shut off.  The frequency
33  * with which the max1617 does temperature sampling can be controlled
34  * as well.
35  *
36  * Three fans exist inside the machine, all three are controlled with
37  * an i2c digital to analog converter.  There is a fan directed at the
38  * two processor slots, another for the rest of the enclosure, and the
39  * third is for the power supply.  The first two fans may be speed
40  * controlled by changing the voltage fed to them.  The third fan may
41  * only be completely off or on.  The third fan is meant to only be
42  * disabled/enabled when entering/exiting the lowest power-saving
43  * mode of the machine.
44  *
45  * An environmental control kernel thread periodically monitors all
46  * temperature sensors.  Based upon the samples it will adjust the
47  * fan speeds to try and keep the system within a certain temperature
48  * range (the goal being to make the fans as quiet as possible without
49  * allowing the system to get too hot).
50  *
51  * If the temperature begins to rise/fall outside of the acceptable
52  * operating range, a periodic warning will be sent to the kernel log.
53  * The fans will be put on full blast to attempt to deal with this
54  * situation.  After exceeding the acceptable operating range by a
55  * certain threshold, the kernel thread will shut down the system.
56  * Here, the thread is attempting to shut the machine down cleanly
57  * before the hardware based power-off event is triggered.
58  */
59 
60 /* These settings are in Celsius.  We use these defaults only
61  * if we cannot interrogate the cpu-fru SEEPROM.
62  */
63 struct temp_limits {
64 	s8 high_pwroff, high_shutdown, high_warn;
65 	s8 low_warn, low_shutdown, low_pwroff;
66 };
67 
68 static struct temp_limits cpu_temp_limits[2] = {
69 	{ 100, 85, 80, 5, -5, -10 },
70 	{ 100, 85, 80, 5, -5, -10 },
71 };
72 
73 static struct temp_limits amb_temp_limits[2] = {
74 	{ 65, 55, 40, 5, -5, -10 },
75 	{ 65, 55, 40, 5, -5, -10 },
76 };
77 
/* Global lists of every attached temperature sensor and fan
 * controller; entries are linked through their glob_list member.
 */
static LIST_HEAD(all_temps);
static LIST_HEAD(all_fans);

/* Offsets passed to bbc_i2c_writeb() when programming the fan
 * controller in set_fan_speeds().
 */
#define CPU_FAN_REG	0xf0
#define SYS_FAN_REG	0xf2
#define PSUPPLY_FAN_REG	0xf4

/* Bounds that set_fan_speeds() clamps the cpu/system fan values to. */
#define FAN_SPEED_MIN	0x0c
#define FAN_SPEED_MAX	0x3f

/* Values written to PSUPPLY_FAN_REG to switch the power supply fan. */
#define PSUPPLY_FAN_ON	0x1f
#define PSUPPLY_FAN_OFF	0x00
90 
91 static void set_fan_speeds(struct bbc_fan_control *fp)
92 {
93 	/* Put temperatures into range so we don't mis-program
94 	 * the hardware.
95 	 */
96 	if (fp->cpu_fan_speed < FAN_SPEED_MIN)
97 		fp->cpu_fan_speed = FAN_SPEED_MIN;
98 	if (fp->cpu_fan_speed > FAN_SPEED_MAX)
99 		fp->cpu_fan_speed = FAN_SPEED_MAX;
100 	if (fp->system_fan_speed < FAN_SPEED_MIN)
101 		fp->system_fan_speed = FAN_SPEED_MIN;
102 	if (fp->system_fan_speed > FAN_SPEED_MAX)
103 		fp->system_fan_speed = FAN_SPEED_MAX;
104 #ifdef ENVCTRL_TRACE
105 	printk("fan%d: Changed fan speed to cpu(%02x) sys(%02x)\n",
106 	       fp->index,
107 	       fp->cpu_fan_speed, fp->system_fan_speed);
108 #endif
109 
110 	bbc_i2c_writeb(fp->client, fp->cpu_fan_speed, CPU_FAN_REG);
111 	bbc_i2c_writeb(fp->client, fp->system_fan_speed, SYS_FAN_REG);
112 	bbc_i2c_writeb(fp->client,
113 		       (fp->psupply_fan_on ?
114 			PSUPPLY_FAN_ON : PSUPPLY_FAN_OFF),
115 		       PSUPPLY_FAN_REG);
116 }
117 
118 static void get_current_temps(struct bbc_cpu_temperature *tp)
119 {
120 	tp->prev_amb_temp = tp->curr_amb_temp;
121 	bbc_i2c_readb(tp->client,
122 		      (unsigned char *) &tp->curr_amb_temp,
123 		      MAX1617_AMB_TEMP);
124 	tp->prev_cpu_temp = tp->curr_cpu_temp;
125 	bbc_i2c_readb(tp->client,
126 		      (unsigned char *) &tp->curr_cpu_temp,
127 		      MAX1617_CPU_TEMP);
128 #ifdef ENVCTRL_TRACE
129 	printk("temp%d: cpu(%d C) amb(%d C)\n",
130 	       tp->index,
131 	       (int) tp->curr_cpu_temp, (int) tp->curr_amb_temp);
132 #endif
133 }
134 
135 
136 static void do_envctrl_shutdown(struct bbc_cpu_temperature *tp)
137 {
138 	static int shutting_down = 0;
139 	char *type = "???";
140 	s8 val = -1;
141 
142 	if (shutting_down != 0)
143 		return;
144 
145 	if (tp->curr_amb_temp >= amb_temp_limits[tp->index].high_shutdown ||
146 	    tp->curr_amb_temp < amb_temp_limits[tp->index].low_shutdown) {
147 		type = "ambient";
148 		val = tp->curr_amb_temp;
149 	} else if (tp->curr_cpu_temp >= cpu_temp_limits[tp->index].high_shutdown ||
150 		   tp->curr_cpu_temp < cpu_temp_limits[tp->index].low_shutdown) {
151 		type = "CPU";
152 		val = tp->curr_cpu_temp;
153 	}
154 
155 	printk(KERN_CRIT "temp%d: Outside of safe %s "
156 	       "operating temperature, %d C.\n",
157 	       tp->index, type, val);
158 
159 	printk(KERN_CRIT "kenvctrld: Shutting down the system now.\n");
160 
161 	shutting_down = 1;
162 	if (orderly_poweroff(true) < 0)
163 		printk(KERN_CRIT "envctrl: shutdown execution failed\n");
164 }
165 
166 #define WARN_INTERVAL	(30 * HZ)
167 
168 static void analyze_ambient_temp(struct bbc_cpu_temperature *tp, unsigned long *last_warn, int tick)
169 {
170 	int ret = 0;
171 
172 	if (time_after(jiffies, (*last_warn + WARN_INTERVAL))) {
173 		if (tp->curr_amb_temp >=
174 		    amb_temp_limits[tp->index].high_warn) {
175 			printk(KERN_WARNING "temp%d: "
176 			       "Above safe ambient operating temperature, %d C.\n",
177 			       tp->index, (int) tp->curr_amb_temp);
178 			ret = 1;
179 		} else if (tp->curr_amb_temp <
180 			   amb_temp_limits[tp->index].low_warn) {
181 			printk(KERN_WARNING "temp%d: "
182 			       "Below safe ambient operating temperature, %d C.\n",
183 			       tp->index, (int) tp->curr_amb_temp);
184 			ret = 1;
185 		}
186 		if (ret)
187 			*last_warn = jiffies;
188 	} else if (tp->curr_amb_temp >= amb_temp_limits[tp->index].high_warn ||
189 		   tp->curr_amb_temp < amb_temp_limits[tp->index].low_warn)
190 		ret = 1;
191 
192 	/* Now check the shutdown limits. */
193 	if (tp->curr_amb_temp >= amb_temp_limits[tp->index].high_shutdown ||
194 	    tp->curr_amb_temp < amb_temp_limits[tp->index].low_shutdown) {
195 		do_envctrl_shutdown(tp);
196 		ret = 1;
197 	}
198 
199 	if (ret) {
200 		tp->fan_todo[FAN_AMBIENT] = FAN_FULLBLAST;
201 	} else if ((tick & (8 - 1)) == 0) {
202 		s8 amb_goal_hi = amb_temp_limits[tp->index].high_warn - 10;
203 		s8 amb_goal_lo;
204 
205 		amb_goal_lo = amb_goal_hi - 3;
206 
207 		/* We do not try to avoid 'too cold' events.  Basically we
208 		 * only try to deal with over-heating and fan noise reduction.
209 		 */
210 		if (tp->avg_amb_temp < amb_goal_hi) {
211 			if (tp->avg_amb_temp >= amb_goal_lo)
212 				tp->fan_todo[FAN_AMBIENT] = FAN_SAME;
213 			else
214 				tp->fan_todo[FAN_AMBIENT] = FAN_SLOWER;
215 		} else {
216 			tp->fan_todo[FAN_AMBIENT] = FAN_FASTER;
217 		}
218 	} else {
219 		tp->fan_todo[FAN_AMBIENT] = FAN_SAME;
220 	}
221 }
222 
223 static void analyze_cpu_temp(struct bbc_cpu_temperature *tp, unsigned long *last_warn, int tick)
224 {
225 	int ret = 0;
226 
227 	if (time_after(jiffies, (*last_warn + WARN_INTERVAL))) {
228 		if (tp->curr_cpu_temp >=
229 		    cpu_temp_limits[tp->index].high_warn) {
230 			printk(KERN_WARNING "temp%d: "
231 			       "Above safe CPU operating temperature, %d C.\n",
232 			       tp->index, (int) tp->curr_cpu_temp);
233 			ret = 1;
234 		} else if (tp->curr_cpu_temp <
235 			   cpu_temp_limits[tp->index].low_warn) {
236 			printk(KERN_WARNING "temp%d: "
237 			       "Below safe CPU operating temperature, %d C.\n",
238 			       tp->index, (int) tp->curr_cpu_temp);
239 			ret = 1;
240 		}
241 		if (ret)
242 			*last_warn = jiffies;
243 	} else if (tp->curr_cpu_temp >= cpu_temp_limits[tp->index].high_warn ||
244 		   tp->curr_cpu_temp < cpu_temp_limits[tp->index].low_warn)
245 		ret = 1;
246 
247 	/* Now check the shutdown limits. */
248 	if (tp->curr_cpu_temp >= cpu_temp_limits[tp->index].high_shutdown ||
249 	    tp->curr_cpu_temp < cpu_temp_limits[tp->index].low_shutdown) {
250 		do_envctrl_shutdown(tp);
251 		ret = 1;
252 	}
253 
254 	if (ret) {
255 		tp->fan_todo[FAN_CPU] = FAN_FULLBLAST;
256 	} else if ((tick & (8 - 1)) == 0) {
257 		s8 cpu_goal_hi = cpu_temp_limits[tp->index].high_warn - 10;
258 		s8 cpu_goal_lo;
259 
260 		cpu_goal_lo = cpu_goal_hi - 3;
261 
262 		/* We do not try to avoid 'too cold' events.  Basically we
263 		 * only try to deal with over-heating and fan noise reduction.
264 		 */
265 		if (tp->avg_cpu_temp < cpu_goal_hi) {
266 			if (tp->avg_cpu_temp >= cpu_goal_lo)
267 				tp->fan_todo[FAN_CPU] = FAN_SAME;
268 			else
269 				tp->fan_todo[FAN_CPU] = FAN_SLOWER;
270 		} else {
271 			tp->fan_todo[FAN_CPU] = FAN_FASTER;
272 		}
273 	} else {
274 		tp->fan_todo[FAN_CPU] = FAN_SAME;
275 	}
276 }
277 
278 static void analyze_temps(struct bbc_cpu_temperature *tp, unsigned long *last_warn)
279 {
280 	tp->avg_amb_temp = (s8)((int)((int)tp->avg_amb_temp + (int)tp->curr_amb_temp) / 2);
281 	tp->avg_cpu_temp = (s8)((int)((int)tp->avg_cpu_temp + (int)tp->curr_cpu_temp) / 2);
282 
283 	analyze_ambient_temp(tp, last_warn, tp->sample_tick);
284 	analyze_cpu_temp(tp, last_warn, tp->sample_tick);
285 
286 	tp->sample_tick++;
287 }
288 
289 static enum fan_action prioritize_fan_action(int which_fan)
290 {
291 	struct bbc_cpu_temperature *tp;
292 	enum fan_action decision = FAN_STATE_MAX;
293 
294 	/* Basically, prioritize what the temperature sensors
295 	 * recommend we do, and perform that action on all the
296 	 * fans.
297 	 */
298 	list_for_each_entry(tp, &all_temps, glob_list) {
299 		if (tp->fan_todo[which_fan] == FAN_FULLBLAST) {
300 			decision = FAN_FULLBLAST;
301 			break;
302 		}
303 		if (tp->fan_todo[which_fan] == FAN_SAME &&
304 		    decision != FAN_FASTER)
305 			decision = FAN_SAME;
306 		else if (tp->fan_todo[which_fan] == FAN_FASTER)
307 			decision = FAN_FASTER;
308 		else if (decision != FAN_FASTER &&
309 			 decision != FAN_SAME &&
310 			 tp->fan_todo[which_fan] == FAN_SLOWER)
311 			decision = FAN_SLOWER;
312 	}
313 	if (decision == FAN_STATE_MAX)
314 		decision = FAN_SAME;
315 
316 	return decision;
317 }
318 
319 static int maybe_new_ambient_fan_speed(struct bbc_fan_control *fp)
320 {
321 	enum fan_action decision = prioritize_fan_action(FAN_AMBIENT);
322 	int ret;
323 
324 	if (decision == FAN_SAME)
325 		return 0;
326 
327 	ret = 1;
328 	if (decision == FAN_FULLBLAST) {
329 		if (fp->system_fan_speed >= FAN_SPEED_MAX)
330 			ret = 0;
331 		else
332 			fp->system_fan_speed = FAN_SPEED_MAX;
333 	} else {
334 		if (decision == FAN_FASTER) {
335 			if (fp->system_fan_speed >= FAN_SPEED_MAX)
336 				ret = 0;
337 			else
338 				fp->system_fan_speed += 2;
339 		} else {
340 			int orig_speed = fp->system_fan_speed;
341 
342 			if (orig_speed <= FAN_SPEED_MIN ||
343 			    orig_speed <= (fp->cpu_fan_speed - 3))
344 				ret = 0;
345 			else
346 				fp->system_fan_speed -= 1;
347 		}
348 	}
349 
350 	return ret;
351 }
352 
353 static int maybe_new_cpu_fan_speed(struct bbc_fan_control *fp)
354 {
355 	enum fan_action decision = prioritize_fan_action(FAN_CPU);
356 	int ret;
357 
358 	if (decision == FAN_SAME)
359 		return 0;
360 
361 	ret = 1;
362 	if (decision == FAN_FULLBLAST) {
363 		if (fp->cpu_fan_speed >= FAN_SPEED_MAX)
364 			ret = 0;
365 		else
366 			fp->cpu_fan_speed = FAN_SPEED_MAX;
367 	} else {
368 		if (decision == FAN_FASTER) {
369 			if (fp->cpu_fan_speed >= FAN_SPEED_MAX)
370 				ret = 0;
371 			else {
372 				fp->cpu_fan_speed += 2;
373 				if (fp->system_fan_speed <
374 				    (fp->cpu_fan_speed - 3))
375 					fp->system_fan_speed =
376 						fp->cpu_fan_speed - 3;
377 			}
378 		} else {
379 			if (fp->cpu_fan_speed <= FAN_SPEED_MIN)
380 				ret = 0;
381 			else
382 				fp->cpu_fan_speed -= 1;
383 		}
384 	}
385 
386 	return ret;
387 }
388 
/* Recompute both fan speeds of @fp and reprogram the controller only
 * if at least one of them changed.
 */
static void maybe_new_fan_speeds(struct bbc_fan_control *fp)
{
	int amb_changed = maybe_new_ambient_fan_speed(fp);
	int cpu_changed = maybe_new_cpu_fan_speed(fp);

	if (amb_changed || cpu_changed)
		set_fan_speeds(fp);
}
399 
400 static void fans_full_blast(void)
401 {
402 	struct bbc_fan_control *fp;
403 
404 	/* Since we will not be monitoring things anymore, put
405 	 * the fans on full blast.
406 	 */
407 	list_for_each_entry(fp, &all_fans, glob_list) {
408 		fp->cpu_fan_speed = FAN_SPEED_MAX;
409 		fp->system_fan_speed = FAN_SPEED_MAX;
410 		fp->psupply_fan_on = 1;
411 		set_fan_speeds(fp);
412 	}
413 }
414 
/* Sleep time between two polls in kenvctrld(), as passed to
 * msleep_interruptible() (i.e. milliseconds).
 */
#define POLL_INTERVAL	(5 * 1000)
/* jiffies stamp of the last temperature warning, shared by all sensors. */
static unsigned long last_warning_jiffies;
/* The monitoring thread created by bbc_envctrl_init(). */
static struct task_struct *kenvctrld_task;
418 
419 static int kenvctrld(void *__unused)
420 {
421 	printk(KERN_INFO "bbc_envctrl: kenvctrld starting...\n");
422 	last_warning_jiffies = jiffies - WARN_INTERVAL;
423 	for (;;) {
424 		struct bbc_cpu_temperature *tp;
425 		struct bbc_fan_control *fp;
426 
427 		msleep_interruptible(POLL_INTERVAL);
428 		if (kthread_should_stop())
429 			break;
430 
431 		list_for_each_entry(tp, &all_temps, glob_list) {
432 			get_current_temps(tp);
433 			analyze_temps(tp, &last_warning_jiffies);
434 		}
435 		list_for_each_entry(fp, &all_fans, glob_list)
436 			maybe_new_fan_speeds(fp);
437 	}
438 	printk(KERN_INFO "bbc_envctrl: kenvctrld exiting...\n");
439 
440 	fans_full_blast();
441 
442 	return 0;
443 }
444 
445 static void attach_one_temp(struct bbc_i2c_bus *bp, struct of_device *op,
446 			    int temp_idx)
447 {
448 	struct bbc_cpu_temperature *tp;
449 
450 	tp = kzalloc(sizeof(*tp), GFP_KERNEL);
451 	if (!tp)
452 		return;
453 
454 	tp->client = bbc_i2c_attach(bp, op);
455 	if (!tp->client) {
456 		kfree(tp);
457 		return;
458 	}
459 
460 
461 	tp->index = temp_idx;
462 
463 	list_add(&tp->glob_list, &all_temps);
464 	list_add(&tp->bp_list, &bp->temps);
465 
466 	/* Tell it to convert once every 5 seconds, clear all cfg
467 	 * bits.
468 	 */
469 	bbc_i2c_writeb(tp->client, 0x00, MAX1617_WR_CFG_BYTE);
470 	bbc_i2c_writeb(tp->client, 0x02, MAX1617_WR_CVRATE_BYTE);
471 
472 	/* Program the hard temperature limits into the chip. */
473 	bbc_i2c_writeb(tp->client, amb_temp_limits[tp->index].high_pwroff,
474 		       MAX1617_WR_AMB_HIGHLIM);
475 	bbc_i2c_writeb(tp->client, amb_temp_limits[tp->index].low_pwroff,
476 		       MAX1617_WR_AMB_LOWLIM);
477 	bbc_i2c_writeb(tp->client, cpu_temp_limits[tp->index].high_pwroff,
478 		       MAX1617_WR_CPU_HIGHLIM);
479 	bbc_i2c_writeb(tp->client, cpu_temp_limits[tp->index].low_pwroff,
480 		       MAX1617_WR_CPU_LOWLIM);
481 
482 	get_current_temps(tp);
483 	tp->prev_cpu_temp = tp->avg_cpu_temp = tp->curr_cpu_temp;
484 	tp->prev_amb_temp = tp->avg_amb_temp = tp->curr_amb_temp;
485 
486 	tp->fan_todo[FAN_AMBIENT] = FAN_SAME;
487 	tp->fan_todo[FAN_CPU] = FAN_SAME;
488 }
489 
490 static void attach_one_fan(struct bbc_i2c_bus *bp, struct of_device *op,
491 			   int fan_idx)
492 {
493 	struct bbc_fan_control *fp;
494 
495 	fp = kzalloc(sizeof(*fp), GFP_KERNEL);
496 	if (!fp)
497 		return;
498 
499 	fp->client = bbc_i2c_attach(bp, op);
500 	if (!fp->client) {
501 		kfree(fp);
502 		return;
503 	}
504 
505 	fp->index = fan_idx;
506 
507 	list_add(&fp->glob_list, &all_fans);
508 	list_add(&fp->bp_list, &bp->fans);
509 
510 	/* The i2c device controlling the fans is write-only.
511 	 * So the only way to keep track of the current power
512 	 * level fed to the fans is via software.  Choose half
513 	 * power for cpu/system and 'on' fo the powersupply fan
514 	 * and set it now.
515 	 */
516 	fp->psupply_fan_on = 1;
517 	fp->cpu_fan_speed = (FAN_SPEED_MAX - FAN_SPEED_MIN) / 2;
518 	fp->cpu_fan_speed += FAN_SPEED_MIN;
519 	fp->system_fan_speed = (FAN_SPEED_MAX - FAN_SPEED_MIN) / 2;
520 	fp->system_fan_speed += FAN_SPEED_MIN;
521 
522 	set_fan_speeds(fp);
523 }
524 
525 int bbc_envctrl_init(struct bbc_i2c_bus *bp)
526 {
527 	struct of_device *op;
528 	int temp_index = 0;
529 	int fan_index = 0;
530 	int devidx = 0;
531 
532 	while ((op = bbc_i2c_getdev(bp, devidx++)) != NULL) {
533 		if (!strcmp(op->node->name, "temperature"))
534 			attach_one_temp(bp, op, temp_index++);
535 		if (!strcmp(op->node->name, "fan-control"))
536 			attach_one_fan(bp, op, fan_index++);
537 	}
538 	if (temp_index != 0 && fan_index != 0) {
539 		kenvctrld_task = kthread_run(kenvctrld, NULL, "kenvctrld");
540 		if (IS_ERR(kenvctrld_task))
541 			return PTR_ERR(kenvctrld_task);
542 	}
543 
544 	return 0;
545 }
546 
/* Detach the i2c client of temperature sensor @tp and free @tp.  The
 * caller must already have unlinked @tp from any lists.
 */
static void destroy_one_temp(struct bbc_cpu_temperature *tp)
{
	bbc_i2c_detach(tp->client);
	kfree(tp);
}
552 
/* Detach the i2c client of fan controller @fp and free @fp.  The
 * caller must already have unlinked @fp from any lists.
 */
static void destroy_one_fan(struct bbc_fan_control *fp)
{
	bbc_i2c_detach(fp->client);
	kfree(fp);
}
558 
559 void bbc_envctrl_cleanup(struct bbc_i2c_bus *bp)
560 {
561 	struct bbc_cpu_temperature *tp, *tpos;
562 	struct bbc_fan_control *fp, *fpos;
563 
564 	kthread_stop(kenvctrld_task);
565 
566 	list_for_each_entry_safe(tp, tpos, &bp->temps, bp_list) {
567 		list_del(&tp->bp_list);
568 		list_del(&tp->glob_list);
569 		destroy_one_temp(tp);
570 	}
571 
572 	list_for_each_entry_safe(fp, fpos, &bp->fans, bp_list) {
573 		list_del(&fp->bp_list);
574 		list_del(&fp->glob_list);
575 		destroy_one_fan(fp);
576 	}
577 }
578