xref: /openbmc/linux/drivers/sbus/char/bbc_envctrl.c (revision 479965a2)
1 // SPDX-License-Identifier: GPL-2.0
2 /* bbc_envctrl.c: UltraSPARC-III environment control driver.
3  *
4  * Copyright (C) 2001, 2008 David S. Miller (davem@davemloft.net)
5  */
6 
7 #include <linux/kthread.h>
8 #include <linux/delay.h>
9 #include <linux/kmod.h>
10 #include <linux/reboot.h>
11 #include <linux/of.h>
12 #include <linux/platform_device.h>
13 #include <linux/slab.h>
14 #include <asm/oplib.h>
15 
16 #include "bbc_i2c.h"
17 #include "max1617.h"
18 
19 #undef ENVCTRL_TRACE
20 
21 /* WARNING: Making changes to this driver is very dangerous.
22  *          If you misprogram the sensor chips they can
23  *          cut the power on you instantly.
24  */
25 
26 /* Two temperature sensors exist in the SunBLADE-1000 enclosure.
27  * Both are implemented using max1617 i2c devices.  Each max1617
28  * monitors 2 temperatures, one for one of the cpu dies and the other
29  * for the ambient temperature.
30  *
31  * The max1617 is capable of being programmed with power-off
32  * temperature values, one low limit and one high limit.  These
33  * can be controlled independently for the cpu or ambient temperature.
34  * If a limit is violated, the power is simply shut off.  The frequency
35  * with which the max1617 does temperature sampling can be controlled
36  * as well.
37  *
38  * Three fans exist inside the machine, all three are controlled with
39  * an i2c digital to analog converter.  There is a fan directed at the
40  * two processor slots, another for the rest of the enclosure, and the
41  * third is for the power supply.  The first two fans may be speed
42  * controlled by changing the voltage fed to them.  The third fan may
43  * only be completely off or on.  The third fan is meant to only be
44  * disabled/enabled when entering/exiting the lowest power-saving
45  * mode of the machine.
46  *
47  * An environmental control kernel thread periodically monitors all
48  * temperature sensors.  Based upon the samples it will adjust the
49  * fan speeds to try and keep the system within a certain temperature
50  * range (the goal being to make the fans as quiet as possible without
51  * allowing the system to get too hot).
52  *
53  * If the temperature begins to rise/fall outside of the acceptable
54  * operating range, a periodic warning will be sent to the kernel log.
55  * The fans will be put on full blast to attempt to deal with this
56  * situation.  After exceeding the acceptable operating range by a
57  * certain threshold, the kernel thread will shut down the system.
58  * Here, the thread is attempting to shut the machine down cleanly
59  * before the hardware based power-off event is triggered.
60  */
61 
62 /* These settings are in Celsius.  We use these defaults only
63  * if we cannot interrogate the cpu-fru SEEPROM.
64  */
65 struct temp_limits {
66 	s8 high_pwroff, high_shutdown, high_warn;
67 	s8 low_warn, low_shutdown, low_pwroff;
68 };
69 
70 static struct temp_limits cpu_temp_limits[2] = {
71 	{ 100, 85, 80, 5, -5, -10 },
72 	{ 100, 85, 80, 5, -5, -10 },
73 };
74 
75 static struct temp_limits amb_temp_limits[2] = {
76 	{ 65, 55, 40, 5, -5, -10 },
77 	{ 65, 55, 40, 5, -5, -10 },
78 };
79 
/* Every attached temperature sensor, linked through its glob_list
 * member.  Walked by the monitoring thread and by
 * prioritize_fan_action().
 */
static LIST_HEAD(all_temps);
/* Every attached fan controller, linked through its glob_list member. */
static LIST_HEAD(all_fans);
82 
/* Fan controller register selectors, passed as the last argument of
 * bbc_i2c_writeb() by set_fan_speeds().
 */
#define CPU_FAN_REG	0xf0
#define SYS_FAN_REG	0xf2
#define PSUPPLY_FAN_REG	0xf4

/* Bounds that set_fan_speeds() clamps the cpu and system fan speeds
 * to before writing them out.
 */
#define FAN_SPEED_MIN	0x0c
#define FAN_SPEED_MAX	0x3f

/* Values written to PSUPPLY_FAN_REG depending on psupply_fan_on. */
#define PSUPPLY_FAN_ON	0x1f
#define PSUPPLY_FAN_OFF	0x00
92 
93 static void set_fan_speeds(struct bbc_fan_control *fp)
94 {
95 	/* Put temperatures into range so we don't mis-program
96 	 * the hardware.
97 	 */
98 	if (fp->cpu_fan_speed < FAN_SPEED_MIN)
99 		fp->cpu_fan_speed = FAN_SPEED_MIN;
100 	if (fp->cpu_fan_speed > FAN_SPEED_MAX)
101 		fp->cpu_fan_speed = FAN_SPEED_MAX;
102 	if (fp->system_fan_speed < FAN_SPEED_MIN)
103 		fp->system_fan_speed = FAN_SPEED_MIN;
104 	if (fp->system_fan_speed > FAN_SPEED_MAX)
105 		fp->system_fan_speed = FAN_SPEED_MAX;
106 #ifdef ENVCTRL_TRACE
107 	printk("fan%d: Changed fan speed to cpu(%02x) sys(%02x)\n",
108 	       fp->index,
109 	       fp->cpu_fan_speed, fp->system_fan_speed);
110 #endif
111 
112 	bbc_i2c_writeb(fp->client, fp->cpu_fan_speed, CPU_FAN_REG);
113 	bbc_i2c_writeb(fp->client, fp->system_fan_speed, SYS_FAN_REG);
114 	bbc_i2c_writeb(fp->client,
115 		       (fp->psupply_fan_on ?
116 			PSUPPLY_FAN_ON : PSUPPLY_FAN_OFF),
117 		       PSUPPLY_FAN_REG);
118 }
119 
120 static void get_current_temps(struct bbc_cpu_temperature *tp)
121 {
122 	tp->prev_amb_temp = tp->curr_amb_temp;
123 	bbc_i2c_readb(tp->client,
124 		      (unsigned char *) &tp->curr_amb_temp,
125 		      MAX1617_AMB_TEMP);
126 	tp->prev_cpu_temp = tp->curr_cpu_temp;
127 	bbc_i2c_readb(tp->client,
128 		      (unsigned char *) &tp->curr_cpu_temp,
129 		      MAX1617_CPU_TEMP);
130 #ifdef ENVCTRL_TRACE
131 	printk("temp%d: cpu(%d C) amb(%d C)\n",
132 	       tp->index,
133 	       (int) tp->curr_cpu_temp, (int) tp->curr_amb_temp);
134 #endif
135 }
136 
137 
138 static void do_envctrl_shutdown(struct bbc_cpu_temperature *tp)
139 {
140 	static int shutting_down = 0;
141 	char *type = "???";
142 	s8 val = -1;
143 
144 	if (shutting_down != 0)
145 		return;
146 
147 	if (tp->curr_amb_temp >= amb_temp_limits[tp->index].high_shutdown ||
148 	    tp->curr_amb_temp < amb_temp_limits[tp->index].low_shutdown) {
149 		type = "ambient";
150 		val = tp->curr_amb_temp;
151 	} else if (tp->curr_cpu_temp >= cpu_temp_limits[tp->index].high_shutdown ||
152 		   tp->curr_cpu_temp < cpu_temp_limits[tp->index].low_shutdown) {
153 		type = "CPU";
154 		val = tp->curr_cpu_temp;
155 	}
156 
157 	printk(KERN_CRIT "temp%d: Outside of safe %s "
158 	       "operating temperature, %d C.\n",
159 	       tp->index, type, val);
160 
161 	printk(KERN_CRIT "kenvctrld: Shutting down the system now.\n");
162 
163 	shutting_down = 1;
164 	orderly_poweroff(true);
165 }
166 
/* Minimum spacing, in jiffies, between two temperature warnings. */
#define WARN_INTERVAL	(30 * HZ)
168 
169 static void analyze_ambient_temp(struct bbc_cpu_temperature *tp, unsigned long *last_warn, int tick)
170 {
171 	int ret = 0;
172 
173 	if (time_after(jiffies, (*last_warn + WARN_INTERVAL))) {
174 		if (tp->curr_amb_temp >=
175 		    amb_temp_limits[tp->index].high_warn) {
176 			printk(KERN_WARNING "temp%d: "
177 			       "Above safe ambient operating temperature, %d C.\n",
178 			       tp->index, (int) tp->curr_amb_temp);
179 			ret = 1;
180 		} else if (tp->curr_amb_temp <
181 			   amb_temp_limits[tp->index].low_warn) {
182 			printk(KERN_WARNING "temp%d: "
183 			       "Below safe ambient operating temperature, %d C.\n",
184 			       tp->index, (int) tp->curr_amb_temp);
185 			ret = 1;
186 		}
187 		if (ret)
188 			*last_warn = jiffies;
189 	} else if (tp->curr_amb_temp >= amb_temp_limits[tp->index].high_warn ||
190 		   tp->curr_amb_temp < amb_temp_limits[tp->index].low_warn)
191 		ret = 1;
192 
193 	/* Now check the shutdown limits. */
194 	if (tp->curr_amb_temp >= amb_temp_limits[tp->index].high_shutdown ||
195 	    tp->curr_amb_temp < amb_temp_limits[tp->index].low_shutdown) {
196 		do_envctrl_shutdown(tp);
197 		ret = 1;
198 	}
199 
200 	if (ret) {
201 		tp->fan_todo[FAN_AMBIENT] = FAN_FULLBLAST;
202 	} else if ((tick & (8 - 1)) == 0) {
203 		s8 amb_goal_hi = amb_temp_limits[tp->index].high_warn - 10;
204 		s8 amb_goal_lo;
205 
206 		amb_goal_lo = amb_goal_hi - 3;
207 
208 		/* We do not try to avoid 'too cold' events.  Basically we
209 		 * only try to deal with over-heating and fan noise reduction.
210 		 */
211 		if (tp->avg_amb_temp < amb_goal_hi) {
212 			if (tp->avg_amb_temp >= amb_goal_lo)
213 				tp->fan_todo[FAN_AMBIENT] = FAN_SAME;
214 			else
215 				tp->fan_todo[FAN_AMBIENT] = FAN_SLOWER;
216 		} else {
217 			tp->fan_todo[FAN_AMBIENT] = FAN_FASTER;
218 		}
219 	} else {
220 		tp->fan_todo[FAN_AMBIENT] = FAN_SAME;
221 	}
222 }
223 
224 static void analyze_cpu_temp(struct bbc_cpu_temperature *tp, unsigned long *last_warn, int tick)
225 {
226 	int ret = 0;
227 
228 	if (time_after(jiffies, (*last_warn + WARN_INTERVAL))) {
229 		if (tp->curr_cpu_temp >=
230 		    cpu_temp_limits[tp->index].high_warn) {
231 			printk(KERN_WARNING "temp%d: "
232 			       "Above safe CPU operating temperature, %d C.\n",
233 			       tp->index, (int) tp->curr_cpu_temp);
234 			ret = 1;
235 		} else if (tp->curr_cpu_temp <
236 			   cpu_temp_limits[tp->index].low_warn) {
237 			printk(KERN_WARNING "temp%d: "
238 			       "Below safe CPU operating temperature, %d C.\n",
239 			       tp->index, (int) tp->curr_cpu_temp);
240 			ret = 1;
241 		}
242 		if (ret)
243 			*last_warn = jiffies;
244 	} else if (tp->curr_cpu_temp >= cpu_temp_limits[tp->index].high_warn ||
245 		   tp->curr_cpu_temp < cpu_temp_limits[tp->index].low_warn)
246 		ret = 1;
247 
248 	/* Now check the shutdown limits. */
249 	if (tp->curr_cpu_temp >= cpu_temp_limits[tp->index].high_shutdown ||
250 	    tp->curr_cpu_temp < cpu_temp_limits[tp->index].low_shutdown) {
251 		do_envctrl_shutdown(tp);
252 		ret = 1;
253 	}
254 
255 	if (ret) {
256 		tp->fan_todo[FAN_CPU] = FAN_FULLBLAST;
257 	} else if ((tick & (8 - 1)) == 0) {
258 		s8 cpu_goal_hi = cpu_temp_limits[tp->index].high_warn - 10;
259 		s8 cpu_goal_lo;
260 
261 		cpu_goal_lo = cpu_goal_hi - 3;
262 
263 		/* We do not try to avoid 'too cold' events.  Basically we
264 		 * only try to deal with over-heating and fan noise reduction.
265 		 */
266 		if (tp->avg_cpu_temp < cpu_goal_hi) {
267 			if (tp->avg_cpu_temp >= cpu_goal_lo)
268 				tp->fan_todo[FAN_CPU] = FAN_SAME;
269 			else
270 				tp->fan_todo[FAN_CPU] = FAN_SLOWER;
271 		} else {
272 			tp->fan_todo[FAN_CPU] = FAN_FASTER;
273 		}
274 	} else {
275 		tp->fan_todo[FAN_CPU] = FAN_SAME;
276 	}
277 }
278 
279 static void analyze_temps(struct bbc_cpu_temperature *tp, unsigned long *last_warn)
280 {
281 	tp->avg_amb_temp = (s8)((int)((int)tp->avg_amb_temp + (int)tp->curr_amb_temp) / 2);
282 	tp->avg_cpu_temp = (s8)((int)((int)tp->avg_cpu_temp + (int)tp->curr_cpu_temp) / 2);
283 
284 	analyze_ambient_temp(tp, last_warn, tp->sample_tick);
285 	analyze_cpu_temp(tp, last_warn, tp->sample_tick);
286 
287 	tp->sample_tick++;
288 }
289 
290 static enum fan_action prioritize_fan_action(int which_fan)
291 {
292 	struct bbc_cpu_temperature *tp;
293 	enum fan_action decision = FAN_STATE_MAX;
294 
295 	/* Basically, prioritize what the temperature sensors
296 	 * recommend we do, and perform that action on all the
297 	 * fans.
298 	 */
299 	list_for_each_entry(tp, &all_temps, glob_list) {
300 		if (tp->fan_todo[which_fan] == FAN_FULLBLAST) {
301 			decision = FAN_FULLBLAST;
302 			break;
303 		}
304 		if (tp->fan_todo[which_fan] == FAN_SAME &&
305 		    decision != FAN_FASTER)
306 			decision = FAN_SAME;
307 		else if (tp->fan_todo[which_fan] == FAN_FASTER)
308 			decision = FAN_FASTER;
309 		else if (decision != FAN_FASTER &&
310 			 decision != FAN_SAME &&
311 			 tp->fan_todo[which_fan] == FAN_SLOWER)
312 			decision = FAN_SLOWER;
313 	}
314 	if (decision == FAN_STATE_MAX)
315 		decision = FAN_SAME;
316 
317 	return decision;
318 }
319 
320 static int maybe_new_ambient_fan_speed(struct bbc_fan_control *fp)
321 {
322 	enum fan_action decision = prioritize_fan_action(FAN_AMBIENT);
323 	int ret;
324 
325 	if (decision == FAN_SAME)
326 		return 0;
327 
328 	ret = 1;
329 	if (decision == FAN_FULLBLAST) {
330 		if (fp->system_fan_speed >= FAN_SPEED_MAX)
331 			ret = 0;
332 		else
333 			fp->system_fan_speed = FAN_SPEED_MAX;
334 	} else {
335 		if (decision == FAN_FASTER) {
336 			if (fp->system_fan_speed >= FAN_SPEED_MAX)
337 				ret = 0;
338 			else
339 				fp->system_fan_speed += 2;
340 		} else {
341 			int orig_speed = fp->system_fan_speed;
342 
343 			if (orig_speed <= FAN_SPEED_MIN ||
344 			    orig_speed <= (fp->cpu_fan_speed - 3))
345 				ret = 0;
346 			else
347 				fp->system_fan_speed -= 1;
348 		}
349 	}
350 
351 	return ret;
352 }
353 
354 static int maybe_new_cpu_fan_speed(struct bbc_fan_control *fp)
355 {
356 	enum fan_action decision = prioritize_fan_action(FAN_CPU);
357 	int ret;
358 
359 	if (decision == FAN_SAME)
360 		return 0;
361 
362 	ret = 1;
363 	if (decision == FAN_FULLBLAST) {
364 		if (fp->cpu_fan_speed >= FAN_SPEED_MAX)
365 			ret = 0;
366 		else
367 			fp->cpu_fan_speed = FAN_SPEED_MAX;
368 	} else {
369 		if (decision == FAN_FASTER) {
370 			if (fp->cpu_fan_speed >= FAN_SPEED_MAX)
371 				ret = 0;
372 			else {
373 				fp->cpu_fan_speed += 2;
374 				if (fp->system_fan_speed <
375 				    (fp->cpu_fan_speed - 3))
376 					fp->system_fan_speed =
377 						fp->cpu_fan_speed - 3;
378 			}
379 		} else {
380 			if (fp->cpu_fan_speed <= FAN_SPEED_MIN)
381 				ret = 0;
382 			else
383 				fp->cpu_fan_speed -= 1;
384 		}
385 	}
386 
387 	return ret;
388 }
389 
/* Re-evaluate both fan speeds of @fp and reprogram the controller
 * only if at least one of them changed.  The ambient decision is
 * always made before the cpu decision, and both are always made.
 */
static void maybe_new_fan_speeds(struct bbc_fan_control *fp)
{
	int ambient_changed = maybe_new_ambient_fan_speed(fp);
	int cpu_changed = maybe_new_cpu_fan_speed(fp);

	if (ambient_changed || cpu_changed)
		set_fan_speeds(fp);
}
400 
401 static void fans_full_blast(void)
402 {
403 	struct bbc_fan_control *fp;
404 
405 	/* Since we will not be monitoring things anymore, put
406 	 * the fans on full blast.
407 	 */
408 	list_for_each_entry(fp, &all_fans, glob_list) {
409 		fp->cpu_fan_speed = FAN_SPEED_MAX;
410 		fp->system_fan_speed = FAN_SPEED_MAX;
411 		fp->psupply_fan_on = 1;
412 		set_fan_speeds(fp);
413 	}
414 }
415 
/* Delay between sensor polls, handed to msleep_interruptible()
 * (milliseconds, i.e. 5 seconds).
 */
#define POLL_INTERVAL	(5 * 1000)
/* jiffies value of the last temperature warning; shared by the
 * ambient and cpu analysers through analyze_temps().
 */
static unsigned long last_warning_jiffies;
/* Task returned by kthread_run() for kenvctrld; reset to NULL when
 * thread creation fails.
 */
static struct task_struct *kenvctrld_task;
419 
420 static int kenvctrld(void *__unused)
421 {
422 	printk(KERN_INFO "bbc_envctrl: kenvctrld starting...\n");
423 	last_warning_jiffies = jiffies - WARN_INTERVAL;
424 	for (;;) {
425 		struct bbc_cpu_temperature *tp;
426 		struct bbc_fan_control *fp;
427 
428 		msleep_interruptible(POLL_INTERVAL);
429 		if (kthread_should_stop())
430 			break;
431 
432 		list_for_each_entry(tp, &all_temps, glob_list) {
433 			get_current_temps(tp);
434 			analyze_temps(tp, &last_warning_jiffies);
435 		}
436 		list_for_each_entry(fp, &all_fans, glob_list)
437 			maybe_new_fan_speeds(fp);
438 	}
439 	printk(KERN_INFO "bbc_envctrl: kenvctrld exiting...\n");
440 
441 	fans_full_blast();
442 
443 	return 0;
444 }
445 
446 static void attach_one_temp(struct bbc_i2c_bus *bp, struct platform_device *op,
447 			    int temp_idx)
448 {
449 	struct bbc_cpu_temperature *tp;
450 
451 	tp = kzalloc(sizeof(*tp), GFP_KERNEL);
452 	if (!tp)
453 		return;
454 
455 	INIT_LIST_HEAD(&tp->bp_list);
456 	INIT_LIST_HEAD(&tp->glob_list);
457 
458 	tp->client = bbc_i2c_attach(bp, op);
459 	if (!tp->client) {
460 		kfree(tp);
461 		return;
462 	}
463 
464 
465 	tp->index = temp_idx;
466 
467 	list_add(&tp->glob_list, &all_temps);
468 	list_add(&tp->bp_list, &bp->temps);
469 
470 	/* Tell it to convert once every 5 seconds, clear all cfg
471 	 * bits.
472 	 */
473 	bbc_i2c_writeb(tp->client, 0x00, MAX1617_WR_CFG_BYTE);
474 	bbc_i2c_writeb(tp->client, 0x02, MAX1617_WR_CVRATE_BYTE);
475 
476 	/* Program the hard temperature limits into the chip. */
477 	bbc_i2c_writeb(tp->client, amb_temp_limits[tp->index].high_pwroff,
478 		       MAX1617_WR_AMB_HIGHLIM);
479 	bbc_i2c_writeb(tp->client, amb_temp_limits[tp->index].low_pwroff,
480 		       MAX1617_WR_AMB_LOWLIM);
481 	bbc_i2c_writeb(tp->client, cpu_temp_limits[tp->index].high_pwroff,
482 		       MAX1617_WR_CPU_HIGHLIM);
483 	bbc_i2c_writeb(tp->client, cpu_temp_limits[tp->index].low_pwroff,
484 		       MAX1617_WR_CPU_LOWLIM);
485 
486 	get_current_temps(tp);
487 	tp->prev_cpu_temp = tp->avg_cpu_temp = tp->curr_cpu_temp;
488 	tp->prev_amb_temp = tp->avg_amb_temp = tp->curr_amb_temp;
489 
490 	tp->fan_todo[FAN_AMBIENT] = FAN_SAME;
491 	tp->fan_todo[FAN_CPU] = FAN_SAME;
492 }
493 
494 static void attach_one_fan(struct bbc_i2c_bus *bp, struct platform_device *op,
495 			   int fan_idx)
496 {
497 	struct bbc_fan_control *fp;
498 
499 	fp = kzalloc(sizeof(*fp), GFP_KERNEL);
500 	if (!fp)
501 		return;
502 
503 	INIT_LIST_HEAD(&fp->bp_list);
504 	INIT_LIST_HEAD(&fp->glob_list);
505 
506 	fp->client = bbc_i2c_attach(bp, op);
507 	if (!fp->client) {
508 		kfree(fp);
509 		return;
510 	}
511 
512 	fp->index = fan_idx;
513 
514 	list_add(&fp->glob_list, &all_fans);
515 	list_add(&fp->bp_list, &bp->fans);
516 
517 	/* The i2c device controlling the fans is write-only.
518 	 * So the only way to keep track of the current power
519 	 * level fed to the fans is via software.  Choose half
520 	 * power for cpu/system and 'on' fo the powersupply fan
521 	 * and set it now.
522 	 */
523 	fp->psupply_fan_on = 1;
524 	fp->cpu_fan_speed = (FAN_SPEED_MAX - FAN_SPEED_MIN) / 2;
525 	fp->cpu_fan_speed += FAN_SPEED_MIN;
526 	fp->system_fan_speed = (FAN_SPEED_MAX - FAN_SPEED_MIN) / 2;
527 	fp->system_fan_speed += FAN_SPEED_MIN;
528 
529 	set_fan_speeds(fp);
530 }
531 
/* Detach the i2c client of @tp and free @tp.  The caller must already
 * have removed @tp from any list it was on; nothing is unlinked here.
 */
static void destroy_one_temp(struct bbc_cpu_temperature *tp)
{
	bbc_i2c_detach(tp->client);
	kfree(tp);
}
537 
538 static void destroy_all_temps(struct bbc_i2c_bus *bp)
539 {
540 	struct bbc_cpu_temperature *tp, *tpos;
541 
542 	list_for_each_entry_safe(tp, tpos, &bp->temps, bp_list) {
543 		list_del(&tp->bp_list);
544 		list_del(&tp->glob_list);
545 		destroy_one_temp(tp);
546 	}
547 }
548 
/* Detach the i2c client of @fp and free @fp.  The caller must already
 * have removed @fp from any list it was on; nothing is unlinked here.
 */
static void destroy_one_fan(struct bbc_fan_control *fp)
{
	bbc_i2c_detach(fp->client);
	kfree(fp);
}
554 
555 static void destroy_all_fans(struct bbc_i2c_bus *bp)
556 {
557 	struct bbc_fan_control *fp, *fpos;
558 
559 	list_for_each_entry_safe(fp, fpos, &bp->fans, bp_list) {
560 		list_del(&fp->bp_list);
561 		list_del(&fp->glob_list);
562 		destroy_one_fan(fp);
563 	}
564 }
565 
566 int bbc_envctrl_init(struct bbc_i2c_bus *bp)
567 {
568 	struct platform_device *op;
569 	int temp_index = 0;
570 	int fan_index = 0;
571 	int devidx = 0;
572 
573 	while ((op = bbc_i2c_getdev(bp, devidx++)) != NULL) {
574 		if (of_node_name_eq(op->dev.of_node, "temperature"))
575 			attach_one_temp(bp, op, temp_index++);
576 		if (of_node_name_eq(op->dev.of_node, "fan-control"))
577 			attach_one_fan(bp, op, fan_index++);
578 	}
579 	if (temp_index != 0 && fan_index != 0) {
580 		kenvctrld_task = kthread_run(kenvctrld, NULL, "kenvctrld");
581 		if (IS_ERR(kenvctrld_task)) {
582 			int err = PTR_ERR(kenvctrld_task);
583 
584 			kenvctrld_task = NULL;
585 			destroy_all_temps(bp);
586 			destroy_all_fans(bp);
587 			return err;
588 		}
589 	}
590 
591 	return 0;
592 }
593 
594 void bbc_envctrl_cleanup(struct bbc_i2c_bus *bp)
595 {
596 	if (kenvctrld_task)
597 		kthread_stop(kenvctrld_task);
598 
599 	destroy_all_temps(bp);
600 	destroy_all_fans(bp);
601 }
602