1 // SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
2 /* Copyright (c) 2016-2018 Mellanox Technologies. All rights reserved
3  * Copyright (c) 2016 Ivan Vecera <cera@cera.cz>
4  */
5 
6 #include <linux/kernel.h>
7 #include <linux/types.h>
8 #include <linux/device.h>
9 #include <linux/sysfs.h>
10 #include <linux/thermal.h>
11 #include <linux/err.h>
12 #include <linux/sfp.h>
13 
14 #include "core.h"
15 #include "core_env.h"
16 
/* Thermal zone polling intervals; the slow one is selected when the bus is
 * flagged as low frequency.
 */
#define MLXSW_THERMAL_POLL_INT	1000	/* ms */
#define MLXSW_THERMAL_SLOW_POLL_INT	20000	/* ms */
/* Default trip temperatures and hysteresis, in millidegrees Celsius. */
#define MLXSW_THERMAL_ASIC_TEMP_NORM	75000	/* 75C */
#define MLXSW_THERMAL_ASIC_TEMP_HIGH	85000	/* 85C */
#define MLXSW_THERMAL_ASIC_TEMP_HOT	105000	/* 105C */
#define MLXSW_THERMAL_HYSTERESIS_TEMP	5000	/* 5C */
/* Offset between a module's "high" and "normal" trip points: double the
 * hysteresis value.
 */
#define MLXSW_THERMAL_MODULE_TEMP_SHIFT	(MLXSW_THERMAL_HYSTERESIS_TEMP * 2)
/* Size of the buffer used to format thermal zone names. */
#define MLXSW_THERMAL_ZONE_MAX_NAME	16
/* Score given to a thermal zone whose temperature is not below any trip. */
#define MLXSW_THERMAL_TEMP_SCORE_MAX	GENMASK(31, 0)
/* Highest cooling state and the PWM duty value it corresponds to. */
#define MLXSW_THERMAL_MAX_STATE	10
#define MLXSW_THERMAL_MAX_DUTY	255
/* Minimum and maximum fan allowed speed in percent: from 20% to 100%. Values
 * MLXSW_THERMAL_MAX_STATE + x, where x is between 2 and 10 are used for
 * setting fan speed dynamic minimum. For example, if value is set to 14 (40%)
 * cooling levels vector will be set to 4, 4, 4, 4, 4, 5, 6, 7, 8, 9, 10 to
 * introduce PWM speed in percent: 40, 40, 40, 40, 40, 50, 60, 70, 80, 90, 100.
 */
#define MLXSW_THERMAL_SPEED_MIN		(MLXSW_THERMAL_MAX_STATE + 2)
#define MLXSW_THERMAL_SPEED_MAX		(MLXSW_THERMAL_MAX_STATE * 2)
#define MLXSW_THERMAL_SPEED_MIN_LEVEL	2		/* 20% */
37 
/* External cooling devices, allowed for binding to mlxsw thermal zones.
 * A cooling device is accepted when its type string contains one of these
 * names (see mlxsw_get_cooling_device_idx()).
 */
static char * const mlxsw_thermal_external_allowed_cdev[] = {
	"mlxreg_fan",
};
42 
/* Indices of the trip points inside the per-zone trips arrays. */
enum mlxsw_thermal_trips {
	MLXSW_THERMAL_TEMP_TRIP_NORM,
	MLXSW_THERMAL_TEMP_TRIP_HIGH,
	MLXSW_THERMAL_TEMP_TRIP_HOT,
};
48 
/* Description of a single trip point of a thermal zone. */
struct mlxsw_thermal_trip {
	int	type;		/* THERMAL_TRIP_* value reported for the trip */
	int	temp;		/* trip temperature */
	int	hyst;		/* trip hysteresis */
	/* Cooling state range handed to thermal_zone_bind_cooling_device()
	 * when a cooling device is bound to this trip.
	 */
	int	min_state;
	int	max_state;
};
56 
/* Template trip points, copied into the ASIC, module and gearbox thermal
 * zones when they are initialized. Entries are ordered as in
 * enum mlxsw_thermal_trips.
 */
static const struct mlxsw_thermal_trip default_thermal_trips[] = {
	{	/* In range - 0-40% PWM */
		.type		= THERMAL_TRIP_ACTIVE,
		.temp		= MLXSW_THERMAL_ASIC_TEMP_NORM,
		.hyst		= MLXSW_THERMAL_HYSTERESIS_TEMP,
		.min_state	= 0,
		.max_state	= (4 * MLXSW_THERMAL_MAX_STATE) / 10,
	},
	{
		/* In range - 40-100% PWM */
		.type		= THERMAL_TRIP_ACTIVE,
		.temp		= MLXSW_THERMAL_ASIC_TEMP_HIGH,
		.hyst		= MLXSW_THERMAL_HYSTERESIS_TEMP,
		.min_state	= (4 * MLXSW_THERMAL_MAX_STATE) / 10,
		.max_state	= MLXSW_THERMAL_MAX_STATE,
	},
	{	/* Warning */
		.type		= THERMAL_TRIP_HOT,
		.temp		= MLXSW_THERMAL_ASIC_TEMP_HOT,
		.min_state	= MLXSW_THERMAL_MAX_STATE,
		.max_state	= MLXSW_THERMAL_MAX_STATE,
	},
};
80 
/* Number of trip points in every mlxsw thermal zone. */
#define MLXSW_THERMAL_NUM_TRIPS	ARRAY_SIZE(default_thermal_trips)

/* Make sure all trips are writable: one bit set per trip point, passed as the
 * mask argument of thermal_zone_device_register().
 */
#define MLXSW_THERMAL_TRIP_MASK	(BIT(MLXSW_THERMAL_NUM_TRIPS) - 1)
85 
86 struct mlxsw_thermal;
87 
/* Per-module (or per-gearbox) thermal zone state. */
struct mlxsw_thermal_module {
	struct mlxsw_thermal *parent;	/* owning thermal instance */
	struct thermal_zone_device *tzdev;	/* zone registered for this entry */
	struct mlxsw_thermal_trip trips[MLXSW_THERMAL_NUM_TRIPS];
	int module; /* Module or gearbox number */
};
94 
/* Thermal control state of one mlxsw device. */
struct mlxsw_thermal {
	struct mlxsw_core *core;
	const struct mlxsw_bus_info *bus_info;
	struct thermal_zone_device *tzdev;	/* ASIC thermal zone */
	int polling_delay;	/* polling interval used for all zones, ms */
	/* Cooling devices registered by this driver, indexed by PWM. */
	struct thermal_cooling_device *cdevs[MLXSW_MFCR_PWMS_MAX];
	/* Maps a requested cooling state to the state actually applied. */
	u8 cooling_levels[MLXSW_THERMAL_MAX_STATE + 1];
	struct mlxsw_thermal_trip trips[MLXSW_THERMAL_NUM_TRIPS];
	struct mlxsw_thermal_module *tz_module_arr;
	u8 tz_module_num;	/* number of entries in tz_module_arr */
	struct mlxsw_thermal_module *tz_gearbox_arr;
	u8 tz_gearbox_num;	/* number of entries in tz_gearbox_arr */
	/* Highest score seen so far and the zone that produced it, see
	 * mlxsw_thermal_tz_score_update().
	 */
	unsigned int tz_highest_score;
	struct thermal_zone_device *tz_highest_dev;
};
110 
111 static inline u8 mlxsw_state_to_duty(int state)
112 {
113 	return DIV_ROUND_CLOSEST(state * MLXSW_THERMAL_MAX_DUTY,
114 				 MLXSW_THERMAL_MAX_STATE);
115 }
116 
117 static inline int mlxsw_duty_to_state(u8 duty)
118 {
119 	return DIV_ROUND_CLOSEST(duty * MLXSW_THERMAL_MAX_STATE,
120 				 MLXSW_THERMAL_MAX_DUTY);
121 }
122 
123 static int mlxsw_get_cooling_device_idx(struct mlxsw_thermal *thermal,
124 					struct thermal_cooling_device *cdev)
125 {
126 	int i;
127 
128 	for (i = 0; i < MLXSW_MFCR_PWMS_MAX; i++)
129 		if (thermal->cdevs[i] == cdev)
130 			return i;
131 
132 	/* Allow mlxsw thermal zone binding to an external cooling device */
133 	for (i = 0; i < ARRAY_SIZE(mlxsw_thermal_external_allowed_cdev); i++) {
134 		if (strnstr(cdev->type, mlxsw_thermal_external_allowed_cdev[i],
135 			    strlen(cdev->type)))
136 			return 0;
137 	}
138 
139 	return -ENODEV;
140 }
141 
142 static void
143 mlxsw_thermal_module_trips_reset(struct mlxsw_thermal_module *tz)
144 {
145 	tz->trips[MLXSW_THERMAL_TEMP_TRIP_NORM].temp = 0;
146 	tz->trips[MLXSW_THERMAL_TEMP_TRIP_HIGH].temp = 0;
147 	tz->trips[MLXSW_THERMAL_TEMP_TRIP_HOT].temp = 0;
148 }
149 
150 static int
151 mlxsw_thermal_module_trips_update(struct device *dev, struct mlxsw_core *core,
152 				  struct mlxsw_thermal_module *tz,
153 				  int crit_temp, int emerg_temp)
154 {
155 	int err;
156 
157 	/* Do not try to query temperature thresholds directly from the module's
158 	 * EEPROM if we got valid thresholds from MTMP.
159 	 */
160 	if (!emerg_temp || !crit_temp) {
161 		err = mlxsw_env_module_temp_thresholds_get(core, tz->module,
162 							   SFP_TEMP_HIGH_WARN,
163 							   &crit_temp);
164 		if (err)
165 			return err;
166 
167 		err = mlxsw_env_module_temp_thresholds_get(core, tz->module,
168 							   SFP_TEMP_HIGH_ALARM,
169 							   &emerg_temp);
170 		if (err)
171 			return err;
172 	}
173 
174 	if (crit_temp > emerg_temp) {
175 		dev_warn(dev, "%s : Critical threshold %d is above emergency threshold %d\n",
176 			 tz->tzdev->type, crit_temp, emerg_temp);
177 		return 0;
178 	}
179 
180 	/* According to the system thermal requirements, the thermal zones are
181 	 * defined with three trip points. The critical and emergency
182 	 * temperature thresholds, provided by QSFP module are set as "active"
183 	 * and "hot" trip points, "normal" trip point is derived from "active"
184 	 * by subtracting double hysteresis value.
185 	 */
186 	if (crit_temp >= MLXSW_THERMAL_MODULE_TEMP_SHIFT)
187 		tz->trips[MLXSW_THERMAL_TEMP_TRIP_NORM].temp = crit_temp -
188 					MLXSW_THERMAL_MODULE_TEMP_SHIFT;
189 	else
190 		tz->trips[MLXSW_THERMAL_TEMP_TRIP_NORM].temp = crit_temp;
191 	tz->trips[MLXSW_THERMAL_TEMP_TRIP_HIGH].temp = crit_temp;
192 	tz->trips[MLXSW_THERMAL_TEMP_TRIP_HOT].temp = emerg_temp;
193 
194 	return 0;
195 }
196 
197 static void mlxsw_thermal_tz_score_update(struct mlxsw_thermal *thermal,
198 					  struct thermal_zone_device *tzdev,
199 					  struct mlxsw_thermal_trip *trips,
200 					  int temp)
201 {
202 	struct mlxsw_thermal_trip *trip = trips;
203 	unsigned int score, delta, i, shift = 1;
204 
205 	/* Calculate thermal zone score, if temperature is above the hot
206 	 * threshold score is set to MLXSW_THERMAL_TEMP_SCORE_MAX.
207 	 */
208 	score = MLXSW_THERMAL_TEMP_SCORE_MAX;
209 	for (i = MLXSW_THERMAL_TEMP_TRIP_NORM; i < MLXSW_THERMAL_NUM_TRIPS;
210 	     i++, trip++) {
211 		if (temp < trip->temp) {
212 			delta = DIV_ROUND_CLOSEST(temp, trip->temp - temp);
213 			score = delta * shift;
214 			break;
215 		}
216 		shift *= 256;
217 	}
218 
219 	if (score > thermal->tz_highest_score) {
220 		thermal->tz_highest_score = score;
221 		thermal->tz_highest_dev = tzdev;
222 	}
223 }
224 
225 static int mlxsw_thermal_bind(struct thermal_zone_device *tzdev,
226 			      struct thermal_cooling_device *cdev)
227 {
228 	struct mlxsw_thermal *thermal = tzdev->devdata;
229 	struct device *dev = thermal->bus_info->dev;
230 	int i, err;
231 
232 	/* If the cooling device is one of ours bind it */
233 	if (mlxsw_get_cooling_device_idx(thermal, cdev) < 0)
234 		return 0;
235 
236 	for (i = 0; i < MLXSW_THERMAL_NUM_TRIPS; i++) {
237 		const struct mlxsw_thermal_trip *trip = &thermal->trips[i];
238 
239 		err = thermal_zone_bind_cooling_device(tzdev, i, cdev,
240 						       trip->max_state,
241 						       trip->min_state,
242 						       THERMAL_WEIGHT_DEFAULT);
243 		if (err < 0) {
244 			dev_err(dev, "Failed to bind cooling device to trip %d\n", i);
245 			return err;
246 		}
247 	}
248 	return 0;
249 }
250 
251 static int mlxsw_thermal_unbind(struct thermal_zone_device *tzdev,
252 				struct thermal_cooling_device *cdev)
253 {
254 	struct mlxsw_thermal *thermal = tzdev->devdata;
255 	struct device *dev = thermal->bus_info->dev;
256 	int i;
257 	int err;
258 
259 	/* If the cooling device is our one unbind it */
260 	if (mlxsw_get_cooling_device_idx(thermal, cdev) < 0)
261 		return 0;
262 
263 	for (i = 0; i < MLXSW_THERMAL_NUM_TRIPS; i++) {
264 		err = thermal_zone_unbind_cooling_device(tzdev, i, cdev);
265 		if (err < 0) {
266 			dev_err(dev, "Failed to unbind cooling device\n");
267 			return err;
268 		}
269 	}
270 	return 0;
271 }
272 
273 static int mlxsw_thermal_get_temp(struct thermal_zone_device *tzdev,
274 				  int *p_temp)
275 {
276 	struct mlxsw_thermal *thermal = tzdev->devdata;
277 	struct device *dev = thermal->bus_info->dev;
278 	char mtmp_pl[MLXSW_REG_MTMP_LEN];
279 	int temp;
280 	int err;
281 
282 	mlxsw_reg_mtmp_pack(mtmp_pl, 0, false, false);
283 
284 	err = mlxsw_reg_query(thermal->core, MLXSW_REG(mtmp), mtmp_pl);
285 	if (err) {
286 		dev_err(dev, "Failed to query temp sensor\n");
287 		return err;
288 	}
289 	mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL, NULL, NULL);
290 	if (temp > 0)
291 		mlxsw_thermal_tz_score_update(thermal, tzdev, thermal->trips,
292 					      temp);
293 
294 	*p_temp = temp;
295 	return 0;
296 }
297 
298 static int mlxsw_thermal_get_trip_type(struct thermal_zone_device *tzdev,
299 				       int trip,
300 				       enum thermal_trip_type *p_type)
301 {
302 	struct mlxsw_thermal *thermal = tzdev->devdata;
303 
304 	if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS)
305 		return -EINVAL;
306 
307 	*p_type = thermal->trips[trip].type;
308 	return 0;
309 }
310 
311 static int mlxsw_thermal_get_trip_temp(struct thermal_zone_device *tzdev,
312 				       int trip, int *p_temp)
313 {
314 	struct mlxsw_thermal *thermal = tzdev->devdata;
315 
316 	if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS)
317 		return -EINVAL;
318 
319 	*p_temp = thermal->trips[trip].temp;
320 	return 0;
321 }
322 
323 static int mlxsw_thermal_set_trip_temp(struct thermal_zone_device *tzdev,
324 				       int trip, int temp)
325 {
326 	struct mlxsw_thermal *thermal = tzdev->devdata;
327 
328 	if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS)
329 		return -EINVAL;
330 
331 	thermal->trips[trip].temp = temp;
332 	return 0;
333 }
334 
335 static int mlxsw_thermal_get_trip_hyst(struct thermal_zone_device *tzdev,
336 				       int trip, int *p_hyst)
337 {
338 	struct mlxsw_thermal *thermal = tzdev->devdata;
339 
340 	*p_hyst = thermal->trips[trip].hyst;
341 	return 0;
342 }
343 
344 static int mlxsw_thermal_set_trip_hyst(struct thermal_zone_device *tzdev,
345 				       int trip, int hyst)
346 {
347 	struct mlxsw_thermal *thermal = tzdev->devdata;
348 
349 	thermal->trips[trip].hyst = hyst;
350 	return 0;
351 }
352 
353 static int mlxsw_thermal_trend_get(struct thermal_zone_device *tzdev,
354 				   int trip, enum thermal_trend *trend)
355 {
356 	struct mlxsw_thermal *thermal = tzdev->devdata;
357 
358 	if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS)
359 		return -EINVAL;
360 
361 	if (tzdev == thermal->tz_highest_dev)
362 		return 1;
363 
364 	*trend = THERMAL_TREND_STABLE;
365 	return 0;
366 }
367 
/* Callbacks of the ASIC thermal zone registered in mlxsw_thermal_init(). */
static struct thermal_zone_device_ops mlxsw_thermal_ops = {
	.bind = mlxsw_thermal_bind,
	.unbind = mlxsw_thermal_unbind,
	.get_temp = mlxsw_thermal_get_temp,
	.get_trip_type	= mlxsw_thermal_get_trip_type,
	.get_trip_temp	= mlxsw_thermal_get_trip_temp,
	.set_trip_temp	= mlxsw_thermal_set_trip_temp,
	.get_trip_hyst	= mlxsw_thermal_get_trip_hyst,
	.set_trip_hyst	= mlxsw_thermal_set_trip_hyst,
	.get_trend	= mlxsw_thermal_trend_get,
};
379 
380 static int mlxsw_thermal_module_bind(struct thermal_zone_device *tzdev,
381 				     struct thermal_cooling_device *cdev)
382 {
383 	struct mlxsw_thermal_module *tz = tzdev->devdata;
384 	struct mlxsw_thermal *thermal = tz->parent;
385 	int i, j, err;
386 
387 	/* If the cooling device is one of ours bind it */
388 	if (mlxsw_get_cooling_device_idx(thermal, cdev) < 0)
389 		return 0;
390 
391 	for (i = 0; i < MLXSW_THERMAL_NUM_TRIPS; i++) {
392 		const struct mlxsw_thermal_trip *trip = &tz->trips[i];
393 
394 		err = thermal_zone_bind_cooling_device(tzdev, i, cdev,
395 						       trip->max_state,
396 						       trip->min_state,
397 						       THERMAL_WEIGHT_DEFAULT);
398 		if (err < 0)
399 			goto err_bind_cooling_device;
400 	}
401 	return 0;
402 
403 err_bind_cooling_device:
404 	for (j = i - 1; j >= 0; j--)
405 		thermal_zone_unbind_cooling_device(tzdev, j, cdev);
406 	return err;
407 }
408 
409 static int mlxsw_thermal_module_unbind(struct thermal_zone_device *tzdev,
410 				       struct thermal_cooling_device *cdev)
411 {
412 	struct mlxsw_thermal_module *tz = tzdev->devdata;
413 	struct mlxsw_thermal *thermal = tz->parent;
414 	int i;
415 	int err;
416 
417 	/* If the cooling device is one of ours unbind it */
418 	if (mlxsw_get_cooling_device_idx(thermal, cdev) < 0)
419 		return 0;
420 
421 	for (i = 0; i < MLXSW_THERMAL_NUM_TRIPS; i++) {
422 		err = thermal_zone_unbind_cooling_device(tzdev, i, cdev);
423 		WARN_ON(err);
424 	}
425 	return err;
426 }
427 
428 static void
429 mlxsw_thermal_module_temp_and_thresholds_get(struct mlxsw_core *core,
430 					     u16 sensor_index, int *p_temp,
431 					     int *p_crit_temp,
432 					     int *p_emerg_temp)
433 {
434 	char mtmp_pl[MLXSW_REG_MTMP_LEN];
435 	int err;
436 
437 	/* Read module temperature and thresholds. */
438 	mlxsw_reg_mtmp_pack(mtmp_pl, sensor_index, false, false);
439 	err = mlxsw_reg_query(core, MLXSW_REG(mtmp), mtmp_pl);
440 	if (err) {
441 		/* Set temperature and thresholds to zero to avoid passing
442 		 * uninitialized data back to the caller.
443 		 */
444 		*p_temp = 0;
445 		*p_crit_temp = 0;
446 		*p_emerg_temp = 0;
447 
448 		return;
449 	}
450 	mlxsw_reg_mtmp_unpack(mtmp_pl, p_temp, NULL, p_crit_temp, p_emerg_temp,
451 			      NULL);
452 }
453 
454 static int mlxsw_thermal_module_temp_get(struct thermal_zone_device *tzdev,
455 					 int *p_temp)
456 {
457 	struct mlxsw_thermal_module *tz = tzdev->devdata;
458 	struct mlxsw_thermal *thermal = tz->parent;
459 	int temp, crit_temp, emerg_temp;
460 	struct device *dev;
461 	u16 sensor_index;
462 	int err;
463 
464 	dev = thermal->bus_info->dev;
465 	sensor_index = MLXSW_REG_MTMP_MODULE_INDEX_MIN + tz->module;
466 
467 	/* Read module temperature and thresholds. */
468 	mlxsw_thermal_module_temp_and_thresholds_get(thermal->core,
469 						     sensor_index, &temp,
470 						     &crit_temp, &emerg_temp);
471 	*p_temp = temp;
472 
473 	if (!temp)
474 		return 0;
475 
476 	/* Update trip points. */
477 	err = mlxsw_thermal_module_trips_update(dev, thermal->core, tz,
478 						crit_temp, emerg_temp);
479 	if (!err && temp > 0)
480 		mlxsw_thermal_tz_score_update(thermal, tzdev, tz->trips, temp);
481 
482 	return 0;
483 }
484 
485 static int
486 mlxsw_thermal_module_trip_type_get(struct thermal_zone_device *tzdev, int trip,
487 				   enum thermal_trip_type *p_type)
488 {
489 	struct mlxsw_thermal_module *tz = tzdev->devdata;
490 
491 	if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS)
492 		return -EINVAL;
493 
494 	*p_type = tz->trips[trip].type;
495 	return 0;
496 }
497 
498 static int
499 mlxsw_thermal_module_trip_temp_get(struct thermal_zone_device *tzdev,
500 				   int trip, int *p_temp)
501 {
502 	struct mlxsw_thermal_module *tz = tzdev->devdata;
503 
504 	if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS)
505 		return -EINVAL;
506 
507 	*p_temp = tz->trips[trip].temp;
508 	return 0;
509 }
510 
511 static int
512 mlxsw_thermal_module_trip_temp_set(struct thermal_zone_device *tzdev,
513 				   int trip, int temp)
514 {
515 	struct mlxsw_thermal_module *tz = tzdev->devdata;
516 
517 	if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS)
518 		return -EINVAL;
519 
520 	tz->trips[trip].temp = temp;
521 	return 0;
522 }
523 
524 static int
525 mlxsw_thermal_module_trip_hyst_get(struct thermal_zone_device *tzdev, int trip,
526 				   int *p_hyst)
527 {
528 	struct mlxsw_thermal_module *tz = tzdev->devdata;
529 
530 	*p_hyst = tz->trips[trip].hyst;
531 	return 0;
532 }
533 
534 static int
535 mlxsw_thermal_module_trip_hyst_set(struct thermal_zone_device *tzdev, int trip,
536 				   int hyst)
537 {
538 	struct mlxsw_thermal_module *tz = tzdev->devdata;
539 
540 	tz->trips[trip].hyst = hyst;
541 	return 0;
542 }
543 
544 static int mlxsw_thermal_module_trend_get(struct thermal_zone_device *tzdev,
545 					  int trip, enum thermal_trend *trend)
546 {
547 	struct mlxsw_thermal_module *tz = tzdev->devdata;
548 	struct mlxsw_thermal *thermal = tz->parent;
549 
550 	if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS)
551 		return -EINVAL;
552 
553 	if (tzdev == thermal->tz_highest_dev)
554 		return 1;
555 
556 	*trend = THERMAL_TREND_STABLE;
557 	return 0;
558 }
559 
/* Callbacks of the module thermal zones registered in
 * mlxsw_thermal_module_tz_init().
 */
static struct thermal_zone_device_ops mlxsw_thermal_module_ops = {
	.bind		= mlxsw_thermal_module_bind,
	.unbind		= mlxsw_thermal_module_unbind,
	.get_temp	= mlxsw_thermal_module_temp_get,
	.get_trip_type	= mlxsw_thermal_module_trip_type_get,
	.get_trip_temp	= mlxsw_thermal_module_trip_temp_get,
	.set_trip_temp	= mlxsw_thermal_module_trip_temp_set,
	.get_trip_hyst	= mlxsw_thermal_module_trip_hyst_get,
	.set_trip_hyst	= mlxsw_thermal_module_trip_hyst_set,
	.get_trend	= mlxsw_thermal_module_trend_get,
};
571 
572 static int mlxsw_thermal_gearbox_temp_get(struct thermal_zone_device *tzdev,
573 					  int *p_temp)
574 {
575 	struct mlxsw_thermal_module *tz = tzdev->devdata;
576 	struct mlxsw_thermal *thermal = tz->parent;
577 	char mtmp_pl[MLXSW_REG_MTMP_LEN];
578 	u16 index;
579 	int temp;
580 	int err;
581 
582 	index = MLXSW_REG_MTMP_GBOX_INDEX_MIN + tz->module;
583 	mlxsw_reg_mtmp_pack(mtmp_pl, index, false, false);
584 
585 	err = mlxsw_reg_query(thermal->core, MLXSW_REG(mtmp), mtmp_pl);
586 	if (err)
587 		return err;
588 
589 	mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL, NULL, NULL);
590 	if (temp > 0)
591 		mlxsw_thermal_tz_score_update(thermal, tzdev, tz->trips, temp);
592 
593 	*p_temp = temp;
594 	return 0;
595 }
596 
/* Callbacks of the gearbox thermal zones registered in
 * mlxsw_thermal_gearbox_tz_init(). Apart from .get_temp they are shared with
 * the module thermal zones.
 */
static struct thermal_zone_device_ops mlxsw_thermal_gearbox_ops = {
	.bind		= mlxsw_thermal_module_bind,
	.unbind		= mlxsw_thermal_module_unbind,
	.get_temp	= mlxsw_thermal_gearbox_temp_get,
	.get_trip_type	= mlxsw_thermal_module_trip_type_get,
	.get_trip_temp	= mlxsw_thermal_module_trip_temp_get,
	.set_trip_temp	= mlxsw_thermal_module_trip_temp_set,
	.get_trip_hyst	= mlxsw_thermal_module_trip_hyst_get,
	.set_trip_hyst	= mlxsw_thermal_module_trip_hyst_set,
	.get_trend	= mlxsw_thermal_module_trend_get,
};
608 
609 static int mlxsw_thermal_get_max_state(struct thermal_cooling_device *cdev,
610 				       unsigned long *p_state)
611 {
612 	*p_state = MLXSW_THERMAL_MAX_STATE;
613 	return 0;
614 }
615 
616 static int mlxsw_thermal_get_cur_state(struct thermal_cooling_device *cdev,
617 				       unsigned long *p_state)
618 
619 {
620 	struct mlxsw_thermal *thermal = cdev->devdata;
621 	struct device *dev = thermal->bus_info->dev;
622 	char mfsc_pl[MLXSW_REG_MFSC_LEN];
623 	int err, idx;
624 	u8 duty;
625 
626 	idx = mlxsw_get_cooling_device_idx(thermal, cdev);
627 	if (idx < 0)
628 		return idx;
629 
630 	mlxsw_reg_mfsc_pack(mfsc_pl, idx, 0);
631 	err = mlxsw_reg_query(thermal->core, MLXSW_REG(mfsc), mfsc_pl);
632 	if (err) {
633 		dev_err(dev, "Failed to query PWM duty\n");
634 		return err;
635 	}
636 
637 	duty = mlxsw_reg_mfsc_pwm_duty_cycle_get(mfsc_pl);
638 	*p_state = mlxsw_duty_to_state(duty);
639 	return 0;
640 }
641 
642 static int mlxsw_thermal_set_cur_state(struct thermal_cooling_device *cdev,
643 				       unsigned long state)
644 
645 {
646 	struct mlxsw_thermal *thermal = cdev->devdata;
647 	struct device *dev = thermal->bus_info->dev;
648 	char mfsc_pl[MLXSW_REG_MFSC_LEN];
649 	unsigned long cur_state, i;
650 	int idx;
651 	u8 duty;
652 	int err;
653 
654 	idx = mlxsw_get_cooling_device_idx(thermal, cdev);
655 	if (idx < 0)
656 		return idx;
657 
658 	/* Verify if this request is for changing allowed fan dynamical
659 	 * minimum. If it is - update cooling levels accordingly and update
660 	 * state, if current state is below the newly requested minimum state.
661 	 * For example, if current state is 5, and minimal state is to be
662 	 * changed from 4 to 6, thermal->cooling_levels[0 to 5] will be changed
663 	 * all from 4 to 6. And state 5 (thermal->cooling_levels[4]) should be
664 	 * overwritten.
665 	 */
666 	if (state >= MLXSW_THERMAL_SPEED_MIN &&
667 	    state <= MLXSW_THERMAL_SPEED_MAX) {
668 		state -= MLXSW_THERMAL_MAX_STATE;
669 		for (i = 0; i <= MLXSW_THERMAL_MAX_STATE; i++)
670 			thermal->cooling_levels[i] = max(state, i);
671 
672 		mlxsw_reg_mfsc_pack(mfsc_pl, idx, 0);
673 		err = mlxsw_reg_query(thermal->core, MLXSW_REG(mfsc), mfsc_pl);
674 		if (err)
675 			return err;
676 
677 		duty = mlxsw_reg_mfsc_pwm_duty_cycle_get(mfsc_pl);
678 		cur_state = mlxsw_duty_to_state(duty);
679 
680 		/* If current fan state is lower than requested dynamical
681 		 * minimum, increase fan speed up to dynamical minimum.
682 		 */
683 		if (state < cur_state)
684 			return 0;
685 
686 		state = cur_state;
687 	}
688 
689 	if (state > MLXSW_THERMAL_MAX_STATE)
690 		return -EINVAL;
691 
692 	/* Normalize the state to the valid speed range. */
693 	state = thermal->cooling_levels[state];
694 	mlxsw_reg_mfsc_pack(mfsc_pl, idx, mlxsw_state_to_duty(state));
695 	err = mlxsw_reg_write(thermal->core, MLXSW_REG(mfsc), mfsc_pl);
696 	if (err) {
697 		dev_err(dev, "Failed to write PWM duty\n");
698 		return err;
699 	}
700 	return 0;
701 }
702 
/* Callbacks of the "mlxsw_fan" cooling devices registered in
 * mlxsw_thermal_init().
 */
static const struct thermal_cooling_device_ops mlxsw_cooling_ops = {
	.get_max_state	= mlxsw_thermal_get_max_state,
	.get_cur_state	= mlxsw_thermal_get_cur_state,
	.set_cur_state	= mlxsw_thermal_set_cur_state,
};
708 
709 static int
710 mlxsw_thermal_module_tz_init(struct mlxsw_thermal_module *module_tz)
711 {
712 	char tz_name[MLXSW_THERMAL_ZONE_MAX_NAME];
713 	int err;
714 
715 	snprintf(tz_name, sizeof(tz_name), "mlxsw-module%d",
716 		 module_tz->module + 1);
717 	module_tz->tzdev = thermal_zone_device_register(tz_name,
718 							MLXSW_THERMAL_NUM_TRIPS,
719 							MLXSW_THERMAL_TRIP_MASK,
720 							module_tz,
721 							&mlxsw_thermal_module_ops,
722 							NULL, 0,
723 							module_tz->parent->polling_delay);
724 	if (IS_ERR(module_tz->tzdev)) {
725 		err = PTR_ERR(module_tz->tzdev);
726 		return err;
727 	}
728 
729 	err = thermal_zone_device_enable(module_tz->tzdev);
730 	if (err)
731 		thermal_zone_device_unregister(module_tz->tzdev);
732 
733 	return err;
734 }
735 
/* Unregister the thermal zone device of a module. */
static void mlxsw_thermal_module_tz_fini(struct thermal_zone_device *tzdev)
{
	thermal_zone_device_unregister(tzdev);
}
740 
741 static int
742 mlxsw_thermal_module_init(struct device *dev, struct mlxsw_core *core,
743 			  struct mlxsw_thermal *thermal, u8 module)
744 {
745 	struct mlxsw_thermal_module *module_tz;
746 	int dummy_temp, crit_temp, emerg_temp;
747 	u16 sensor_index;
748 
749 	sensor_index = MLXSW_REG_MTMP_MODULE_INDEX_MIN + module;
750 	module_tz = &thermal->tz_module_arr[module];
751 	/* Skip if parent is already set (case of port split). */
752 	if (module_tz->parent)
753 		return 0;
754 	module_tz->module = module;
755 	module_tz->parent = thermal;
756 	memcpy(module_tz->trips, default_thermal_trips,
757 	       sizeof(thermal->trips));
758 	/* Initialize all trip point. */
759 	mlxsw_thermal_module_trips_reset(module_tz);
760 	/* Read module temperature and thresholds. */
761 	mlxsw_thermal_module_temp_and_thresholds_get(core, sensor_index, &dummy_temp,
762 						     &crit_temp, &emerg_temp);
763 	/* Update trip point according to the module data. */
764 	return mlxsw_thermal_module_trips_update(dev, core, module_tz,
765 						 crit_temp, emerg_temp);
766 }
767 
768 static void mlxsw_thermal_module_fini(struct mlxsw_thermal_module *module_tz)
769 {
770 	if (module_tz && module_tz->tzdev) {
771 		mlxsw_thermal_module_tz_fini(module_tz->tzdev);
772 		module_tz->tzdev = NULL;
773 		module_tz->parent = NULL;
774 	}
775 }
776 
777 static int
778 mlxsw_thermal_modules_init(struct device *dev, struct mlxsw_core *core,
779 			   struct mlxsw_thermal *thermal)
780 {
781 	struct mlxsw_thermal_module *module_tz;
782 	char mgpir_pl[MLXSW_REG_MGPIR_LEN];
783 	int i, err;
784 
785 	if (!mlxsw_core_res_query_enabled(core))
786 		return 0;
787 
788 	mlxsw_reg_mgpir_pack(mgpir_pl);
789 	err = mlxsw_reg_query(core, MLXSW_REG(mgpir), mgpir_pl);
790 	if (err)
791 		return err;
792 
793 	mlxsw_reg_mgpir_unpack(mgpir_pl, NULL, NULL, NULL,
794 			       &thermal->tz_module_num);
795 
796 	thermal->tz_module_arr = kcalloc(thermal->tz_module_num,
797 					 sizeof(*thermal->tz_module_arr),
798 					 GFP_KERNEL);
799 	if (!thermal->tz_module_arr)
800 		return -ENOMEM;
801 
802 	for (i = 0; i < thermal->tz_module_num; i++) {
803 		err = mlxsw_thermal_module_init(dev, core, thermal, i);
804 		if (err)
805 			goto err_unreg_tz_module_arr;
806 	}
807 
808 	for (i = 0; i < thermal->tz_module_num; i++) {
809 		module_tz = &thermal->tz_module_arr[i];
810 		if (!module_tz->parent)
811 			continue;
812 		err = mlxsw_thermal_module_tz_init(module_tz);
813 		if (err)
814 			goto err_unreg_tz_module_arr;
815 	}
816 
817 	return 0;
818 
819 err_unreg_tz_module_arr:
820 	for (i = thermal->tz_module_num - 1; i >= 0; i--)
821 		mlxsw_thermal_module_fini(&thermal->tz_module_arr[i]);
822 	kfree(thermal->tz_module_arr);
823 	return err;
824 }
825 
826 static void
827 mlxsw_thermal_modules_fini(struct mlxsw_thermal *thermal)
828 {
829 	int i;
830 
831 	if (!mlxsw_core_res_query_enabled(thermal->core))
832 		return;
833 
834 	for (i = thermal->tz_module_num - 1; i >= 0; i--)
835 		mlxsw_thermal_module_fini(&thermal->tz_module_arr[i]);
836 	kfree(thermal->tz_module_arr);
837 }
838 
839 static int
840 mlxsw_thermal_gearbox_tz_init(struct mlxsw_thermal_module *gearbox_tz)
841 {
842 	char tz_name[MLXSW_THERMAL_ZONE_MAX_NAME];
843 	int ret;
844 
845 	snprintf(tz_name, sizeof(tz_name), "mlxsw-gearbox%d",
846 		 gearbox_tz->module + 1);
847 	gearbox_tz->tzdev = thermal_zone_device_register(tz_name,
848 						MLXSW_THERMAL_NUM_TRIPS,
849 						MLXSW_THERMAL_TRIP_MASK,
850 						gearbox_tz,
851 						&mlxsw_thermal_gearbox_ops,
852 						NULL, 0,
853 						gearbox_tz->parent->polling_delay);
854 	if (IS_ERR(gearbox_tz->tzdev))
855 		return PTR_ERR(gearbox_tz->tzdev);
856 
857 	ret = thermal_zone_device_enable(gearbox_tz->tzdev);
858 	if (ret)
859 		thermal_zone_device_unregister(gearbox_tz->tzdev);
860 
861 	return ret;
862 }
863 
/* Unregister the thermal zone device of a gearbox. */
static void
mlxsw_thermal_gearbox_tz_fini(struct mlxsw_thermal_module *gearbox_tz)
{
	thermal_zone_device_unregister(gearbox_tz->tzdev);
}
869 
870 static int
871 mlxsw_thermal_gearboxes_init(struct device *dev, struct mlxsw_core *core,
872 			     struct mlxsw_thermal *thermal)
873 {
874 	enum mlxsw_reg_mgpir_device_type device_type;
875 	struct mlxsw_thermal_module *gearbox_tz;
876 	char mgpir_pl[MLXSW_REG_MGPIR_LEN];
877 	u8 gbox_num;
878 	int i;
879 	int err;
880 
881 	if (!mlxsw_core_res_query_enabled(core))
882 		return 0;
883 
884 	mlxsw_reg_mgpir_pack(mgpir_pl);
885 	err = mlxsw_reg_query(core, MLXSW_REG(mgpir), mgpir_pl);
886 	if (err)
887 		return err;
888 
889 	mlxsw_reg_mgpir_unpack(mgpir_pl, &gbox_num, &device_type, NULL,
890 			       NULL);
891 	if (device_type != MLXSW_REG_MGPIR_DEVICE_TYPE_GEARBOX_DIE ||
892 	    !gbox_num)
893 		return 0;
894 
895 	thermal->tz_gearbox_num = gbox_num;
896 	thermal->tz_gearbox_arr = kcalloc(thermal->tz_gearbox_num,
897 					  sizeof(*thermal->tz_gearbox_arr),
898 					  GFP_KERNEL);
899 	if (!thermal->tz_gearbox_arr)
900 		return -ENOMEM;
901 
902 	for (i = 0; i < thermal->tz_gearbox_num; i++) {
903 		gearbox_tz = &thermal->tz_gearbox_arr[i];
904 		memcpy(gearbox_tz->trips, default_thermal_trips,
905 		       sizeof(thermal->trips));
906 		gearbox_tz->module = i;
907 		gearbox_tz->parent = thermal;
908 		err = mlxsw_thermal_gearbox_tz_init(gearbox_tz);
909 		if (err)
910 			goto err_unreg_tz_gearbox;
911 	}
912 
913 	return 0;
914 
915 err_unreg_tz_gearbox:
916 	for (i--; i >= 0; i--)
917 		mlxsw_thermal_gearbox_tz_fini(&thermal->tz_gearbox_arr[i]);
918 	kfree(thermal->tz_gearbox_arr);
919 	return err;
920 }
921 
922 static void
923 mlxsw_thermal_gearboxes_fini(struct mlxsw_thermal *thermal)
924 {
925 	int i;
926 
927 	if (!mlxsw_core_res_query_enabled(thermal->core))
928 		return;
929 
930 	for (i = thermal->tz_gearbox_num - 1; i >= 0; i--)
931 		mlxsw_thermal_gearbox_tz_fini(&thermal->tz_gearbox_arr[i]);
932 	kfree(thermal->tz_gearbox_arr);
933 }
934 
935 int mlxsw_thermal_init(struct mlxsw_core *core,
936 		       const struct mlxsw_bus_info *bus_info,
937 		       struct mlxsw_thermal **p_thermal)
938 {
939 	char mfcr_pl[MLXSW_REG_MFCR_LEN] = { 0 };
940 	enum mlxsw_reg_mfcr_pwm_frequency freq;
941 	struct device *dev = bus_info->dev;
942 	struct mlxsw_thermal *thermal;
943 	u16 tacho_active;
944 	u8 pwm_active;
945 	int err, i;
946 
947 	thermal = devm_kzalloc(dev, sizeof(*thermal),
948 			       GFP_KERNEL);
949 	if (!thermal)
950 		return -ENOMEM;
951 
952 	thermal->core = core;
953 	thermal->bus_info = bus_info;
954 	memcpy(thermal->trips, default_thermal_trips, sizeof(thermal->trips));
955 
956 	err = mlxsw_reg_query(thermal->core, MLXSW_REG(mfcr), mfcr_pl);
957 	if (err) {
958 		dev_err(dev, "Failed to probe PWMs\n");
959 		goto err_free_thermal;
960 	}
961 	mlxsw_reg_mfcr_unpack(mfcr_pl, &freq, &tacho_active, &pwm_active);
962 
963 	for (i = 0; i < MLXSW_MFCR_TACHOS_MAX; i++) {
964 		if (tacho_active & BIT(i)) {
965 			char mfsl_pl[MLXSW_REG_MFSL_LEN];
966 
967 			mlxsw_reg_mfsl_pack(mfsl_pl, i, 0, 0);
968 
969 			/* We need to query the register to preserve maximum */
970 			err = mlxsw_reg_query(thermal->core, MLXSW_REG(mfsl),
971 					      mfsl_pl);
972 			if (err)
973 				goto err_free_thermal;
974 
975 			/* set the minimal RPMs to 0 */
976 			mlxsw_reg_mfsl_tach_min_set(mfsl_pl, 0);
977 			err = mlxsw_reg_write(thermal->core, MLXSW_REG(mfsl),
978 					      mfsl_pl);
979 			if (err)
980 				goto err_free_thermal;
981 		}
982 	}
983 	for (i = 0; i < MLXSW_MFCR_PWMS_MAX; i++) {
984 		if (pwm_active & BIT(i)) {
985 			struct thermal_cooling_device *cdev;
986 
987 			cdev = thermal_cooling_device_register("mlxsw_fan",
988 							       thermal,
989 							       &mlxsw_cooling_ops);
990 			if (IS_ERR(cdev)) {
991 				err = PTR_ERR(cdev);
992 				dev_err(dev, "Failed to register cooling device\n");
993 				goto err_unreg_cdevs;
994 			}
995 			thermal->cdevs[i] = cdev;
996 		}
997 	}
998 
999 	/* Initialize cooling levels per PWM state. */
1000 	for (i = 0; i < MLXSW_THERMAL_MAX_STATE; i++)
1001 		thermal->cooling_levels[i] = max(MLXSW_THERMAL_SPEED_MIN_LEVEL,
1002 						 i);
1003 
1004 	thermal->polling_delay = bus_info->low_frequency ?
1005 				 MLXSW_THERMAL_SLOW_POLL_INT :
1006 				 MLXSW_THERMAL_POLL_INT;
1007 
1008 	thermal->tzdev = thermal_zone_device_register("mlxsw",
1009 						      MLXSW_THERMAL_NUM_TRIPS,
1010 						      MLXSW_THERMAL_TRIP_MASK,
1011 						      thermal,
1012 						      &mlxsw_thermal_ops,
1013 						      NULL, 0,
1014 						      thermal->polling_delay);
1015 	if (IS_ERR(thermal->tzdev)) {
1016 		err = PTR_ERR(thermal->tzdev);
1017 		dev_err(dev, "Failed to register thermal zone\n");
1018 		goto err_unreg_cdevs;
1019 	}
1020 
1021 	err = mlxsw_thermal_modules_init(dev, core, thermal);
1022 	if (err)
1023 		goto err_unreg_tzdev;
1024 
1025 	err = mlxsw_thermal_gearboxes_init(dev, core, thermal);
1026 	if (err)
1027 		goto err_unreg_modules_tzdev;
1028 
1029 	err = thermal_zone_device_enable(thermal->tzdev);
1030 	if (err)
1031 		goto err_unreg_gearboxes;
1032 
1033 	*p_thermal = thermal;
1034 	return 0;
1035 
1036 err_unreg_gearboxes:
1037 	mlxsw_thermal_gearboxes_fini(thermal);
1038 err_unreg_modules_tzdev:
1039 	mlxsw_thermal_modules_fini(thermal);
1040 err_unreg_tzdev:
1041 	if (thermal->tzdev) {
1042 		thermal_zone_device_unregister(thermal->tzdev);
1043 		thermal->tzdev = NULL;
1044 	}
1045 err_unreg_cdevs:
1046 	for (i = 0; i < MLXSW_MFCR_PWMS_MAX; i++)
1047 		if (thermal->cdevs[i])
1048 			thermal_cooling_device_unregister(thermal->cdevs[i]);
1049 err_free_thermal:
1050 	devm_kfree(dev, thermal);
1051 	return err;
1052 }
1053 
1054 void mlxsw_thermal_fini(struct mlxsw_thermal *thermal)
1055 {
1056 	int i;
1057 
1058 	mlxsw_thermal_gearboxes_fini(thermal);
1059 	mlxsw_thermal_modules_fini(thermal);
1060 	if (thermal->tzdev) {
1061 		thermal_zone_device_unregister(thermal->tzdev);
1062 		thermal->tzdev = NULL;
1063 	}
1064 
1065 	for (i = 0; i < MLXSW_MFCR_PWMS_MAX; i++) {
1066 		if (thermal->cdevs[i]) {
1067 			thermal_cooling_device_unregister(thermal->cdevs[i]);
1068 			thermal->cdevs[i] = NULL;
1069 		}
1070 	}
1071 
1072 	devm_kfree(thermal->bus_info->dev, thermal);
1073 }
1074