1 // SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
2 /* Copyright (c) 2016-2018 Mellanox Technologies. All rights reserved
3  * Copyright (c) 2016 Ivan Vecera <cera@cera.cz>
4  */
5 
6 #include <linux/kernel.h>
7 #include <linux/types.h>
8 #include <linux/device.h>
9 #include <linux/sysfs.h>
10 #include <linux/thermal.h>
11 #include <linux/err.h>
12 #include <linux/sfp.h>
13 
14 #include "core.h"
15 #include "core_env.h"
16 
/* Thermal zone polling periods; the slow period is selected when the bus is
 * flagged as low frequency.
 */
#define MLXSW_THERMAL_POLL_INT	1000	/* ms */
#define MLXSW_THERMAL_SLOW_POLL_INT	20000	/* ms */
/* Default ASIC trip point temperatures and hysteresis, in millidegrees
 * Celsius.
 */
#define MLXSW_THERMAL_ASIC_TEMP_NORM	75000	/* 75C */
#define MLXSW_THERMAL_ASIC_TEMP_HIGH	85000	/* 85C */
#define MLXSW_THERMAL_ASIC_TEMP_HOT	105000	/* 105C */
#define MLXSW_THERMAL_ASIC_TEMP_CRIT	140000	/* 140C */
#define MLXSW_THERMAL_HYSTERESIS_TEMP	5000	/* 5C */
/* Offset (twice the hysteresis) used to derive the outer module trip points
 * from the thresholds read from the module.
 */
#define MLXSW_THERMAL_MODULE_TEMP_SHIFT	(MLXSW_THERMAL_HYSTERESIS_TEMP * 2)
/* Size of the buffer used to build thermal zone names. */
#define MLXSW_THERMAL_ZONE_MAX_NAME	16
/* Score given to a zone whose temperature is not below any of its trips. */
#define MLXSW_THERMAL_TEMP_SCORE_MAX	GENMASK(31, 0)
/* Highest cooling state and the PWM duty value it is scaled to. */
#define MLXSW_THERMAL_MAX_STATE	10
#define MLXSW_THERMAL_MAX_DUTY	255
/* Minimum and maximum fan allowed speed in percent: from 20% to 100%. Values
 * MLXSW_THERMAL_MAX_STATE + x, where x is between 2 and 10, are used for
 * setting the fan speed dynamic minimum. For example, if the value is set to
 * 14 (40%), the cooling levels vector will be set to
 * 4, 4, 4, 4, 4, 5, 6, 7, 8, 9, 10 to introduce PWM speed in percent:
 * 40, 40, 40, 40, 40, 50, 60, 70, 80, 90, 100.
 */
#define MLXSW_THERMAL_SPEED_MIN		(MLXSW_THERMAL_MAX_STATE + 2)
#define MLXSW_THERMAL_SPEED_MAX		(MLXSW_THERMAL_MAX_STATE * 2)
#define MLXSW_THERMAL_SPEED_MIN_LEVEL	2		/* 20% */
38 
/* External cooling devices, allowed for binding to mlxsw thermal zones.
 * Each entry is matched as a substring of the cooling device type by
 * mlxsw_get_cooling_device_idx().
 */
static char * const mlxsw_thermal_external_allowed_cdev[] = {
	"mlxreg_fan",
};
43 
/* Indices of the trip points inside the per-zone trips arrays. */
enum mlxsw_thermal_trips {
	MLXSW_THERMAL_TEMP_TRIP_NORM,
	MLXSW_THERMAL_TEMP_TRIP_HIGH,
	MLXSW_THERMAL_TEMP_TRIP_HOT,
	MLXSW_THERMAL_TEMP_TRIP_CRIT,
};
50 
/* Description of a single trip point of a thermal zone. */
struct mlxsw_thermal_trip {
	int	type;		/* THERMAL_TRIP_* value reported for the trip */
	int	temp;		/* trip temperature, millidegrees Celsius */
	int	hyst;		/* trip hysteresis */
	int	min_state;	/* handed to thermal_zone_bind_cooling_device() */
	int	max_state;	/* handed to thermal_zone_bind_cooling_device() */
};
58 
/* Template trip table. It is copied into the ASIC zone and into every module
 * and gearbox zone at initialization time.
 */
static const struct mlxsw_thermal_trip default_thermal_trips[] = {
	{	/* In range - 0-40% PWM */
		.type		= THERMAL_TRIP_ACTIVE,
		.temp		= MLXSW_THERMAL_ASIC_TEMP_NORM,
		.hyst		= MLXSW_THERMAL_HYSTERESIS_TEMP,
		.min_state	= 0,
		.max_state	= (4 * MLXSW_THERMAL_MAX_STATE) / 10,
	},
	{
		/* In range - 40-100% PWM */
		.type		= THERMAL_TRIP_ACTIVE,
		.temp		= MLXSW_THERMAL_ASIC_TEMP_HIGH,
		.hyst		= MLXSW_THERMAL_HYSTERESIS_TEMP,
		.min_state	= (4 * MLXSW_THERMAL_MAX_STATE) / 10,
		.max_state	= MLXSW_THERMAL_MAX_STATE,
	},
	{	/* Warning */
		.type		= THERMAL_TRIP_HOT,
		.temp		= MLXSW_THERMAL_ASIC_TEMP_HOT,
		.hyst		= MLXSW_THERMAL_HYSTERESIS_TEMP,
		.min_state	= MLXSW_THERMAL_MAX_STATE,
		.max_state	= MLXSW_THERMAL_MAX_STATE,
	},
	{	/* Critical - soft poweroff (no hysteresis is set for it) */
		.type		= THERMAL_TRIP_CRITICAL,
		.temp		= MLXSW_THERMAL_ASIC_TEMP_CRIT,
		.min_state	= MLXSW_THERMAL_MAX_STATE,
		.max_state	= MLXSW_THERMAL_MAX_STATE,
	}
};

/* Number of trip points of every zone handled by this file. */
#define MLXSW_THERMAL_NUM_TRIPS	ARRAY_SIZE(default_thermal_trips)

/* Make sure all trips are writable */
#define MLXSW_THERMAL_TRIP_MASK	(BIT(MLXSW_THERMAL_NUM_TRIPS) - 1)
94 
struct mlxsw_thermal;

/* Per-zone state of a port module or gearbox thermal zone. */
struct mlxsw_thermal_module {
	struct mlxsw_thermal *parent;	/* owning thermal context */
	struct thermal_zone_device *tzdev;	/* registered thermal zone */
	struct mlxsw_thermal_trip trips[MLXSW_THERMAL_NUM_TRIPS];	/* zone trips */
	int module; /* Module or gearbox number */
};
103 
/* Thermal context of one mlxsw device: the ASIC thermal zone, the PWM cooling
 * devices and the optional module and gearbox thermal zones.
 */
struct mlxsw_thermal {
	struct mlxsw_core *core;	/* used for all register accesses */
	const struct mlxsw_bus_info *bus_info;	/* supplies the device and bus speed flag */
	struct thermal_zone_device *tzdev;	/* ASIC thermal zone ("mlxsw") */
	int polling_delay;	/* zone polling period, ms */
	struct thermal_cooling_device *cdevs[MLXSW_MFCR_PWMS_MAX];	/* one slot per PWM */
	/* Maps a requested cooling state to the state actually applied. */
	u8 cooling_levels[MLXSW_THERMAL_MAX_STATE + 1];
	struct mlxsw_thermal_trip trips[MLXSW_THERMAL_NUM_TRIPS];	/* ASIC zone trips */
	struct mlxsw_thermal_module *tz_module_arr;	/* module zones */
	u8 tz_module_num;	/* number of entries in tz_module_arr */
	struct mlxsw_thermal_module *tz_gearbox_arr;	/* gearbox zones */
	u8 tz_gearbox_num;	/* number of entries in tz_gearbox_arr */
	unsigned int tz_highest_score;	/* highest zone score recorded so far */
	struct thermal_zone_device *tz_highest_dev;	/* zone that produced it */
};
119 
120 static inline u8 mlxsw_state_to_duty(int state)
121 {
122 	return DIV_ROUND_CLOSEST(state * MLXSW_THERMAL_MAX_DUTY,
123 				 MLXSW_THERMAL_MAX_STATE);
124 }
125 
126 static inline int mlxsw_duty_to_state(u8 duty)
127 {
128 	return DIV_ROUND_CLOSEST(duty * MLXSW_THERMAL_MAX_STATE,
129 				 MLXSW_THERMAL_MAX_DUTY);
130 }
131 
132 static int mlxsw_get_cooling_device_idx(struct mlxsw_thermal *thermal,
133 					struct thermal_cooling_device *cdev)
134 {
135 	int i;
136 
137 	for (i = 0; i < MLXSW_MFCR_PWMS_MAX; i++)
138 		if (thermal->cdevs[i] == cdev)
139 			return i;
140 
141 	/* Allow mlxsw thermal zone binding to an external cooling device */
142 	for (i = 0; i < ARRAY_SIZE(mlxsw_thermal_external_allowed_cdev); i++) {
143 		if (strnstr(cdev->type, mlxsw_thermal_external_allowed_cdev[i],
144 			    sizeof(cdev->type)))
145 			return 0;
146 	}
147 
148 	return -ENODEV;
149 }
150 
151 static void
152 mlxsw_thermal_module_trips_reset(struct mlxsw_thermal_module *tz)
153 {
154 	tz->trips[MLXSW_THERMAL_TEMP_TRIP_NORM].temp = 0;
155 	tz->trips[MLXSW_THERMAL_TEMP_TRIP_HIGH].temp = 0;
156 	tz->trips[MLXSW_THERMAL_TEMP_TRIP_HOT].temp = 0;
157 	tz->trips[MLXSW_THERMAL_TEMP_TRIP_CRIT].temp = 0;
158 }
159 
160 static int
161 mlxsw_thermal_module_trips_update(struct device *dev, struct mlxsw_core *core,
162 				  struct mlxsw_thermal_module *tz)
163 {
164 	int crit_temp, emerg_temp;
165 	int err;
166 
167 	err = mlxsw_env_module_temp_thresholds_get(core, tz->module,
168 						   SFP_TEMP_HIGH_WARN,
169 						   &crit_temp);
170 	if (err)
171 		return err;
172 
173 	err = mlxsw_env_module_temp_thresholds_get(core, tz->module,
174 						   SFP_TEMP_HIGH_ALARM,
175 						   &emerg_temp);
176 	if (err)
177 		return err;
178 
179 	if (crit_temp > emerg_temp) {
180 		dev_warn(dev, "%s : Critical threshold %d is above emergency threshold %d\n",
181 			 tz->tzdev->type, crit_temp, emerg_temp);
182 		return 0;
183 	}
184 
185 	/* According to the system thermal requirements, the thermal zones are
186 	 * defined with four trip points. The critical and emergency
187 	 * temperature thresholds, provided by QSFP module are set as "active"
188 	 * and "hot" trip points, "normal" and "critical" trip points are
189 	 * derived from "active" and "hot" by subtracting or adding double
190 	 * hysteresis value.
191 	 */
192 	if (crit_temp >= MLXSW_THERMAL_MODULE_TEMP_SHIFT)
193 		tz->trips[MLXSW_THERMAL_TEMP_TRIP_NORM].temp = crit_temp -
194 					MLXSW_THERMAL_MODULE_TEMP_SHIFT;
195 	else
196 		tz->trips[MLXSW_THERMAL_TEMP_TRIP_NORM].temp = crit_temp;
197 	tz->trips[MLXSW_THERMAL_TEMP_TRIP_HIGH].temp = crit_temp;
198 	tz->trips[MLXSW_THERMAL_TEMP_TRIP_HOT].temp = emerg_temp;
199 	tz->trips[MLXSW_THERMAL_TEMP_TRIP_CRIT].temp = emerg_temp +
200 					MLXSW_THERMAL_MODULE_TEMP_SHIFT;
201 
202 	return 0;
203 }
204 
205 static void mlxsw_thermal_tz_score_update(struct mlxsw_thermal *thermal,
206 					  struct thermal_zone_device *tzdev,
207 					  struct mlxsw_thermal_trip *trips,
208 					  int temp)
209 {
210 	struct mlxsw_thermal_trip *trip = trips;
211 	unsigned int score, delta, i, shift = 1;
212 
213 	/* Calculate thermal zone score, if temperature is above the critical
214 	 * threshold score is set to MLXSW_THERMAL_TEMP_SCORE_MAX.
215 	 */
216 	score = MLXSW_THERMAL_TEMP_SCORE_MAX;
217 	for (i = MLXSW_THERMAL_TEMP_TRIP_NORM; i < MLXSW_THERMAL_NUM_TRIPS;
218 	     i++, trip++) {
219 		if (temp < trip->temp) {
220 			delta = DIV_ROUND_CLOSEST(temp, trip->temp - temp);
221 			score = delta * shift;
222 			break;
223 		}
224 		shift *= 256;
225 	}
226 
227 	if (score > thermal->tz_highest_score) {
228 		thermal->tz_highest_score = score;
229 		thermal->tz_highest_dev = tzdev;
230 	}
231 }
232 
233 static int mlxsw_thermal_bind(struct thermal_zone_device *tzdev,
234 			      struct thermal_cooling_device *cdev)
235 {
236 	struct mlxsw_thermal *thermal = tzdev->devdata;
237 	struct device *dev = thermal->bus_info->dev;
238 	int i, err;
239 
240 	/* If the cooling device is one of ours bind it */
241 	if (mlxsw_get_cooling_device_idx(thermal, cdev) < 0)
242 		return 0;
243 
244 	for (i = 0; i < MLXSW_THERMAL_NUM_TRIPS; i++) {
245 		const struct mlxsw_thermal_trip *trip = &thermal->trips[i];
246 
247 		err = thermal_zone_bind_cooling_device(tzdev, i, cdev,
248 						       trip->max_state,
249 						       trip->min_state,
250 						       THERMAL_WEIGHT_DEFAULT);
251 		if (err < 0) {
252 			dev_err(dev, "Failed to bind cooling device to trip %d\n", i);
253 			return err;
254 		}
255 	}
256 	return 0;
257 }
258 
259 static int mlxsw_thermal_unbind(struct thermal_zone_device *tzdev,
260 				struct thermal_cooling_device *cdev)
261 {
262 	struct mlxsw_thermal *thermal = tzdev->devdata;
263 	struct device *dev = thermal->bus_info->dev;
264 	int i;
265 	int err;
266 
267 	/* If the cooling device is our one unbind it */
268 	if (mlxsw_get_cooling_device_idx(thermal, cdev) < 0)
269 		return 0;
270 
271 	for (i = 0; i < MLXSW_THERMAL_NUM_TRIPS; i++) {
272 		err = thermal_zone_unbind_cooling_device(tzdev, i, cdev);
273 		if (err < 0) {
274 			dev_err(dev, "Failed to unbind cooling device\n");
275 			return err;
276 		}
277 	}
278 	return 0;
279 }
280 
281 static int mlxsw_thermal_get_temp(struct thermal_zone_device *tzdev,
282 				  int *p_temp)
283 {
284 	struct mlxsw_thermal *thermal = tzdev->devdata;
285 	struct device *dev = thermal->bus_info->dev;
286 	char mtmp_pl[MLXSW_REG_MTMP_LEN];
287 	int temp;
288 	int err;
289 
290 	mlxsw_reg_mtmp_pack(mtmp_pl, 0, false, false);
291 
292 	err = mlxsw_reg_query(thermal->core, MLXSW_REG(mtmp), mtmp_pl);
293 	if (err) {
294 		dev_err(dev, "Failed to query temp sensor\n");
295 		return err;
296 	}
297 	mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL);
298 	if (temp > 0)
299 		mlxsw_thermal_tz_score_update(thermal, tzdev, thermal->trips,
300 					      temp);
301 
302 	*p_temp = temp;
303 	return 0;
304 }
305 
306 static int mlxsw_thermal_get_trip_type(struct thermal_zone_device *tzdev,
307 				       int trip,
308 				       enum thermal_trip_type *p_type)
309 {
310 	struct mlxsw_thermal *thermal = tzdev->devdata;
311 
312 	if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS)
313 		return -EINVAL;
314 
315 	*p_type = thermal->trips[trip].type;
316 	return 0;
317 }
318 
319 static int mlxsw_thermal_get_trip_temp(struct thermal_zone_device *tzdev,
320 				       int trip, int *p_temp)
321 {
322 	struct mlxsw_thermal *thermal = tzdev->devdata;
323 
324 	if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS)
325 		return -EINVAL;
326 
327 	*p_temp = thermal->trips[trip].temp;
328 	return 0;
329 }
330 
331 static int mlxsw_thermal_set_trip_temp(struct thermal_zone_device *tzdev,
332 				       int trip, int temp)
333 {
334 	struct mlxsw_thermal *thermal = tzdev->devdata;
335 
336 	if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS ||
337 	    temp > MLXSW_THERMAL_ASIC_TEMP_CRIT)
338 		return -EINVAL;
339 
340 	thermal->trips[trip].temp = temp;
341 	return 0;
342 }
343 
344 static int mlxsw_thermal_get_trip_hyst(struct thermal_zone_device *tzdev,
345 				       int trip, int *p_hyst)
346 {
347 	struct mlxsw_thermal *thermal = tzdev->devdata;
348 
349 	*p_hyst = thermal->trips[trip].hyst;
350 	return 0;
351 }
352 
353 static int mlxsw_thermal_set_trip_hyst(struct thermal_zone_device *tzdev,
354 				       int trip, int hyst)
355 {
356 	struct mlxsw_thermal *thermal = tzdev->devdata;
357 
358 	thermal->trips[trip].hyst = hyst;
359 	return 0;
360 }
361 
362 static int mlxsw_thermal_trend_get(struct thermal_zone_device *tzdev,
363 				   int trip, enum thermal_trend *trend)
364 {
365 	struct mlxsw_thermal *thermal = tzdev->devdata;
366 
367 	if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS)
368 		return -EINVAL;
369 
370 	if (tzdev == thermal->tz_highest_dev)
371 		return 1;
372 
373 	*trend = THERMAL_TREND_STABLE;
374 	return 0;
375 }
376 
/* Callbacks of the ASIC thermal zone ("mlxsw"). */
static struct thermal_zone_device_ops mlxsw_thermal_ops = {
	.bind = mlxsw_thermal_bind,
	.unbind = mlxsw_thermal_unbind,
	.get_temp = mlxsw_thermal_get_temp,
	.get_trip_type	= mlxsw_thermal_get_trip_type,
	.get_trip_temp	= mlxsw_thermal_get_trip_temp,
	.set_trip_temp	= mlxsw_thermal_set_trip_temp,
	.get_trip_hyst	= mlxsw_thermal_get_trip_hyst,
	.set_trip_hyst	= mlxsw_thermal_set_trip_hyst,
	.get_trend	= mlxsw_thermal_trend_get,
};
388 
389 static int mlxsw_thermal_module_bind(struct thermal_zone_device *tzdev,
390 				     struct thermal_cooling_device *cdev)
391 {
392 	struct mlxsw_thermal_module *tz = tzdev->devdata;
393 	struct mlxsw_thermal *thermal = tz->parent;
394 	int i, j, err;
395 
396 	/* If the cooling device is one of ours bind it */
397 	if (mlxsw_get_cooling_device_idx(thermal, cdev) < 0)
398 		return 0;
399 
400 	for (i = 0; i < MLXSW_THERMAL_NUM_TRIPS; i++) {
401 		const struct mlxsw_thermal_trip *trip = &tz->trips[i];
402 
403 		err = thermal_zone_bind_cooling_device(tzdev, i, cdev,
404 						       trip->max_state,
405 						       trip->min_state,
406 						       THERMAL_WEIGHT_DEFAULT);
407 		if (err < 0)
408 			goto err_bind_cooling_device;
409 	}
410 	return 0;
411 
412 err_bind_cooling_device:
413 	for (j = i - 1; j >= 0; j--)
414 		thermal_zone_unbind_cooling_device(tzdev, j, cdev);
415 	return err;
416 }
417 
418 static int mlxsw_thermal_module_unbind(struct thermal_zone_device *tzdev,
419 				       struct thermal_cooling_device *cdev)
420 {
421 	struct mlxsw_thermal_module *tz = tzdev->devdata;
422 	struct mlxsw_thermal *thermal = tz->parent;
423 	int i;
424 	int err;
425 
426 	/* If the cooling device is one of ours unbind it */
427 	if (mlxsw_get_cooling_device_idx(thermal, cdev) < 0)
428 		return 0;
429 
430 	for (i = 0; i < MLXSW_THERMAL_NUM_TRIPS; i++) {
431 		err = thermal_zone_unbind_cooling_device(tzdev, i, cdev);
432 		WARN_ON(err);
433 	}
434 	return err;
435 }
436 
437 static int mlxsw_thermal_module_temp_get(struct thermal_zone_device *tzdev,
438 					 int *p_temp)
439 {
440 	struct mlxsw_thermal_module *tz = tzdev->devdata;
441 	struct mlxsw_thermal *thermal = tz->parent;
442 	struct device *dev = thermal->bus_info->dev;
443 	char mtmp_pl[MLXSW_REG_MTMP_LEN];
444 	int temp;
445 	int err;
446 
447 	/* Read module temperature. */
448 	mlxsw_reg_mtmp_pack(mtmp_pl, MLXSW_REG_MTMP_MODULE_INDEX_MIN +
449 			    tz->module, false, false);
450 	err = mlxsw_reg_query(thermal->core, MLXSW_REG(mtmp), mtmp_pl);
451 	if (err) {
452 		/* Do not return error - in case of broken module's sensor
453 		 * it will cause error message flooding.
454 		 */
455 		temp = 0;
456 		*p_temp = (int) temp;
457 		return 0;
458 	}
459 	mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL);
460 	*p_temp = temp;
461 
462 	if (!temp)
463 		return 0;
464 
465 	/* Update trip points. */
466 	err = mlxsw_thermal_module_trips_update(dev, thermal->core, tz);
467 	if (!err && temp > 0)
468 		mlxsw_thermal_tz_score_update(thermal, tzdev, tz->trips, temp);
469 
470 	return 0;
471 }
472 
473 static int
474 mlxsw_thermal_module_trip_type_get(struct thermal_zone_device *tzdev, int trip,
475 				   enum thermal_trip_type *p_type)
476 {
477 	struct mlxsw_thermal_module *tz = tzdev->devdata;
478 
479 	if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS)
480 		return -EINVAL;
481 
482 	*p_type = tz->trips[trip].type;
483 	return 0;
484 }
485 
486 static int
487 mlxsw_thermal_module_trip_temp_get(struct thermal_zone_device *tzdev,
488 				   int trip, int *p_temp)
489 {
490 	struct mlxsw_thermal_module *tz = tzdev->devdata;
491 
492 	if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS)
493 		return -EINVAL;
494 
495 	*p_temp = tz->trips[trip].temp;
496 	return 0;
497 }
498 
499 static int
500 mlxsw_thermal_module_trip_temp_set(struct thermal_zone_device *tzdev,
501 				   int trip, int temp)
502 {
503 	struct mlxsw_thermal_module *tz = tzdev->devdata;
504 
505 	if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS ||
506 	    temp > tz->trips[MLXSW_THERMAL_TEMP_TRIP_CRIT].temp)
507 		return -EINVAL;
508 
509 	tz->trips[trip].temp = temp;
510 	return 0;
511 }
512 
513 static int
514 mlxsw_thermal_module_trip_hyst_get(struct thermal_zone_device *tzdev, int trip,
515 				   int *p_hyst)
516 {
517 	struct mlxsw_thermal_module *tz = tzdev->devdata;
518 
519 	*p_hyst = tz->trips[trip].hyst;
520 	return 0;
521 }
522 
523 static int
524 mlxsw_thermal_module_trip_hyst_set(struct thermal_zone_device *tzdev, int trip,
525 				   int hyst)
526 {
527 	struct mlxsw_thermal_module *tz = tzdev->devdata;
528 
529 	tz->trips[trip].hyst = hyst;
530 	return 0;
531 }
532 
533 static int mlxsw_thermal_module_trend_get(struct thermal_zone_device *tzdev,
534 					  int trip, enum thermal_trend *trend)
535 {
536 	struct mlxsw_thermal_module *tz = tzdev->devdata;
537 	struct mlxsw_thermal *thermal = tz->parent;
538 
539 	if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS)
540 		return -EINVAL;
541 
542 	if (tzdev == thermal->tz_highest_dev)
543 		return 1;
544 
545 	*trend = THERMAL_TREND_STABLE;
546 	return 0;
547 }
548 
/* Callbacks of the per-module thermal zones ("mlxsw-module<N>"). */
static struct thermal_zone_device_ops mlxsw_thermal_module_ops = {
	.bind		= mlxsw_thermal_module_bind,
	.unbind		= mlxsw_thermal_module_unbind,
	.get_temp	= mlxsw_thermal_module_temp_get,
	.get_trip_type	= mlxsw_thermal_module_trip_type_get,
	.get_trip_temp	= mlxsw_thermal_module_trip_temp_get,
	.set_trip_temp	= mlxsw_thermal_module_trip_temp_set,
	.get_trip_hyst	= mlxsw_thermal_module_trip_hyst_get,
	.set_trip_hyst	= mlxsw_thermal_module_trip_hyst_set,
	.get_trend	= mlxsw_thermal_module_trend_get,
};
560 
561 static int mlxsw_thermal_gearbox_temp_get(struct thermal_zone_device *tzdev,
562 					  int *p_temp)
563 {
564 	struct mlxsw_thermal_module *tz = tzdev->devdata;
565 	struct mlxsw_thermal *thermal = tz->parent;
566 	char mtmp_pl[MLXSW_REG_MTMP_LEN];
567 	u16 index;
568 	int temp;
569 	int err;
570 
571 	index = MLXSW_REG_MTMP_GBOX_INDEX_MIN + tz->module;
572 	mlxsw_reg_mtmp_pack(mtmp_pl, index, false, false);
573 
574 	err = mlxsw_reg_query(thermal->core, MLXSW_REG(mtmp), mtmp_pl);
575 	if (err)
576 		return err;
577 
578 	mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL);
579 	if (temp > 0)
580 		mlxsw_thermal_tz_score_update(thermal, tzdev, tz->trips, temp);
581 
582 	*p_temp = temp;
583 	return 0;
584 }
585 
/* Callbacks of the gearbox thermal zones ("mlxsw-gearbox<N>"). Everything but
 * the temperature read is shared with the module zones.
 */
static struct thermal_zone_device_ops mlxsw_thermal_gearbox_ops = {
	.bind		= mlxsw_thermal_module_bind,
	.unbind		= mlxsw_thermal_module_unbind,
	.get_temp	= mlxsw_thermal_gearbox_temp_get,
	.get_trip_type	= mlxsw_thermal_module_trip_type_get,
	.get_trip_temp	= mlxsw_thermal_module_trip_temp_get,
	.set_trip_temp	= mlxsw_thermal_module_trip_temp_set,
	.get_trip_hyst	= mlxsw_thermal_module_trip_hyst_get,
	.set_trip_hyst	= mlxsw_thermal_module_trip_hyst_set,
	.get_trend	= mlxsw_thermal_module_trend_get,
};
597 
598 static int mlxsw_thermal_get_max_state(struct thermal_cooling_device *cdev,
599 				       unsigned long *p_state)
600 {
601 	*p_state = MLXSW_THERMAL_MAX_STATE;
602 	return 0;
603 }
604 
605 static int mlxsw_thermal_get_cur_state(struct thermal_cooling_device *cdev,
606 				       unsigned long *p_state)
607 
608 {
609 	struct mlxsw_thermal *thermal = cdev->devdata;
610 	struct device *dev = thermal->bus_info->dev;
611 	char mfsc_pl[MLXSW_REG_MFSC_LEN];
612 	int err, idx;
613 	u8 duty;
614 
615 	idx = mlxsw_get_cooling_device_idx(thermal, cdev);
616 	if (idx < 0)
617 		return idx;
618 
619 	mlxsw_reg_mfsc_pack(mfsc_pl, idx, 0);
620 	err = mlxsw_reg_query(thermal->core, MLXSW_REG(mfsc), mfsc_pl);
621 	if (err) {
622 		dev_err(dev, "Failed to query PWM duty\n");
623 		return err;
624 	}
625 
626 	duty = mlxsw_reg_mfsc_pwm_duty_cycle_get(mfsc_pl);
627 	*p_state = mlxsw_duty_to_state(duty);
628 	return 0;
629 }
630 
631 static int mlxsw_thermal_set_cur_state(struct thermal_cooling_device *cdev,
632 				       unsigned long state)
633 
634 {
635 	struct mlxsw_thermal *thermal = cdev->devdata;
636 	struct device *dev = thermal->bus_info->dev;
637 	char mfsc_pl[MLXSW_REG_MFSC_LEN];
638 	unsigned long cur_state, i;
639 	int idx;
640 	u8 duty;
641 	int err;
642 
643 	idx = mlxsw_get_cooling_device_idx(thermal, cdev);
644 	if (idx < 0)
645 		return idx;
646 
647 	/* Verify if this request is for changing allowed fan dynamical
648 	 * minimum. If it is - update cooling levels accordingly and update
649 	 * state, if current state is below the newly requested minimum state.
650 	 * For example, if current state is 5, and minimal state is to be
651 	 * changed from 4 to 6, thermal->cooling_levels[0 to 5] will be changed
652 	 * all from 4 to 6. And state 5 (thermal->cooling_levels[4]) should be
653 	 * overwritten.
654 	 */
655 	if (state >= MLXSW_THERMAL_SPEED_MIN &&
656 	    state <= MLXSW_THERMAL_SPEED_MAX) {
657 		state -= MLXSW_THERMAL_MAX_STATE;
658 		for (i = 0; i <= MLXSW_THERMAL_MAX_STATE; i++)
659 			thermal->cooling_levels[i] = max(state, i);
660 
661 		mlxsw_reg_mfsc_pack(mfsc_pl, idx, 0);
662 		err = mlxsw_reg_query(thermal->core, MLXSW_REG(mfsc), mfsc_pl);
663 		if (err)
664 			return err;
665 
666 		duty = mlxsw_reg_mfsc_pwm_duty_cycle_get(mfsc_pl);
667 		cur_state = mlxsw_duty_to_state(duty);
668 
669 		/* If current fan state is lower than requested dynamical
670 		 * minimum, increase fan speed up to dynamical minimum.
671 		 */
672 		if (state < cur_state)
673 			return 0;
674 
675 		state = cur_state;
676 	}
677 
678 	if (state > MLXSW_THERMAL_MAX_STATE)
679 		return -EINVAL;
680 
681 	/* Normalize the state to the valid speed range. */
682 	state = thermal->cooling_levels[state];
683 	mlxsw_reg_mfsc_pack(mfsc_pl, idx, mlxsw_state_to_duty(state));
684 	err = mlxsw_reg_write(thermal->core, MLXSW_REG(mfsc), mfsc_pl);
685 	if (err) {
686 		dev_err(dev, "Failed to write PWM duty\n");
687 		return err;
688 	}
689 	return 0;
690 }
691 
/* Callbacks of the "mlxsw_fan" cooling devices registered by this driver. */
static const struct thermal_cooling_device_ops mlxsw_cooling_ops = {
	.get_max_state	= mlxsw_thermal_get_max_state,
	.get_cur_state	= mlxsw_thermal_get_cur_state,
	.set_cur_state	= mlxsw_thermal_set_cur_state,
};
697 
698 static int
699 mlxsw_thermal_module_tz_init(struct mlxsw_thermal_module *module_tz)
700 {
701 	char tz_name[MLXSW_THERMAL_ZONE_MAX_NAME];
702 	int err;
703 
704 	snprintf(tz_name, sizeof(tz_name), "mlxsw-module%d",
705 		 module_tz->module + 1);
706 	module_tz->tzdev = thermal_zone_device_register(tz_name,
707 							MLXSW_THERMAL_NUM_TRIPS,
708 							MLXSW_THERMAL_TRIP_MASK,
709 							module_tz,
710 							&mlxsw_thermal_module_ops,
711 							NULL, 0, 0);
712 	if (IS_ERR(module_tz->tzdev)) {
713 		err = PTR_ERR(module_tz->tzdev);
714 		return err;
715 	}
716 
717 	err = thermal_zone_device_enable(module_tz->tzdev);
718 	if (err)
719 		thermal_zone_device_unregister(module_tz->tzdev);
720 
721 	return err;
722 }
723 
/* Counterpart of mlxsw_thermal_module_tz_init(): unregister the zone. */
static void mlxsw_thermal_module_tz_fini(struct thermal_zone_device *tzdev)
{
	struct thermal_zone_device *zone = tzdev;

	thermal_zone_device_unregister(zone);
}
728 
729 static int
730 mlxsw_thermal_module_init(struct device *dev, struct mlxsw_core *core,
731 			  struct mlxsw_thermal *thermal, u8 module)
732 {
733 	struct mlxsw_thermal_module *module_tz;
734 
735 	module_tz = &thermal->tz_module_arr[module];
736 	/* Skip if parent is already set (case of port split). */
737 	if (module_tz->parent)
738 		return 0;
739 	module_tz->module = module;
740 	module_tz->parent = thermal;
741 	memcpy(module_tz->trips, default_thermal_trips,
742 	       sizeof(thermal->trips));
743 	/* Initialize all trip point. */
744 	mlxsw_thermal_module_trips_reset(module_tz);
745 	/* Update trip point according to the module data. */
746 	return mlxsw_thermal_module_trips_update(dev, core, module_tz);
747 }
748 
749 static void mlxsw_thermal_module_fini(struct mlxsw_thermal_module *module_tz)
750 {
751 	if (module_tz && module_tz->tzdev) {
752 		mlxsw_thermal_module_tz_fini(module_tz->tzdev);
753 		module_tz->tzdev = NULL;
754 		module_tz->parent = NULL;
755 	}
756 }
757 
758 static int
759 mlxsw_thermal_modules_init(struct device *dev, struct mlxsw_core *core,
760 			   struct mlxsw_thermal *thermal)
761 {
762 	struct mlxsw_thermal_module *module_tz;
763 	char mgpir_pl[MLXSW_REG_MGPIR_LEN];
764 	int i, err;
765 
766 	if (!mlxsw_core_res_query_enabled(core))
767 		return 0;
768 
769 	mlxsw_reg_mgpir_pack(mgpir_pl);
770 	err = mlxsw_reg_query(core, MLXSW_REG(mgpir), mgpir_pl);
771 	if (err)
772 		return err;
773 
774 	mlxsw_reg_mgpir_unpack(mgpir_pl, NULL, NULL, NULL,
775 			       &thermal->tz_module_num);
776 
777 	thermal->tz_module_arr = kcalloc(thermal->tz_module_num,
778 					 sizeof(*thermal->tz_module_arr),
779 					 GFP_KERNEL);
780 	if (!thermal->tz_module_arr)
781 		return -ENOMEM;
782 
783 	for (i = 0; i < thermal->tz_module_num; i++) {
784 		err = mlxsw_thermal_module_init(dev, core, thermal, i);
785 		if (err)
786 			goto err_unreg_tz_module_arr;
787 	}
788 
789 	for (i = 0; i < thermal->tz_module_num; i++) {
790 		module_tz = &thermal->tz_module_arr[i];
791 		if (!module_tz->parent)
792 			continue;
793 		err = mlxsw_thermal_module_tz_init(module_tz);
794 		if (err)
795 			goto err_unreg_tz_module_arr;
796 	}
797 
798 	return 0;
799 
800 err_unreg_tz_module_arr:
801 	for (i = thermal->tz_module_num - 1; i >= 0; i--)
802 		mlxsw_thermal_module_fini(&thermal->tz_module_arr[i]);
803 	kfree(thermal->tz_module_arr);
804 	return err;
805 }
806 
807 static void
808 mlxsw_thermal_modules_fini(struct mlxsw_thermal *thermal)
809 {
810 	int i;
811 
812 	if (!mlxsw_core_res_query_enabled(thermal->core))
813 		return;
814 
815 	for (i = thermal->tz_module_num - 1; i >= 0; i--)
816 		mlxsw_thermal_module_fini(&thermal->tz_module_arr[i]);
817 	kfree(thermal->tz_module_arr);
818 }
819 
820 static int
821 mlxsw_thermal_gearbox_tz_init(struct mlxsw_thermal_module *gearbox_tz)
822 {
823 	char tz_name[MLXSW_THERMAL_ZONE_MAX_NAME];
824 	int ret;
825 
826 	snprintf(tz_name, sizeof(tz_name), "mlxsw-gearbox%d",
827 		 gearbox_tz->module + 1);
828 	gearbox_tz->tzdev = thermal_zone_device_register(tz_name,
829 						MLXSW_THERMAL_NUM_TRIPS,
830 						MLXSW_THERMAL_TRIP_MASK,
831 						gearbox_tz,
832 						&mlxsw_thermal_gearbox_ops,
833 						NULL, 0, 0);
834 	if (IS_ERR(gearbox_tz->tzdev))
835 		return PTR_ERR(gearbox_tz->tzdev);
836 
837 	ret = thermal_zone_device_enable(gearbox_tz->tzdev);
838 	if (ret)
839 		thermal_zone_device_unregister(gearbox_tz->tzdev);
840 
841 	return ret;
842 }
843 
844 static void
845 mlxsw_thermal_gearbox_tz_fini(struct mlxsw_thermal_module *gearbox_tz)
846 {
847 	thermal_zone_device_unregister(gearbox_tz->tzdev);
848 }
849 
850 static int
851 mlxsw_thermal_gearboxes_init(struct device *dev, struct mlxsw_core *core,
852 			     struct mlxsw_thermal *thermal)
853 {
854 	enum mlxsw_reg_mgpir_device_type device_type;
855 	struct mlxsw_thermal_module *gearbox_tz;
856 	char mgpir_pl[MLXSW_REG_MGPIR_LEN];
857 	u8 gbox_num;
858 	int i;
859 	int err;
860 
861 	if (!mlxsw_core_res_query_enabled(core))
862 		return 0;
863 
864 	mlxsw_reg_mgpir_pack(mgpir_pl);
865 	err = mlxsw_reg_query(core, MLXSW_REG(mgpir), mgpir_pl);
866 	if (err)
867 		return err;
868 
869 	mlxsw_reg_mgpir_unpack(mgpir_pl, &gbox_num, &device_type, NULL,
870 			       NULL);
871 	if (device_type != MLXSW_REG_MGPIR_DEVICE_TYPE_GEARBOX_DIE ||
872 	    !gbox_num)
873 		return 0;
874 
875 	thermal->tz_gearbox_num = gbox_num;
876 	thermal->tz_gearbox_arr = kcalloc(thermal->tz_gearbox_num,
877 					  sizeof(*thermal->tz_gearbox_arr),
878 					  GFP_KERNEL);
879 	if (!thermal->tz_gearbox_arr)
880 		return -ENOMEM;
881 
882 	for (i = 0; i < thermal->tz_gearbox_num; i++) {
883 		gearbox_tz = &thermal->tz_gearbox_arr[i];
884 		memcpy(gearbox_tz->trips, default_thermal_trips,
885 		       sizeof(thermal->trips));
886 		gearbox_tz->module = i;
887 		gearbox_tz->parent = thermal;
888 		err = mlxsw_thermal_gearbox_tz_init(gearbox_tz);
889 		if (err)
890 			goto err_unreg_tz_gearbox;
891 	}
892 
893 	return 0;
894 
895 err_unreg_tz_gearbox:
896 	for (i--; i >= 0; i--)
897 		mlxsw_thermal_gearbox_tz_fini(&thermal->tz_gearbox_arr[i]);
898 	kfree(thermal->tz_gearbox_arr);
899 	return err;
900 }
901 
902 static void
903 mlxsw_thermal_gearboxes_fini(struct mlxsw_thermal *thermal)
904 {
905 	int i;
906 
907 	if (!mlxsw_core_res_query_enabled(thermal->core))
908 		return;
909 
910 	for (i = thermal->tz_gearbox_num - 1; i >= 0; i--)
911 		mlxsw_thermal_gearbox_tz_fini(&thermal->tz_gearbox_arr[i]);
912 	kfree(thermal->tz_gearbox_arr);
913 }
914 
915 int mlxsw_thermal_init(struct mlxsw_core *core,
916 		       const struct mlxsw_bus_info *bus_info,
917 		       struct mlxsw_thermal **p_thermal)
918 {
919 	char mfcr_pl[MLXSW_REG_MFCR_LEN] = { 0 };
920 	enum mlxsw_reg_mfcr_pwm_frequency freq;
921 	struct device *dev = bus_info->dev;
922 	struct mlxsw_thermal *thermal;
923 	u16 tacho_active;
924 	u8 pwm_active;
925 	int err, i;
926 
927 	thermal = devm_kzalloc(dev, sizeof(*thermal),
928 			       GFP_KERNEL);
929 	if (!thermal)
930 		return -ENOMEM;
931 
932 	thermal->core = core;
933 	thermal->bus_info = bus_info;
934 	memcpy(thermal->trips, default_thermal_trips, sizeof(thermal->trips));
935 
936 	err = mlxsw_reg_query(thermal->core, MLXSW_REG(mfcr), mfcr_pl);
937 	if (err) {
938 		dev_err(dev, "Failed to probe PWMs\n");
939 		goto err_free_thermal;
940 	}
941 	mlxsw_reg_mfcr_unpack(mfcr_pl, &freq, &tacho_active, &pwm_active);
942 
943 	for (i = 0; i < MLXSW_MFCR_TACHOS_MAX; i++) {
944 		if (tacho_active & BIT(i)) {
945 			char mfsl_pl[MLXSW_REG_MFSL_LEN];
946 
947 			mlxsw_reg_mfsl_pack(mfsl_pl, i, 0, 0);
948 
949 			/* We need to query the register to preserve maximum */
950 			err = mlxsw_reg_query(thermal->core, MLXSW_REG(mfsl),
951 					      mfsl_pl);
952 			if (err)
953 				goto err_free_thermal;
954 
955 			/* set the minimal RPMs to 0 */
956 			mlxsw_reg_mfsl_tach_min_set(mfsl_pl, 0);
957 			err = mlxsw_reg_write(thermal->core, MLXSW_REG(mfsl),
958 					      mfsl_pl);
959 			if (err)
960 				goto err_free_thermal;
961 		}
962 	}
963 	for (i = 0; i < MLXSW_MFCR_PWMS_MAX; i++) {
964 		if (pwm_active & BIT(i)) {
965 			struct thermal_cooling_device *cdev;
966 
967 			cdev = thermal_cooling_device_register("mlxsw_fan",
968 							       thermal,
969 							       &mlxsw_cooling_ops);
970 			if (IS_ERR(cdev)) {
971 				err = PTR_ERR(cdev);
972 				dev_err(dev, "Failed to register cooling device\n");
973 				goto err_unreg_cdevs;
974 			}
975 			thermal->cdevs[i] = cdev;
976 		}
977 	}
978 
979 	/* Initialize cooling levels per PWM state. */
980 	for (i = 0; i < MLXSW_THERMAL_MAX_STATE; i++)
981 		thermal->cooling_levels[i] = max(MLXSW_THERMAL_SPEED_MIN_LEVEL,
982 						 i);
983 
984 	thermal->polling_delay = bus_info->low_frequency ?
985 				 MLXSW_THERMAL_SLOW_POLL_INT :
986 				 MLXSW_THERMAL_POLL_INT;
987 
988 	thermal->tzdev = thermal_zone_device_register("mlxsw",
989 						      MLXSW_THERMAL_NUM_TRIPS,
990 						      MLXSW_THERMAL_TRIP_MASK,
991 						      thermal,
992 						      &mlxsw_thermal_ops,
993 						      NULL, 0,
994 						      thermal->polling_delay);
995 	if (IS_ERR(thermal->tzdev)) {
996 		err = PTR_ERR(thermal->tzdev);
997 		dev_err(dev, "Failed to register thermal zone\n");
998 		goto err_unreg_cdevs;
999 	}
1000 
1001 	err = mlxsw_thermal_modules_init(dev, core, thermal);
1002 	if (err)
1003 		goto err_unreg_tzdev;
1004 
1005 	err = mlxsw_thermal_gearboxes_init(dev, core, thermal);
1006 	if (err)
1007 		goto err_unreg_modules_tzdev;
1008 
1009 	err = thermal_zone_device_enable(thermal->tzdev);
1010 	if (err)
1011 		goto err_unreg_gearboxes;
1012 
1013 	*p_thermal = thermal;
1014 	return 0;
1015 
1016 err_unreg_gearboxes:
1017 	mlxsw_thermal_gearboxes_fini(thermal);
1018 err_unreg_modules_tzdev:
1019 	mlxsw_thermal_modules_fini(thermal);
1020 err_unreg_tzdev:
1021 	if (thermal->tzdev) {
1022 		thermal_zone_device_unregister(thermal->tzdev);
1023 		thermal->tzdev = NULL;
1024 	}
1025 err_unreg_cdevs:
1026 	for (i = 0; i < MLXSW_MFCR_PWMS_MAX; i++)
1027 		if (thermal->cdevs[i])
1028 			thermal_cooling_device_unregister(thermal->cdevs[i]);
1029 err_free_thermal:
1030 	devm_kfree(dev, thermal);
1031 	return err;
1032 }
1033 
1034 void mlxsw_thermal_fini(struct mlxsw_thermal *thermal)
1035 {
1036 	int i;
1037 
1038 	mlxsw_thermal_gearboxes_fini(thermal);
1039 	mlxsw_thermal_modules_fini(thermal);
1040 	if (thermal->tzdev) {
1041 		thermal_zone_device_unregister(thermal->tzdev);
1042 		thermal->tzdev = NULL;
1043 	}
1044 
1045 	for (i = 0; i < MLXSW_MFCR_PWMS_MAX; i++) {
1046 		if (thermal->cdevs[i]) {
1047 			thermal_cooling_device_unregister(thermal->cdevs[i]);
1048 			thermal->cdevs[i] = NULL;
1049 		}
1050 	}
1051 
1052 	devm_kfree(thermal->bus_info->dev, thermal);
1053 }
1054