1 // SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
2 /* Copyright (c) 2016-2018 Mellanox Technologies. All rights reserved
3  * Copyright (c) 2016 Ivan Vecera <cera@cera.cz>
4  */
5 
6 #include <linux/kernel.h>
7 #include <linux/types.h>
8 #include <linux/device.h>
9 #include <linux/sysfs.h>
10 #include <linux/thermal.h>
11 #include <linux/err.h>
12 #include <linux/sfp.h>
13 
14 #include "core.h"
15 #include "core_env.h"
16 
/* Thermal zone polling periods, in milliseconds. */
#define MLXSW_THERMAL_POLL_INT		1000
#define MLXSW_THERMAL_SLOW_POLL_INT	20000

/* Default ASIC trip temperatures and hysteresis, in millidegrees Celsius. */
#define MLXSW_THERMAL_ASIC_TEMP_NORM	75000	/* 75C */
#define MLXSW_THERMAL_ASIC_TEMP_HIGH	85000	/* 85C */
#define MLXSW_THERMAL_ASIC_TEMP_HOT	105000	/* 105C */
#define MLXSW_THERMAL_HYSTERESIS_TEMP	5000	/* 5C */

/* Distance between a module's "high" and "normal" trips: twice the
 * hysteresis.
 */
#define MLXSW_THERMAL_MODULE_TEMP_SHIFT	(MLXSW_THERMAL_HYSTERESIS_TEMP * 2)

/* Size of the buffer used to format thermal zone names. */
#define MLXSW_THERMAL_ZONE_MAX_NAME	16

/* Score given to a zone whose temperature is not below any trip point. */
#define MLXSW_THERMAL_TEMP_SCORE_MAX	GENMASK(31, 0)

/* Cooling state range and the PWM duty value matching the top state. */
#define MLXSW_THERMAL_MAX_STATE		10
#define MLXSW_THERMAL_MIN_STATE		2
#define MLXSW_THERMAL_MAX_DUTY		255
29 
/* Type names of cooling devices registered outside this driver that mlxsw
 * thermal zones are nevertheless allowed to bind to.
 */
static char * const mlxsw_thermal_external_allowed_cdev[] = { "mlxreg_fan" };
34 
/* Indices of the trip points inside a zone's trips[] array. */
enum mlxsw_thermal_trips {
	MLXSW_THERMAL_TEMP_TRIP_NORM = 0,
	MLXSW_THERMAL_TEMP_TRIP_HIGH = 1,
	MLXSW_THERMAL_TEMP_TRIP_HOT = 2,
};
40 
/* Description of a single trip point of a thermal zone. */
struct mlxsw_thermal_trip {
	int type;	/* Trip type reported through get_trip_type */
	int temp;	/* Trip temperature */
	int hyst;	/* Trip hysteresis */
	int min_state;	/* Lower cooling state used when binding this trip */
	int max_state;	/* Upper cooling state used when binding this trip */
};
48 
49 static const struct mlxsw_thermal_trip default_thermal_trips[] = {
50 	{	/* In range - 0-40% PWM */
51 		.type		= THERMAL_TRIP_ACTIVE,
52 		.temp		= MLXSW_THERMAL_ASIC_TEMP_NORM,
53 		.hyst		= MLXSW_THERMAL_HYSTERESIS_TEMP,
54 		.min_state	= 0,
55 		.max_state	= (4 * MLXSW_THERMAL_MAX_STATE) / 10,
56 	},
57 	{
58 		/* In range - 40-100% PWM */
59 		.type		= THERMAL_TRIP_ACTIVE,
60 		.temp		= MLXSW_THERMAL_ASIC_TEMP_HIGH,
61 		.hyst		= MLXSW_THERMAL_HYSTERESIS_TEMP,
62 		.min_state	= (4 * MLXSW_THERMAL_MAX_STATE) / 10,
63 		.max_state	= MLXSW_THERMAL_MAX_STATE,
64 	},
65 	{	/* Warning */
66 		.type		= THERMAL_TRIP_HOT,
67 		.temp		= MLXSW_THERMAL_ASIC_TEMP_HOT,
68 		.min_state	= MLXSW_THERMAL_MAX_STATE,
69 		.max_state	= MLXSW_THERMAL_MAX_STATE,
70 	},
71 };
72 
/* Number of trip points in every mlxsw thermal zone. */
#define MLXSW_THERMAL_NUM_TRIPS \
	ARRAY_SIZE(default_thermal_trips)

/* One bit per trip point, so that all trips are writable. */
#define MLXSW_THERMAL_TRIP_MASK \
	(BIT(MLXSW_THERMAL_NUM_TRIPS) - 1)
77 
78 struct mlxsw_thermal;
79 
80 struct mlxsw_thermal_module {
81 	struct mlxsw_thermal *parent;
82 	struct thermal_zone_device *tzdev;
83 	struct mlxsw_thermal_trip trips[MLXSW_THERMAL_NUM_TRIPS];
84 	int module; /* Module or gearbox number */
85 };
86 
87 struct mlxsw_thermal {
88 	struct mlxsw_core *core;
89 	const struct mlxsw_bus_info *bus_info;
90 	struct thermal_zone_device *tzdev;
91 	int polling_delay;
92 	struct thermal_cooling_device *cdevs[MLXSW_MFCR_PWMS_MAX];
93 	u8 cooling_levels[MLXSW_THERMAL_MAX_STATE + 1];
94 	struct mlxsw_thermal_trip trips[MLXSW_THERMAL_NUM_TRIPS];
95 	struct mlxsw_thermal_module *tz_module_arr;
96 	u8 tz_module_num;
97 	struct mlxsw_thermal_module *tz_gearbox_arr;
98 	u8 tz_gearbox_num;
99 	unsigned int tz_highest_score;
100 	struct thermal_zone_device *tz_highest_dev;
101 };
102 
103 static inline u8 mlxsw_state_to_duty(int state)
104 {
105 	return DIV_ROUND_CLOSEST(state * MLXSW_THERMAL_MAX_DUTY,
106 				 MLXSW_THERMAL_MAX_STATE);
107 }
108 
109 static inline int mlxsw_duty_to_state(u8 duty)
110 {
111 	return DIV_ROUND_CLOSEST(duty * MLXSW_THERMAL_MAX_STATE,
112 				 MLXSW_THERMAL_MAX_DUTY);
113 }
114 
115 static int mlxsw_get_cooling_device_idx(struct mlxsw_thermal *thermal,
116 					struct thermal_cooling_device *cdev)
117 {
118 	int i;
119 
120 	for (i = 0; i < MLXSW_MFCR_PWMS_MAX; i++)
121 		if (thermal->cdevs[i] == cdev)
122 			return i;
123 
124 	/* Allow mlxsw thermal zone binding to an external cooling device */
125 	for (i = 0; i < ARRAY_SIZE(mlxsw_thermal_external_allowed_cdev); i++) {
126 		if (strnstr(cdev->type, mlxsw_thermal_external_allowed_cdev[i],
127 			    strlen(cdev->type)))
128 			return 0;
129 	}
130 
131 	return -ENODEV;
132 }
133 
134 static void
135 mlxsw_thermal_module_trips_reset(struct mlxsw_thermal_module *tz)
136 {
137 	tz->trips[MLXSW_THERMAL_TEMP_TRIP_NORM].temp = 0;
138 	tz->trips[MLXSW_THERMAL_TEMP_TRIP_HIGH].temp = 0;
139 	tz->trips[MLXSW_THERMAL_TEMP_TRIP_HOT].temp = 0;
140 }
141 
142 static int
143 mlxsw_thermal_module_trips_update(struct device *dev, struct mlxsw_core *core,
144 				  struct mlxsw_thermal_module *tz,
145 				  int crit_temp, int emerg_temp)
146 {
147 	int err;
148 
149 	/* Do not try to query temperature thresholds directly from the module's
150 	 * EEPROM if we got valid thresholds from MTMP.
151 	 */
152 	if (!emerg_temp || !crit_temp) {
153 		err = mlxsw_env_module_temp_thresholds_get(core, tz->module,
154 							   SFP_TEMP_HIGH_WARN,
155 							   &crit_temp);
156 		if (err)
157 			return err;
158 
159 		err = mlxsw_env_module_temp_thresholds_get(core, tz->module,
160 							   SFP_TEMP_HIGH_ALARM,
161 							   &emerg_temp);
162 		if (err)
163 			return err;
164 	}
165 
166 	if (crit_temp > emerg_temp) {
167 		dev_warn(dev, "%s : Critical threshold %d is above emergency threshold %d\n",
168 			 tz->tzdev->type, crit_temp, emerg_temp);
169 		return 0;
170 	}
171 
172 	/* According to the system thermal requirements, the thermal zones are
173 	 * defined with three trip points. The critical and emergency
174 	 * temperature thresholds, provided by QSFP module are set as "active"
175 	 * and "hot" trip points, "normal" trip point is derived from "active"
176 	 * by subtracting double hysteresis value.
177 	 */
178 	if (crit_temp >= MLXSW_THERMAL_MODULE_TEMP_SHIFT)
179 		tz->trips[MLXSW_THERMAL_TEMP_TRIP_NORM].temp = crit_temp -
180 					MLXSW_THERMAL_MODULE_TEMP_SHIFT;
181 	else
182 		tz->trips[MLXSW_THERMAL_TEMP_TRIP_NORM].temp = crit_temp;
183 	tz->trips[MLXSW_THERMAL_TEMP_TRIP_HIGH].temp = crit_temp;
184 	tz->trips[MLXSW_THERMAL_TEMP_TRIP_HOT].temp = emerg_temp;
185 
186 	return 0;
187 }
188 
189 static void mlxsw_thermal_tz_score_update(struct mlxsw_thermal *thermal,
190 					  struct thermal_zone_device *tzdev,
191 					  struct mlxsw_thermal_trip *trips,
192 					  int temp)
193 {
194 	struct mlxsw_thermal_trip *trip = trips;
195 	unsigned int score, delta, i, shift = 1;
196 
197 	/* Calculate thermal zone score, if temperature is above the hot
198 	 * threshold score is set to MLXSW_THERMAL_TEMP_SCORE_MAX.
199 	 */
200 	score = MLXSW_THERMAL_TEMP_SCORE_MAX;
201 	for (i = MLXSW_THERMAL_TEMP_TRIP_NORM; i < MLXSW_THERMAL_NUM_TRIPS;
202 	     i++, trip++) {
203 		if (temp < trip->temp) {
204 			delta = DIV_ROUND_CLOSEST(temp, trip->temp - temp);
205 			score = delta * shift;
206 			break;
207 		}
208 		shift *= 256;
209 	}
210 
211 	if (score > thermal->tz_highest_score) {
212 		thermal->tz_highest_score = score;
213 		thermal->tz_highest_dev = tzdev;
214 	}
215 }
216 
217 static int mlxsw_thermal_bind(struct thermal_zone_device *tzdev,
218 			      struct thermal_cooling_device *cdev)
219 {
220 	struct mlxsw_thermal *thermal = tzdev->devdata;
221 	struct device *dev = thermal->bus_info->dev;
222 	int i, err;
223 
224 	/* If the cooling device is one of ours bind it */
225 	if (mlxsw_get_cooling_device_idx(thermal, cdev) < 0)
226 		return 0;
227 
228 	for (i = 0; i < MLXSW_THERMAL_NUM_TRIPS; i++) {
229 		const struct mlxsw_thermal_trip *trip = &thermal->trips[i];
230 
231 		err = thermal_zone_bind_cooling_device(tzdev, i, cdev,
232 						       trip->max_state,
233 						       trip->min_state,
234 						       THERMAL_WEIGHT_DEFAULT);
235 		if (err < 0) {
236 			dev_err(dev, "Failed to bind cooling device to trip %d\n", i);
237 			return err;
238 		}
239 	}
240 	return 0;
241 }
242 
243 static int mlxsw_thermal_unbind(struct thermal_zone_device *tzdev,
244 				struct thermal_cooling_device *cdev)
245 {
246 	struct mlxsw_thermal *thermal = tzdev->devdata;
247 	struct device *dev = thermal->bus_info->dev;
248 	int i;
249 	int err;
250 
251 	/* If the cooling device is our one unbind it */
252 	if (mlxsw_get_cooling_device_idx(thermal, cdev) < 0)
253 		return 0;
254 
255 	for (i = 0; i < MLXSW_THERMAL_NUM_TRIPS; i++) {
256 		err = thermal_zone_unbind_cooling_device(tzdev, i, cdev);
257 		if (err < 0) {
258 			dev_err(dev, "Failed to unbind cooling device\n");
259 			return err;
260 		}
261 	}
262 	return 0;
263 }
264 
265 static int mlxsw_thermal_get_temp(struct thermal_zone_device *tzdev,
266 				  int *p_temp)
267 {
268 	struct mlxsw_thermal *thermal = tzdev->devdata;
269 	struct device *dev = thermal->bus_info->dev;
270 	char mtmp_pl[MLXSW_REG_MTMP_LEN];
271 	int temp;
272 	int err;
273 
274 	mlxsw_reg_mtmp_pack(mtmp_pl, 0, false, false);
275 
276 	err = mlxsw_reg_query(thermal->core, MLXSW_REG(mtmp), mtmp_pl);
277 	if (err) {
278 		dev_err(dev, "Failed to query temp sensor\n");
279 		return err;
280 	}
281 	mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL, NULL, NULL);
282 	if (temp > 0)
283 		mlxsw_thermal_tz_score_update(thermal, tzdev, thermal->trips,
284 					      temp);
285 
286 	*p_temp = temp;
287 	return 0;
288 }
289 
290 static int mlxsw_thermal_get_trip_type(struct thermal_zone_device *tzdev,
291 				       int trip,
292 				       enum thermal_trip_type *p_type)
293 {
294 	struct mlxsw_thermal *thermal = tzdev->devdata;
295 
296 	if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS)
297 		return -EINVAL;
298 
299 	*p_type = thermal->trips[trip].type;
300 	return 0;
301 }
302 
303 static int mlxsw_thermal_get_trip_temp(struct thermal_zone_device *tzdev,
304 				       int trip, int *p_temp)
305 {
306 	struct mlxsw_thermal *thermal = tzdev->devdata;
307 
308 	if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS)
309 		return -EINVAL;
310 
311 	*p_temp = thermal->trips[trip].temp;
312 	return 0;
313 }
314 
315 static int mlxsw_thermal_set_trip_temp(struct thermal_zone_device *tzdev,
316 				       int trip, int temp)
317 {
318 	struct mlxsw_thermal *thermal = tzdev->devdata;
319 
320 	if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS)
321 		return -EINVAL;
322 
323 	thermal->trips[trip].temp = temp;
324 	return 0;
325 }
326 
327 static int mlxsw_thermal_get_trip_hyst(struct thermal_zone_device *tzdev,
328 				       int trip, int *p_hyst)
329 {
330 	struct mlxsw_thermal *thermal = tzdev->devdata;
331 
332 	*p_hyst = thermal->trips[trip].hyst;
333 	return 0;
334 }
335 
336 static int mlxsw_thermal_set_trip_hyst(struct thermal_zone_device *tzdev,
337 				       int trip, int hyst)
338 {
339 	struct mlxsw_thermal *thermal = tzdev->devdata;
340 
341 	thermal->trips[trip].hyst = hyst;
342 	return 0;
343 }
344 
345 static int mlxsw_thermal_trend_get(struct thermal_zone_device *tzdev,
346 				   int trip, enum thermal_trend *trend)
347 {
348 	struct mlxsw_thermal *thermal = tzdev->devdata;
349 
350 	if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS)
351 		return -EINVAL;
352 
353 	if (tzdev == thermal->tz_highest_dev)
354 		return 1;
355 
356 	*trend = THERMAL_TREND_STABLE;
357 	return 0;
358 }
359 
360 static struct thermal_zone_device_ops mlxsw_thermal_ops = {
361 	.bind = mlxsw_thermal_bind,
362 	.unbind = mlxsw_thermal_unbind,
363 	.get_temp = mlxsw_thermal_get_temp,
364 	.get_trip_type	= mlxsw_thermal_get_trip_type,
365 	.get_trip_temp	= mlxsw_thermal_get_trip_temp,
366 	.set_trip_temp	= mlxsw_thermal_set_trip_temp,
367 	.get_trip_hyst	= mlxsw_thermal_get_trip_hyst,
368 	.set_trip_hyst	= mlxsw_thermal_set_trip_hyst,
369 	.get_trend	= mlxsw_thermal_trend_get,
370 };
371 
372 static int mlxsw_thermal_module_bind(struct thermal_zone_device *tzdev,
373 				     struct thermal_cooling_device *cdev)
374 {
375 	struct mlxsw_thermal_module *tz = tzdev->devdata;
376 	struct mlxsw_thermal *thermal = tz->parent;
377 	int i, j, err;
378 
379 	/* If the cooling device is one of ours bind it */
380 	if (mlxsw_get_cooling_device_idx(thermal, cdev) < 0)
381 		return 0;
382 
383 	for (i = 0; i < MLXSW_THERMAL_NUM_TRIPS; i++) {
384 		const struct mlxsw_thermal_trip *trip = &tz->trips[i];
385 
386 		err = thermal_zone_bind_cooling_device(tzdev, i, cdev,
387 						       trip->max_state,
388 						       trip->min_state,
389 						       THERMAL_WEIGHT_DEFAULT);
390 		if (err < 0)
391 			goto err_bind_cooling_device;
392 	}
393 	return 0;
394 
395 err_bind_cooling_device:
396 	for (j = i - 1; j >= 0; j--)
397 		thermal_zone_unbind_cooling_device(tzdev, j, cdev);
398 	return err;
399 }
400 
401 static int mlxsw_thermal_module_unbind(struct thermal_zone_device *tzdev,
402 				       struct thermal_cooling_device *cdev)
403 {
404 	struct mlxsw_thermal_module *tz = tzdev->devdata;
405 	struct mlxsw_thermal *thermal = tz->parent;
406 	int i;
407 	int err;
408 
409 	/* If the cooling device is one of ours unbind it */
410 	if (mlxsw_get_cooling_device_idx(thermal, cdev) < 0)
411 		return 0;
412 
413 	for (i = 0; i < MLXSW_THERMAL_NUM_TRIPS; i++) {
414 		err = thermal_zone_unbind_cooling_device(tzdev, i, cdev);
415 		WARN_ON(err);
416 	}
417 	return err;
418 }
419 
420 static void
421 mlxsw_thermal_module_temp_and_thresholds_get(struct mlxsw_core *core,
422 					     u16 sensor_index, int *p_temp,
423 					     int *p_crit_temp,
424 					     int *p_emerg_temp)
425 {
426 	char mtmp_pl[MLXSW_REG_MTMP_LEN];
427 	int err;
428 
429 	/* Read module temperature and thresholds. */
430 	mlxsw_reg_mtmp_pack(mtmp_pl, sensor_index, false, false);
431 	err = mlxsw_reg_query(core, MLXSW_REG(mtmp), mtmp_pl);
432 	if (err) {
433 		/* Set temperature and thresholds to zero to avoid passing
434 		 * uninitialized data back to the caller.
435 		 */
436 		*p_temp = 0;
437 		*p_crit_temp = 0;
438 		*p_emerg_temp = 0;
439 
440 		return;
441 	}
442 	mlxsw_reg_mtmp_unpack(mtmp_pl, p_temp, NULL, p_crit_temp, p_emerg_temp,
443 			      NULL);
444 }
445 
446 static int mlxsw_thermal_module_temp_get(struct thermal_zone_device *tzdev,
447 					 int *p_temp)
448 {
449 	struct mlxsw_thermal_module *tz = tzdev->devdata;
450 	struct mlxsw_thermal *thermal = tz->parent;
451 	int temp, crit_temp, emerg_temp;
452 	struct device *dev;
453 	u16 sensor_index;
454 	int err;
455 
456 	dev = thermal->bus_info->dev;
457 	sensor_index = MLXSW_REG_MTMP_MODULE_INDEX_MIN + tz->module;
458 
459 	/* Read module temperature and thresholds. */
460 	mlxsw_thermal_module_temp_and_thresholds_get(thermal->core,
461 						     sensor_index, &temp,
462 						     &crit_temp, &emerg_temp);
463 	*p_temp = temp;
464 
465 	if (!temp)
466 		return 0;
467 
468 	/* Update trip points. */
469 	err = mlxsw_thermal_module_trips_update(dev, thermal->core, tz,
470 						crit_temp, emerg_temp);
471 	if (!err && temp > 0)
472 		mlxsw_thermal_tz_score_update(thermal, tzdev, tz->trips, temp);
473 
474 	return 0;
475 }
476 
477 static int
478 mlxsw_thermal_module_trip_type_get(struct thermal_zone_device *tzdev, int trip,
479 				   enum thermal_trip_type *p_type)
480 {
481 	struct mlxsw_thermal_module *tz = tzdev->devdata;
482 
483 	if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS)
484 		return -EINVAL;
485 
486 	*p_type = tz->trips[trip].type;
487 	return 0;
488 }
489 
490 static int
491 mlxsw_thermal_module_trip_temp_get(struct thermal_zone_device *tzdev,
492 				   int trip, int *p_temp)
493 {
494 	struct mlxsw_thermal_module *tz = tzdev->devdata;
495 
496 	if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS)
497 		return -EINVAL;
498 
499 	*p_temp = tz->trips[trip].temp;
500 	return 0;
501 }
502 
503 static int
504 mlxsw_thermal_module_trip_temp_set(struct thermal_zone_device *tzdev,
505 				   int trip, int temp)
506 {
507 	struct mlxsw_thermal_module *tz = tzdev->devdata;
508 
509 	if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS)
510 		return -EINVAL;
511 
512 	tz->trips[trip].temp = temp;
513 	return 0;
514 }
515 
516 static int
517 mlxsw_thermal_module_trip_hyst_get(struct thermal_zone_device *tzdev, int trip,
518 				   int *p_hyst)
519 {
520 	struct mlxsw_thermal_module *tz = tzdev->devdata;
521 
522 	*p_hyst = tz->trips[trip].hyst;
523 	return 0;
524 }
525 
526 static int
527 mlxsw_thermal_module_trip_hyst_set(struct thermal_zone_device *tzdev, int trip,
528 				   int hyst)
529 {
530 	struct mlxsw_thermal_module *tz = tzdev->devdata;
531 
532 	tz->trips[trip].hyst = hyst;
533 	return 0;
534 }
535 
536 static int mlxsw_thermal_module_trend_get(struct thermal_zone_device *tzdev,
537 					  int trip, enum thermal_trend *trend)
538 {
539 	struct mlxsw_thermal_module *tz = tzdev->devdata;
540 	struct mlxsw_thermal *thermal = tz->parent;
541 
542 	if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS)
543 		return -EINVAL;
544 
545 	if (tzdev == thermal->tz_highest_dev)
546 		return 1;
547 
548 	*trend = THERMAL_TREND_STABLE;
549 	return 0;
550 }
551 
552 static struct thermal_zone_device_ops mlxsw_thermal_module_ops = {
553 	.bind		= mlxsw_thermal_module_bind,
554 	.unbind		= mlxsw_thermal_module_unbind,
555 	.get_temp	= mlxsw_thermal_module_temp_get,
556 	.get_trip_type	= mlxsw_thermal_module_trip_type_get,
557 	.get_trip_temp	= mlxsw_thermal_module_trip_temp_get,
558 	.set_trip_temp	= mlxsw_thermal_module_trip_temp_set,
559 	.get_trip_hyst	= mlxsw_thermal_module_trip_hyst_get,
560 	.set_trip_hyst	= mlxsw_thermal_module_trip_hyst_set,
561 	.get_trend	= mlxsw_thermal_module_trend_get,
562 };
563 
564 static int mlxsw_thermal_gearbox_temp_get(struct thermal_zone_device *tzdev,
565 					  int *p_temp)
566 {
567 	struct mlxsw_thermal_module *tz = tzdev->devdata;
568 	struct mlxsw_thermal *thermal = tz->parent;
569 	char mtmp_pl[MLXSW_REG_MTMP_LEN];
570 	u16 index;
571 	int temp;
572 	int err;
573 
574 	index = MLXSW_REG_MTMP_GBOX_INDEX_MIN + tz->module;
575 	mlxsw_reg_mtmp_pack(mtmp_pl, index, false, false);
576 
577 	err = mlxsw_reg_query(thermal->core, MLXSW_REG(mtmp), mtmp_pl);
578 	if (err)
579 		return err;
580 
581 	mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL, NULL, NULL);
582 	if (temp > 0)
583 		mlxsw_thermal_tz_score_update(thermal, tzdev, tz->trips, temp);
584 
585 	*p_temp = temp;
586 	return 0;
587 }
588 
589 static struct thermal_zone_device_ops mlxsw_thermal_gearbox_ops = {
590 	.bind		= mlxsw_thermal_module_bind,
591 	.unbind		= mlxsw_thermal_module_unbind,
592 	.get_temp	= mlxsw_thermal_gearbox_temp_get,
593 	.get_trip_type	= mlxsw_thermal_module_trip_type_get,
594 	.get_trip_temp	= mlxsw_thermal_module_trip_temp_get,
595 	.set_trip_temp	= mlxsw_thermal_module_trip_temp_set,
596 	.get_trip_hyst	= mlxsw_thermal_module_trip_hyst_get,
597 	.set_trip_hyst	= mlxsw_thermal_module_trip_hyst_set,
598 	.get_trend	= mlxsw_thermal_module_trend_get,
599 };
600 
601 static int mlxsw_thermal_get_max_state(struct thermal_cooling_device *cdev,
602 				       unsigned long *p_state)
603 {
604 	*p_state = MLXSW_THERMAL_MAX_STATE;
605 	return 0;
606 }
607 
608 static int mlxsw_thermal_get_cur_state(struct thermal_cooling_device *cdev,
609 				       unsigned long *p_state)
610 
611 {
612 	struct mlxsw_thermal *thermal = cdev->devdata;
613 	struct device *dev = thermal->bus_info->dev;
614 	char mfsc_pl[MLXSW_REG_MFSC_LEN];
615 	int err, idx;
616 	u8 duty;
617 
618 	idx = mlxsw_get_cooling_device_idx(thermal, cdev);
619 	if (idx < 0)
620 		return idx;
621 
622 	mlxsw_reg_mfsc_pack(mfsc_pl, idx, 0);
623 	err = mlxsw_reg_query(thermal->core, MLXSW_REG(mfsc), mfsc_pl);
624 	if (err) {
625 		dev_err(dev, "Failed to query PWM duty\n");
626 		return err;
627 	}
628 
629 	duty = mlxsw_reg_mfsc_pwm_duty_cycle_get(mfsc_pl);
630 	*p_state = mlxsw_duty_to_state(duty);
631 	return 0;
632 }
633 
634 static int mlxsw_thermal_set_cur_state(struct thermal_cooling_device *cdev,
635 				       unsigned long state)
636 
637 {
638 	struct mlxsw_thermal *thermal = cdev->devdata;
639 	struct device *dev = thermal->bus_info->dev;
640 	char mfsc_pl[MLXSW_REG_MFSC_LEN];
641 	int idx;
642 	int err;
643 
644 	if (state > MLXSW_THERMAL_MAX_STATE)
645 		return -EINVAL;
646 
647 	idx = mlxsw_get_cooling_device_idx(thermal, cdev);
648 	if (idx < 0)
649 		return idx;
650 
651 	/* Normalize the state to the valid speed range. */
652 	state = thermal->cooling_levels[state];
653 	mlxsw_reg_mfsc_pack(mfsc_pl, idx, mlxsw_state_to_duty(state));
654 	err = mlxsw_reg_write(thermal->core, MLXSW_REG(mfsc), mfsc_pl);
655 	if (err) {
656 		dev_err(dev, "Failed to write PWM duty\n");
657 		return err;
658 	}
659 	return 0;
660 }
661 
662 static const struct thermal_cooling_device_ops mlxsw_cooling_ops = {
663 	.get_max_state	= mlxsw_thermal_get_max_state,
664 	.get_cur_state	= mlxsw_thermal_get_cur_state,
665 	.set_cur_state	= mlxsw_thermal_set_cur_state,
666 };
667 
668 static int
669 mlxsw_thermal_module_tz_init(struct mlxsw_thermal_module *module_tz)
670 {
671 	char tz_name[MLXSW_THERMAL_ZONE_MAX_NAME];
672 	int err;
673 
674 	snprintf(tz_name, sizeof(tz_name), "mlxsw-module%d",
675 		 module_tz->module + 1);
676 	module_tz->tzdev = thermal_zone_device_register(tz_name,
677 							MLXSW_THERMAL_NUM_TRIPS,
678 							MLXSW_THERMAL_TRIP_MASK,
679 							module_tz,
680 							&mlxsw_thermal_module_ops,
681 							NULL, 0,
682 							module_tz->parent->polling_delay);
683 	if (IS_ERR(module_tz->tzdev)) {
684 		err = PTR_ERR(module_tz->tzdev);
685 		return err;
686 	}
687 
688 	err = thermal_zone_device_enable(module_tz->tzdev);
689 	if (err)
690 		thermal_zone_device_unregister(module_tz->tzdev);
691 
692 	return err;
693 }
694 
/* Unregister the thermal zone of a module. */
static void mlxsw_thermal_module_tz_fini(struct thermal_zone_device *tzdev)
{
	struct thermal_zone_device *zone = tzdev;

	thermal_zone_device_unregister(zone);
}
699 
700 static int
701 mlxsw_thermal_module_init(struct device *dev, struct mlxsw_core *core,
702 			  struct mlxsw_thermal *thermal, u8 module)
703 {
704 	struct mlxsw_thermal_module *module_tz;
705 	int dummy_temp, crit_temp, emerg_temp;
706 	u16 sensor_index;
707 
708 	sensor_index = MLXSW_REG_MTMP_MODULE_INDEX_MIN + module;
709 	module_tz = &thermal->tz_module_arr[module];
710 	/* Skip if parent is already set (case of port split). */
711 	if (module_tz->parent)
712 		return 0;
713 	module_tz->module = module;
714 	module_tz->parent = thermal;
715 	memcpy(module_tz->trips, default_thermal_trips,
716 	       sizeof(thermal->trips));
717 	/* Initialize all trip point. */
718 	mlxsw_thermal_module_trips_reset(module_tz);
719 	/* Read module temperature and thresholds. */
720 	mlxsw_thermal_module_temp_and_thresholds_get(core, sensor_index, &dummy_temp,
721 						     &crit_temp, &emerg_temp);
722 	/* Update trip point according to the module data. */
723 	return mlxsw_thermal_module_trips_update(dev, core, module_tz,
724 						 crit_temp, emerg_temp);
725 }
726 
727 static void mlxsw_thermal_module_fini(struct mlxsw_thermal_module *module_tz)
728 {
729 	if (module_tz && module_tz->tzdev) {
730 		mlxsw_thermal_module_tz_fini(module_tz->tzdev);
731 		module_tz->tzdev = NULL;
732 		module_tz->parent = NULL;
733 	}
734 }
735 
736 static int
737 mlxsw_thermal_modules_init(struct device *dev, struct mlxsw_core *core,
738 			   struct mlxsw_thermal *thermal)
739 {
740 	struct mlxsw_thermal_module *module_tz;
741 	char mgpir_pl[MLXSW_REG_MGPIR_LEN];
742 	int i, err;
743 
744 	if (!mlxsw_core_res_query_enabled(core))
745 		return 0;
746 
747 	mlxsw_reg_mgpir_pack(mgpir_pl);
748 	err = mlxsw_reg_query(core, MLXSW_REG(mgpir), mgpir_pl);
749 	if (err)
750 		return err;
751 
752 	mlxsw_reg_mgpir_unpack(mgpir_pl, NULL, NULL, NULL,
753 			       &thermal->tz_module_num);
754 
755 	thermal->tz_module_arr = kcalloc(thermal->tz_module_num,
756 					 sizeof(*thermal->tz_module_arr),
757 					 GFP_KERNEL);
758 	if (!thermal->tz_module_arr)
759 		return -ENOMEM;
760 
761 	for (i = 0; i < thermal->tz_module_num; i++) {
762 		err = mlxsw_thermal_module_init(dev, core, thermal, i);
763 		if (err)
764 			goto err_unreg_tz_module_arr;
765 	}
766 
767 	for (i = 0; i < thermal->tz_module_num; i++) {
768 		module_tz = &thermal->tz_module_arr[i];
769 		if (!module_tz->parent)
770 			continue;
771 		err = mlxsw_thermal_module_tz_init(module_tz);
772 		if (err)
773 			goto err_unreg_tz_module_arr;
774 	}
775 
776 	return 0;
777 
778 err_unreg_tz_module_arr:
779 	for (i = thermal->tz_module_num - 1; i >= 0; i--)
780 		mlxsw_thermal_module_fini(&thermal->tz_module_arr[i]);
781 	kfree(thermal->tz_module_arr);
782 	return err;
783 }
784 
785 static void
786 mlxsw_thermal_modules_fini(struct mlxsw_thermal *thermal)
787 {
788 	int i;
789 
790 	if (!mlxsw_core_res_query_enabled(thermal->core))
791 		return;
792 
793 	for (i = thermal->tz_module_num - 1; i >= 0; i--)
794 		mlxsw_thermal_module_fini(&thermal->tz_module_arr[i]);
795 	kfree(thermal->tz_module_arr);
796 }
797 
798 static int
799 mlxsw_thermal_gearbox_tz_init(struct mlxsw_thermal_module *gearbox_tz)
800 {
801 	char tz_name[MLXSW_THERMAL_ZONE_MAX_NAME];
802 	int ret;
803 
804 	snprintf(tz_name, sizeof(tz_name), "mlxsw-gearbox%d",
805 		 gearbox_tz->module + 1);
806 	gearbox_tz->tzdev = thermal_zone_device_register(tz_name,
807 						MLXSW_THERMAL_NUM_TRIPS,
808 						MLXSW_THERMAL_TRIP_MASK,
809 						gearbox_tz,
810 						&mlxsw_thermal_gearbox_ops,
811 						NULL, 0,
812 						gearbox_tz->parent->polling_delay);
813 	if (IS_ERR(gearbox_tz->tzdev))
814 		return PTR_ERR(gearbox_tz->tzdev);
815 
816 	ret = thermal_zone_device_enable(gearbox_tz->tzdev);
817 	if (ret)
818 		thermal_zone_device_unregister(gearbox_tz->tzdev);
819 
820 	return ret;
821 }
822 
823 static void
824 mlxsw_thermal_gearbox_tz_fini(struct mlxsw_thermal_module *gearbox_tz)
825 {
826 	thermal_zone_device_unregister(gearbox_tz->tzdev);
827 }
828 
829 static int
830 mlxsw_thermal_gearboxes_init(struct device *dev, struct mlxsw_core *core,
831 			     struct mlxsw_thermal *thermal)
832 {
833 	enum mlxsw_reg_mgpir_device_type device_type;
834 	struct mlxsw_thermal_module *gearbox_tz;
835 	char mgpir_pl[MLXSW_REG_MGPIR_LEN];
836 	u8 gbox_num;
837 	int i;
838 	int err;
839 
840 	if (!mlxsw_core_res_query_enabled(core))
841 		return 0;
842 
843 	mlxsw_reg_mgpir_pack(mgpir_pl);
844 	err = mlxsw_reg_query(core, MLXSW_REG(mgpir), mgpir_pl);
845 	if (err)
846 		return err;
847 
848 	mlxsw_reg_mgpir_unpack(mgpir_pl, &gbox_num, &device_type, NULL,
849 			       NULL);
850 	if (device_type != MLXSW_REG_MGPIR_DEVICE_TYPE_GEARBOX_DIE ||
851 	    !gbox_num)
852 		return 0;
853 
854 	thermal->tz_gearbox_num = gbox_num;
855 	thermal->tz_gearbox_arr = kcalloc(thermal->tz_gearbox_num,
856 					  sizeof(*thermal->tz_gearbox_arr),
857 					  GFP_KERNEL);
858 	if (!thermal->tz_gearbox_arr)
859 		return -ENOMEM;
860 
861 	for (i = 0; i < thermal->tz_gearbox_num; i++) {
862 		gearbox_tz = &thermal->tz_gearbox_arr[i];
863 		memcpy(gearbox_tz->trips, default_thermal_trips,
864 		       sizeof(thermal->trips));
865 		gearbox_tz->module = i;
866 		gearbox_tz->parent = thermal;
867 		err = mlxsw_thermal_gearbox_tz_init(gearbox_tz);
868 		if (err)
869 			goto err_unreg_tz_gearbox;
870 	}
871 
872 	return 0;
873 
874 err_unreg_tz_gearbox:
875 	for (i--; i >= 0; i--)
876 		mlxsw_thermal_gearbox_tz_fini(&thermal->tz_gearbox_arr[i]);
877 	kfree(thermal->tz_gearbox_arr);
878 	return err;
879 }
880 
881 static void
882 mlxsw_thermal_gearboxes_fini(struct mlxsw_thermal *thermal)
883 {
884 	int i;
885 
886 	if (!mlxsw_core_res_query_enabled(thermal->core))
887 		return;
888 
889 	for (i = thermal->tz_gearbox_num - 1; i >= 0; i--)
890 		mlxsw_thermal_gearbox_tz_fini(&thermal->tz_gearbox_arr[i]);
891 	kfree(thermal->tz_gearbox_arr);
892 }
893 
894 int mlxsw_thermal_init(struct mlxsw_core *core,
895 		       const struct mlxsw_bus_info *bus_info,
896 		       struct mlxsw_thermal **p_thermal)
897 {
898 	char mfcr_pl[MLXSW_REG_MFCR_LEN] = { 0 };
899 	enum mlxsw_reg_mfcr_pwm_frequency freq;
900 	struct device *dev = bus_info->dev;
901 	struct mlxsw_thermal *thermal;
902 	u16 tacho_active;
903 	u8 pwm_active;
904 	int err, i;
905 
906 	thermal = devm_kzalloc(dev, sizeof(*thermal),
907 			       GFP_KERNEL);
908 	if (!thermal)
909 		return -ENOMEM;
910 
911 	thermal->core = core;
912 	thermal->bus_info = bus_info;
913 	memcpy(thermal->trips, default_thermal_trips, sizeof(thermal->trips));
914 
915 	err = mlxsw_reg_query(thermal->core, MLXSW_REG(mfcr), mfcr_pl);
916 	if (err) {
917 		dev_err(dev, "Failed to probe PWMs\n");
918 		goto err_free_thermal;
919 	}
920 	mlxsw_reg_mfcr_unpack(mfcr_pl, &freq, &tacho_active, &pwm_active);
921 
922 	for (i = 0; i < MLXSW_MFCR_TACHOS_MAX; i++) {
923 		if (tacho_active & BIT(i)) {
924 			char mfsl_pl[MLXSW_REG_MFSL_LEN];
925 
926 			mlxsw_reg_mfsl_pack(mfsl_pl, i, 0, 0);
927 
928 			/* We need to query the register to preserve maximum */
929 			err = mlxsw_reg_query(thermal->core, MLXSW_REG(mfsl),
930 					      mfsl_pl);
931 			if (err)
932 				goto err_free_thermal;
933 
934 			/* set the minimal RPMs to 0 */
935 			mlxsw_reg_mfsl_tach_min_set(mfsl_pl, 0);
936 			err = mlxsw_reg_write(thermal->core, MLXSW_REG(mfsl),
937 					      mfsl_pl);
938 			if (err)
939 				goto err_free_thermal;
940 		}
941 	}
942 	for (i = 0; i < MLXSW_MFCR_PWMS_MAX; i++) {
943 		if (pwm_active & BIT(i)) {
944 			struct thermal_cooling_device *cdev;
945 
946 			cdev = thermal_cooling_device_register("mlxsw_fan",
947 							       thermal,
948 							       &mlxsw_cooling_ops);
949 			if (IS_ERR(cdev)) {
950 				err = PTR_ERR(cdev);
951 				dev_err(dev, "Failed to register cooling device\n");
952 				goto err_unreg_cdevs;
953 			}
954 			thermal->cdevs[i] = cdev;
955 		}
956 	}
957 
958 	/* Initialize cooling levels per PWM state. */
959 	for (i = 0; i < MLXSW_THERMAL_MAX_STATE; i++)
960 		thermal->cooling_levels[i] = max(MLXSW_THERMAL_MIN_STATE, i);
961 
962 	thermal->polling_delay = bus_info->low_frequency ?
963 				 MLXSW_THERMAL_SLOW_POLL_INT :
964 				 MLXSW_THERMAL_POLL_INT;
965 
966 	thermal->tzdev = thermal_zone_device_register("mlxsw",
967 						      MLXSW_THERMAL_NUM_TRIPS,
968 						      MLXSW_THERMAL_TRIP_MASK,
969 						      thermal,
970 						      &mlxsw_thermal_ops,
971 						      NULL, 0,
972 						      thermal->polling_delay);
973 	if (IS_ERR(thermal->tzdev)) {
974 		err = PTR_ERR(thermal->tzdev);
975 		dev_err(dev, "Failed to register thermal zone\n");
976 		goto err_unreg_cdevs;
977 	}
978 
979 	err = mlxsw_thermal_modules_init(dev, core, thermal);
980 	if (err)
981 		goto err_unreg_tzdev;
982 
983 	err = mlxsw_thermal_gearboxes_init(dev, core, thermal);
984 	if (err)
985 		goto err_unreg_modules_tzdev;
986 
987 	err = thermal_zone_device_enable(thermal->tzdev);
988 	if (err)
989 		goto err_unreg_gearboxes;
990 
991 	*p_thermal = thermal;
992 	return 0;
993 
994 err_unreg_gearboxes:
995 	mlxsw_thermal_gearboxes_fini(thermal);
996 err_unreg_modules_tzdev:
997 	mlxsw_thermal_modules_fini(thermal);
998 err_unreg_tzdev:
999 	if (thermal->tzdev) {
1000 		thermal_zone_device_unregister(thermal->tzdev);
1001 		thermal->tzdev = NULL;
1002 	}
1003 err_unreg_cdevs:
1004 	for (i = 0; i < MLXSW_MFCR_PWMS_MAX; i++)
1005 		if (thermal->cdevs[i])
1006 			thermal_cooling_device_unregister(thermal->cdevs[i]);
1007 err_free_thermal:
1008 	devm_kfree(dev, thermal);
1009 	return err;
1010 }
1011 
1012 void mlxsw_thermal_fini(struct mlxsw_thermal *thermal)
1013 {
1014 	int i;
1015 
1016 	mlxsw_thermal_gearboxes_fini(thermal);
1017 	mlxsw_thermal_modules_fini(thermal);
1018 	if (thermal->tzdev) {
1019 		thermal_zone_device_unregister(thermal->tzdev);
1020 		thermal->tzdev = NULL;
1021 	}
1022 
1023 	for (i = 0; i < MLXSW_MFCR_PWMS_MAX; i++) {
1024 		if (thermal->cdevs[i]) {
1025 			thermal_cooling_device_unregister(thermal->cdevs[i]);
1026 			thermal->cdevs[i] = NULL;
1027 		}
1028 	}
1029 
1030 	devm_kfree(thermal->bus_info->dev, thermal);
1031 }
1032