1 // SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
2 /* Copyright (c) 2016-2018 Mellanox Technologies. All rights reserved
3  * Copyright (c) 2016 Ivan Vecera <cera@cera.cz>
4  */
5 
6 #include <linux/kernel.h>
7 #include <linux/types.h>
8 #include <linux/device.h>
9 #include <linux/sysfs.h>
10 #include <linux/thermal.h>
11 #include <linux/err.h>
12 #include <linux/sfp.h>
13 
14 #include "core.h"
15 #include "core_env.h"
16 
/* Thermal zone polling intervals. */
#define MLXSW_THERMAL_POLL_INT	1000	/* ms */
#define MLXSW_THERMAL_SLOW_POLL_INT	20000	/* ms */
/* Default ASIC trip temperatures and hysteresis, in millidegrees Celsius. */
#define MLXSW_THERMAL_ASIC_TEMP_NORM	75000	/* 75C */
#define MLXSW_THERMAL_ASIC_TEMP_HIGH	85000	/* 85C */
#define MLXSW_THERMAL_ASIC_TEMP_HOT	105000	/* 105C */
#define MLXSW_THERMAL_ASIC_TEMP_CRIT	110000	/* 110C */
#define MLXSW_THERMAL_HYSTERESIS_TEMP	5000	/* 5C */
/* Offset (double hysteresis) used to derive module trip points. */
#define MLXSW_THERMAL_MODULE_TEMP_SHIFT	(MLXSW_THERMAL_HYSTERESIS_TEMP * 2)
#define MLXSW_THERMAL_ZONE_MAX_NAME	16
/* Score assigned to a zone whose temperature is at or above all trips. */
#define MLXSW_THERMAL_TEMP_SCORE_MAX	GENMASK(31, 0)
/* Highest cooling state and the PWM duty cycle value it maps to. */
#define MLXSW_THERMAL_MAX_STATE	10
#define MLXSW_THERMAL_MAX_DUTY	255
/* Minimum and maximum allowed fan speed in percent: from 20% to 100%. Values
 * MLXSW_THERMAL_MAX_STATE + x, where x is between 2 and 10, are used for
 * setting the fan speed dynamic minimum. For example, if the value is set to
 * 14 (40%), the cooling levels vector will be set to
 * 4, 4, 4, 4, 4, 5, 6, 7, 8, 9, 10 to introduce PWM speed in percent:
 * 40, 40, 40, 40, 40, 50, 60, 70, 80, 90, 100.
 */
#define MLXSW_THERMAL_SPEED_MIN		(MLXSW_THERMAL_MAX_STATE + 2)
#define MLXSW_THERMAL_SPEED_MAX		(MLXSW_THERMAL_MAX_STATE * 2)
#define MLXSW_THERMAL_SPEED_MIN_LEVEL	2		/* 20% */
38 
/* External cooling devices, allowed for binding to mlxsw thermal zones.
 * A cooling device matches when its type string contains one of these
 * names as a substring (see mlxsw_get_cooling_device_idx()).
 */
static char * const mlxsw_thermal_external_allowed_cdev[] = {
	"mlxreg_fan",
};
43 
/* Indices of the trip points inside a trips[] array; the order matches the
 * entries of default_thermal_trips.
 */
enum mlxsw_thermal_trips {
	MLXSW_THERMAL_TEMP_TRIP_NORM,
	MLXSW_THERMAL_TEMP_TRIP_HIGH,
	MLXSW_THERMAL_TEMP_TRIP_HOT,
	MLXSW_THERMAL_TEMP_TRIP_CRIT,
};
50 
/* Description of a single trip point of an mlxsw thermal zone. */
struct mlxsw_thermal_trip {
	int	type;		/* THERMAL_TRIP_* value reported for the trip */
	int	temp;		/* trip temperature */
	int	hyst;		/* trip hysteresis */
	int	min_state;	/* lower cooling state passed when binding */
	int	max_state;	/* upper cooling state passed when binding */
};
58 
/* Default trip table. It is copied into the ASIC zone and into every module
 * and gearbox zone; module zones then overwrite the temperatures with values
 * read from the module itself.
 */
static const struct mlxsw_thermal_trip default_thermal_trips[] = {
	{	/* In range - 0-40% PWM */
		.type		= THERMAL_TRIP_ACTIVE,
		.temp		= MLXSW_THERMAL_ASIC_TEMP_NORM,
		.hyst		= MLXSW_THERMAL_HYSTERESIS_TEMP,
		.min_state	= 0,
		.max_state	= (4 * MLXSW_THERMAL_MAX_STATE) / 10,
	},
	{
		/* In range - 40-100% PWM */
		.type		= THERMAL_TRIP_ACTIVE,
		.temp		= MLXSW_THERMAL_ASIC_TEMP_HIGH,
		.hyst		= MLXSW_THERMAL_HYSTERESIS_TEMP,
		.min_state	= (4 * MLXSW_THERMAL_MAX_STATE) / 10,
		.max_state	= MLXSW_THERMAL_MAX_STATE,
	},
	{	/* Warning */
		.type		= THERMAL_TRIP_HOT,
		.temp		= MLXSW_THERMAL_ASIC_TEMP_HOT,
		.hyst		= MLXSW_THERMAL_HYSTERESIS_TEMP,
		.min_state	= MLXSW_THERMAL_MAX_STATE,
		.max_state	= MLXSW_THERMAL_MAX_STATE,
	},
	{	/* Critical - soft poweroff; no hysteresis is set (left at 0) */
		.type		= THERMAL_TRIP_CRITICAL,
		.temp		= MLXSW_THERMAL_ASIC_TEMP_CRIT,
		.min_state	= MLXSW_THERMAL_MAX_STATE,
		.max_state	= MLXSW_THERMAL_MAX_STATE,
	}
};
89 
/* Number of trip points in every mlxsw thermal zone. */
#define MLXSW_THERMAL_NUM_TRIPS	ARRAY_SIZE(default_thermal_trips)

/* Make sure all trips are writable: one mask bit is set per trip point. */
#define MLXSW_THERMAL_TRIP_MASK	(BIT(MLXSW_THERMAL_NUM_TRIPS) - 1)
94 
95 struct mlxsw_thermal;
96 
/* Per-module (or per-gearbox) thermal zone state. */
struct mlxsw_thermal_module {
	struct mlxsw_thermal *parent;	/* owning thermal context */
	struct thermal_zone_device *tzdev;	/* zone registered for this entry */
	struct mlxsw_thermal_trip trips[MLXSW_THERMAL_NUM_TRIPS];
	int module; /* Module or gearbox number */
};
103 
/* Top-level thermal context, allocated by mlxsw_thermal_init(). */
struct mlxsw_thermal {
	struct mlxsw_core *core;
	const struct mlxsw_bus_info *bus_info;
	struct thermal_zone_device *tzdev;	/* ASIC ("mlxsw") thermal zone */
	int polling_delay;	/* polling interval of the ASIC zone, in ms */
	/* Cooling devices, indexed by PWM number; NULL for inactive PWMs. */
	struct thermal_cooling_device *cdevs[MLXSW_MFCR_PWMS_MAX];
	/* Maps a requested cooling state to the level actually applied. */
	u8 cooling_levels[MLXSW_THERMAL_MAX_STATE + 1];
	struct mlxsw_thermal_trip trips[MLXSW_THERMAL_NUM_TRIPS];
	struct mlxsw_thermal_module *tz_module_arr;	/* tz_module_num entries */
	u8 tz_module_num;
	struct mlxsw_thermal_module *tz_gearbox_arr;	/* tz_gearbox_num entries */
	u8 tz_gearbox_num;
	/* Highest score seen so far and the zone that produced it. */
	unsigned int tz_highest_score;
	struct thermal_zone_device *tz_highest_dev;
};
119 
120 static inline u8 mlxsw_state_to_duty(int state)
121 {
122 	return DIV_ROUND_CLOSEST(state * MLXSW_THERMAL_MAX_DUTY,
123 				 MLXSW_THERMAL_MAX_STATE);
124 }
125 
126 static inline int mlxsw_duty_to_state(u8 duty)
127 {
128 	return DIV_ROUND_CLOSEST(duty * MLXSW_THERMAL_MAX_STATE,
129 				 MLXSW_THERMAL_MAX_DUTY);
130 }
131 
132 static int mlxsw_get_cooling_device_idx(struct mlxsw_thermal *thermal,
133 					struct thermal_cooling_device *cdev)
134 {
135 	int i;
136 
137 	for (i = 0; i < MLXSW_MFCR_PWMS_MAX; i++)
138 		if (thermal->cdevs[i] == cdev)
139 			return i;
140 
141 	/* Allow mlxsw thermal zone binding to an external cooling device */
142 	for (i = 0; i < ARRAY_SIZE(mlxsw_thermal_external_allowed_cdev); i++) {
143 		if (strnstr(cdev->type, mlxsw_thermal_external_allowed_cdev[i],
144 			    sizeof(cdev->type)))
145 			return 0;
146 	}
147 
148 	return -ENODEV;
149 }
150 
151 static void
152 mlxsw_thermal_module_trips_reset(struct mlxsw_thermal_module *tz)
153 {
154 	tz->trips[MLXSW_THERMAL_TEMP_TRIP_NORM].temp = 0;
155 	tz->trips[MLXSW_THERMAL_TEMP_TRIP_HIGH].temp = 0;
156 	tz->trips[MLXSW_THERMAL_TEMP_TRIP_HOT].temp = 0;
157 	tz->trips[MLXSW_THERMAL_TEMP_TRIP_CRIT].temp = 0;
158 }
159 
160 static int
161 mlxsw_thermal_module_trips_update(struct device *dev, struct mlxsw_core *core,
162 				  struct mlxsw_thermal_module *tz)
163 {
164 	int crit_temp, emerg_temp;
165 	int err;
166 
167 	err = mlxsw_env_module_temp_thresholds_get(core, tz->module,
168 						   SFP_TEMP_HIGH_WARN,
169 						   &crit_temp);
170 	if (err)
171 		return err;
172 
173 	err = mlxsw_env_module_temp_thresholds_get(core, tz->module,
174 						   SFP_TEMP_HIGH_ALARM,
175 						   &emerg_temp);
176 	if (err)
177 		return err;
178 
179 	/* According to the system thermal requirements, the thermal zones are
180 	 * defined with four trip points. The critical and emergency
181 	 * temperature thresholds, provided by QSFP module are set as "active"
182 	 * and "hot" trip points, "normal" and "critical" trip points are
183 	 * derived from "active" and "hot" by subtracting or adding double
184 	 * hysteresis value.
185 	 */
186 	if (crit_temp >= MLXSW_THERMAL_MODULE_TEMP_SHIFT)
187 		tz->trips[MLXSW_THERMAL_TEMP_TRIP_NORM].temp = crit_temp -
188 					MLXSW_THERMAL_MODULE_TEMP_SHIFT;
189 	else
190 		tz->trips[MLXSW_THERMAL_TEMP_TRIP_NORM].temp = crit_temp;
191 	tz->trips[MLXSW_THERMAL_TEMP_TRIP_HIGH].temp = crit_temp;
192 	tz->trips[MLXSW_THERMAL_TEMP_TRIP_HOT].temp = emerg_temp;
193 	if (emerg_temp > crit_temp)
194 		tz->trips[MLXSW_THERMAL_TEMP_TRIP_CRIT].temp = emerg_temp +
195 					MLXSW_THERMAL_MODULE_TEMP_SHIFT;
196 	else
197 		tz->trips[MLXSW_THERMAL_TEMP_TRIP_CRIT].temp = emerg_temp;
198 
199 	return 0;
200 }
201 
202 static void mlxsw_thermal_tz_score_update(struct mlxsw_thermal *thermal,
203 					  struct thermal_zone_device *tzdev,
204 					  struct mlxsw_thermal_trip *trips,
205 					  int temp)
206 {
207 	struct mlxsw_thermal_trip *trip = trips;
208 	unsigned int score, delta, i, shift = 1;
209 
210 	/* Calculate thermal zone score, if temperature is above the critical
211 	 * threshold score is set to MLXSW_THERMAL_TEMP_SCORE_MAX.
212 	 */
213 	score = MLXSW_THERMAL_TEMP_SCORE_MAX;
214 	for (i = MLXSW_THERMAL_TEMP_TRIP_NORM; i < MLXSW_THERMAL_NUM_TRIPS;
215 	     i++, trip++) {
216 		if (temp < trip->temp) {
217 			delta = DIV_ROUND_CLOSEST(temp, trip->temp - temp);
218 			score = delta * shift;
219 			break;
220 		}
221 		shift *= 256;
222 	}
223 
224 	if (score > thermal->tz_highest_score) {
225 		thermal->tz_highest_score = score;
226 		thermal->tz_highest_dev = tzdev;
227 	}
228 }
229 
230 static int mlxsw_thermal_bind(struct thermal_zone_device *tzdev,
231 			      struct thermal_cooling_device *cdev)
232 {
233 	struct mlxsw_thermal *thermal = tzdev->devdata;
234 	struct device *dev = thermal->bus_info->dev;
235 	int i, err;
236 
237 	/* If the cooling device is one of ours bind it */
238 	if (mlxsw_get_cooling_device_idx(thermal, cdev) < 0)
239 		return 0;
240 
241 	for (i = 0; i < MLXSW_THERMAL_NUM_TRIPS; i++) {
242 		const struct mlxsw_thermal_trip *trip = &thermal->trips[i];
243 
244 		err = thermal_zone_bind_cooling_device(tzdev, i, cdev,
245 						       trip->max_state,
246 						       trip->min_state,
247 						       THERMAL_WEIGHT_DEFAULT);
248 		if (err < 0) {
249 			dev_err(dev, "Failed to bind cooling device to trip %d\n", i);
250 			return err;
251 		}
252 	}
253 	return 0;
254 }
255 
256 static int mlxsw_thermal_unbind(struct thermal_zone_device *tzdev,
257 				struct thermal_cooling_device *cdev)
258 {
259 	struct mlxsw_thermal *thermal = tzdev->devdata;
260 	struct device *dev = thermal->bus_info->dev;
261 	int i;
262 	int err;
263 
264 	/* If the cooling device is our one unbind it */
265 	if (mlxsw_get_cooling_device_idx(thermal, cdev) < 0)
266 		return 0;
267 
268 	for (i = 0; i < MLXSW_THERMAL_NUM_TRIPS; i++) {
269 		err = thermal_zone_unbind_cooling_device(tzdev, i, cdev);
270 		if (err < 0) {
271 			dev_err(dev, "Failed to unbind cooling device\n");
272 			return err;
273 		}
274 	}
275 	return 0;
276 }
277 
278 static int mlxsw_thermal_get_temp(struct thermal_zone_device *tzdev,
279 				  int *p_temp)
280 {
281 	struct mlxsw_thermal *thermal = tzdev->devdata;
282 	struct device *dev = thermal->bus_info->dev;
283 	char mtmp_pl[MLXSW_REG_MTMP_LEN];
284 	int temp;
285 	int err;
286 
287 	mlxsw_reg_mtmp_pack(mtmp_pl, 0, false, false);
288 
289 	err = mlxsw_reg_query(thermal->core, MLXSW_REG(mtmp), mtmp_pl);
290 	if (err) {
291 		dev_err(dev, "Failed to query temp sensor\n");
292 		return err;
293 	}
294 	mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL);
295 	if (temp > 0)
296 		mlxsw_thermal_tz_score_update(thermal, tzdev, thermal->trips,
297 					      temp);
298 
299 	*p_temp = temp;
300 	return 0;
301 }
302 
303 static int mlxsw_thermal_get_trip_type(struct thermal_zone_device *tzdev,
304 				       int trip,
305 				       enum thermal_trip_type *p_type)
306 {
307 	struct mlxsw_thermal *thermal = tzdev->devdata;
308 
309 	if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS)
310 		return -EINVAL;
311 
312 	*p_type = thermal->trips[trip].type;
313 	return 0;
314 }
315 
316 static int mlxsw_thermal_get_trip_temp(struct thermal_zone_device *tzdev,
317 				       int trip, int *p_temp)
318 {
319 	struct mlxsw_thermal *thermal = tzdev->devdata;
320 
321 	if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS)
322 		return -EINVAL;
323 
324 	*p_temp = thermal->trips[trip].temp;
325 	return 0;
326 }
327 
328 static int mlxsw_thermal_set_trip_temp(struct thermal_zone_device *tzdev,
329 				       int trip, int temp)
330 {
331 	struct mlxsw_thermal *thermal = tzdev->devdata;
332 
333 	if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS ||
334 	    temp > MLXSW_THERMAL_ASIC_TEMP_CRIT)
335 		return -EINVAL;
336 
337 	thermal->trips[trip].temp = temp;
338 	return 0;
339 }
340 
341 static int mlxsw_thermal_get_trip_hyst(struct thermal_zone_device *tzdev,
342 				       int trip, int *p_hyst)
343 {
344 	struct mlxsw_thermal *thermal = tzdev->devdata;
345 
346 	*p_hyst = thermal->trips[trip].hyst;
347 	return 0;
348 }
349 
350 static int mlxsw_thermal_set_trip_hyst(struct thermal_zone_device *tzdev,
351 				       int trip, int hyst)
352 {
353 	struct mlxsw_thermal *thermal = tzdev->devdata;
354 
355 	thermal->trips[trip].hyst = hyst;
356 	return 0;
357 }
358 
359 static int mlxsw_thermal_trend_get(struct thermal_zone_device *tzdev,
360 				   int trip, enum thermal_trend *trend)
361 {
362 	struct mlxsw_thermal *thermal = tzdev->devdata;
363 
364 	if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS)
365 		return -EINVAL;
366 
367 	if (tzdev == thermal->tz_highest_dev)
368 		return 1;
369 
370 	*trend = THERMAL_TREND_STABLE;
371 	return 0;
372 }
373 
/* Callbacks of the ASIC ("mlxsw") thermal zone. */
static struct thermal_zone_device_ops mlxsw_thermal_ops = {
	.bind = mlxsw_thermal_bind,
	.unbind = mlxsw_thermal_unbind,
	.get_temp = mlxsw_thermal_get_temp,
	.get_trip_type	= mlxsw_thermal_get_trip_type,
	.get_trip_temp	= mlxsw_thermal_get_trip_temp,
	.set_trip_temp	= mlxsw_thermal_set_trip_temp,
	.get_trip_hyst	= mlxsw_thermal_get_trip_hyst,
	.set_trip_hyst	= mlxsw_thermal_set_trip_hyst,
	.get_trend	= mlxsw_thermal_trend_get,
};
385 
386 static int mlxsw_thermal_module_bind(struct thermal_zone_device *tzdev,
387 				     struct thermal_cooling_device *cdev)
388 {
389 	struct mlxsw_thermal_module *tz = tzdev->devdata;
390 	struct mlxsw_thermal *thermal = tz->parent;
391 	int i, j, err;
392 
393 	/* If the cooling device is one of ours bind it */
394 	if (mlxsw_get_cooling_device_idx(thermal, cdev) < 0)
395 		return 0;
396 
397 	for (i = 0; i < MLXSW_THERMAL_NUM_TRIPS; i++) {
398 		const struct mlxsw_thermal_trip *trip = &tz->trips[i];
399 
400 		err = thermal_zone_bind_cooling_device(tzdev, i, cdev,
401 						       trip->max_state,
402 						       trip->min_state,
403 						       THERMAL_WEIGHT_DEFAULT);
404 		if (err < 0)
405 			goto err_bind_cooling_device;
406 	}
407 	return 0;
408 
409 err_bind_cooling_device:
410 	for (j = i - 1; j >= 0; j--)
411 		thermal_zone_unbind_cooling_device(tzdev, j, cdev);
412 	return err;
413 }
414 
415 static int mlxsw_thermal_module_unbind(struct thermal_zone_device *tzdev,
416 				       struct thermal_cooling_device *cdev)
417 {
418 	struct mlxsw_thermal_module *tz = tzdev->devdata;
419 	struct mlxsw_thermal *thermal = tz->parent;
420 	int i;
421 	int err;
422 
423 	/* If the cooling device is one of ours unbind it */
424 	if (mlxsw_get_cooling_device_idx(thermal, cdev) < 0)
425 		return 0;
426 
427 	for (i = 0; i < MLXSW_THERMAL_NUM_TRIPS; i++) {
428 		err = thermal_zone_unbind_cooling_device(tzdev, i, cdev);
429 		WARN_ON(err);
430 	}
431 	return err;
432 }
433 
434 static int mlxsw_thermal_module_temp_get(struct thermal_zone_device *tzdev,
435 					 int *p_temp)
436 {
437 	struct mlxsw_thermal_module *tz = tzdev->devdata;
438 	struct mlxsw_thermal *thermal = tz->parent;
439 	struct device *dev = thermal->bus_info->dev;
440 	char mtmp_pl[MLXSW_REG_MTMP_LEN];
441 	int temp;
442 	int err;
443 
444 	/* Read module temperature. */
445 	mlxsw_reg_mtmp_pack(mtmp_pl, MLXSW_REG_MTMP_MODULE_INDEX_MIN +
446 			    tz->module, false, false);
447 	err = mlxsw_reg_query(thermal->core, MLXSW_REG(mtmp), mtmp_pl);
448 	if (err) {
449 		/* Do not return error - in case of broken module's sensor
450 		 * it will cause error message flooding.
451 		 */
452 		temp = 0;
453 		*p_temp = (int) temp;
454 		return 0;
455 	}
456 	mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL);
457 	*p_temp = temp;
458 
459 	if (!temp)
460 		return 0;
461 
462 	/* Update trip points. */
463 	err = mlxsw_thermal_module_trips_update(dev, thermal->core, tz);
464 	if (!err && temp > 0)
465 		mlxsw_thermal_tz_score_update(thermal, tzdev, tz->trips, temp);
466 
467 	return 0;
468 }
469 
470 static int
471 mlxsw_thermal_module_trip_type_get(struct thermal_zone_device *tzdev, int trip,
472 				   enum thermal_trip_type *p_type)
473 {
474 	struct mlxsw_thermal_module *tz = tzdev->devdata;
475 
476 	if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS)
477 		return -EINVAL;
478 
479 	*p_type = tz->trips[trip].type;
480 	return 0;
481 }
482 
483 static int
484 mlxsw_thermal_module_trip_temp_get(struct thermal_zone_device *tzdev,
485 				   int trip, int *p_temp)
486 {
487 	struct mlxsw_thermal_module *tz = tzdev->devdata;
488 
489 	if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS)
490 		return -EINVAL;
491 
492 	*p_temp = tz->trips[trip].temp;
493 	return 0;
494 }
495 
496 static int
497 mlxsw_thermal_module_trip_temp_set(struct thermal_zone_device *tzdev,
498 				   int trip, int temp)
499 {
500 	struct mlxsw_thermal_module *tz = tzdev->devdata;
501 
502 	if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS ||
503 	    temp > tz->trips[MLXSW_THERMAL_TEMP_TRIP_CRIT].temp)
504 		return -EINVAL;
505 
506 	tz->trips[trip].temp = temp;
507 	return 0;
508 }
509 
510 static int
511 mlxsw_thermal_module_trip_hyst_get(struct thermal_zone_device *tzdev, int trip,
512 				   int *p_hyst)
513 {
514 	struct mlxsw_thermal_module *tz = tzdev->devdata;
515 
516 	*p_hyst = tz->trips[trip].hyst;
517 	return 0;
518 }
519 
520 static int
521 mlxsw_thermal_module_trip_hyst_set(struct thermal_zone_device *tzdev, int trip,
522 				   int hyst)
523 {
524 	struct mlxsw_thermal_module *tz = tzdev->devdata;
525 
526 	tz->trips[trip].hyst = hyst;
527 	return 0;
528 }
529 
530 static int mlxsw_thermal_module_trend_get(struct thermal_zone_device *tzdev,
531 					  int trip, enum thermal_trend *trend)
532 {
533 	struct mlxsw_thermal_module *tz = tzdev->devdata;
534 	struct mlxsw_thermal *thermal = tz->parent;
535 
536 	if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS)
537 		return -EINVAL;
538 
539 	if (tzdev == thermal->tz_highest_dev)
540 		return 1;
541 
542 	*trend = THERMAL_TREND_STABLE;
543 	return 0;
544 }
545 
/* Callbacks of the per-module ("mlxsw-moduleN") thermal zones. */
static struct thermal_zone_device_ops mlxsw_thermal_module_ops = {
	.bind		= mlxsw_thermal_module_bind,
	.unbind		= mlxsw_thermal_module_unbind,
	.get_temp	= mlxsw_thermal_module_temp_get,
	.get_trip_type	= mlxsw_thermal_module_trip_type_get,
	.get_trip_temp	= mlxsw_thermal_module_trip_temp_get,
	.set_trip_temp	= mlxsw_thermal_module_trip_temp_set,
	.get_trip_hyst	= mlxsw_thermal_module_trip_hyst_get,
	.set_trip_hyst	= mlxsw_thermal_module_trip_hyst_set,
	.get_trend	= mlxsw_thermal_module_trend_get,
};
557 
558 static int mlxsw_thermal_gearbox_temp_get(struct thermal_zone_device *tzdev,
559 					  int *p_temp)
560 {
561 	struct mlxsw_thermal_module *tz = tzdev->devdata;
562 	struct mlxsw_thermal *thermal = tz->parent;
563 	char mtmp_pl[MLXSW_REG_MTMP_LEN];
564 	u16 index;
565 	int temp;
566 	int err;
567 
568 	index = MLXSW_REG_MTMP_GBOX_INDEX_MIN + tz->module;
569 	mlxsw_reg_mtmp_pack(mtmp_pl, index, false, false);
570 
571 	err = mlxsw_reg_query(thermal->core, MLXSW_REG(mtmp), mtmp_pl);
572 	if (err)
573 		return err;
574 
575 	mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL);
576 	if (temp > 0)
577 		mlxsw_thermal_tz_score_update(thermal, tzdev, tz->trips, temp);
578 
579 	*p_temp = temp;
580 	return 0;
581 }
582 
/* Callbacks of the per-gearbox ("mlxsw-gearboxN") thermal zones. Only the
 * temperature read differs from the module zone callbacks.
 */
static struct thermal_zone_device_ops mlxsw_thermal_gearbox_ops = {
	.bind		= mlxsw_thermal_module_bind,
	.unbind		= mlxsw_thermal_module_unbind,
	.get_temp	= mlxsw_thermal_gearbox_temp_get,
	.get_trip_type	= mlxsw_thermal_module_trip_type_get,
	.get_trip_temp	= mlxsw_thermal_module_trip_temp_get,
	.set_trip_temp	= mlxsw_thermal_module_trip_temp_set,
	.get_trip_hyst	= mlxsw_thermal_module_trip_hyst_get,
	.set_trip_hyst	= mlxsw_thermal_module_trip_hyst_set,
	.get_trend	= mlxsw_thermal_module_trend_get,
};
594 
595 static int mlxsw_thermal_get_max_state(struct thermal_cooling_device *cdev,
596 				       unsigned long *p_state)
597 {
598 	*p_state = MLXSW_THERMAL_MAX_STATE;
599 	return 0;
600 }
601 
602 static int mlxsw_thermal_get_cur_state(struct thermal_cooling_device *cdev,
603 				       unsigned long *p_state)
604 
605 {
606 	struct mlxsw_thermal *thermal = cdev->devdata;
607 	struct device *dev = thermal->bus_info->dev;
608 	char mfsc_pl[MLXSW_REG_MFSC_LEN];
609 	int err, idx;
610 	u8 duty;
611 
612 	idx = mlxsw_get_cooling_device_idx(thermal, cdev);
613 	if (idx < 0)
614 		return idx;
615 
616 	mlxsw_reg_mfsc_pack(mfsc_pl, idx, 0);
617 	err = mlxsw_reg_query(thermal->core, MLXSW_REG(mfsc), mfsc_pl);
618 	if (err) {
619 		dev_err(dev, "Failed to query PWM duty\n");
620 		return err;
621 	}
622 
623 	duty = mlxsw_reg_mfsc_pwm_duty_cycle_get(mfsc_pl);
624 	*p_state = mlxsw_duty_to_state(duty);
625 	return 0;
626 }
627 
/* Set the cooling state of a fan cooling device.
 *
 * @state is either a regular cooling state (0..MLXSW_THERMAL_MAX_STATE) or
 * a value in MLXSW_THERMAL_SPEED_MIN..MLXSW_THERMAL_SPEED_MAX, which changes
 * the dynamic minimum fan speed instead. The requested state is translated
 * through thermal->cooling_levels before being written as a PWM duty cycle.
 *
 * Returns 0 on success, -ENODEV for an unknown cooling device, -EINVAL for
 * an unsupported state, or a register access error.
 */
static int mlxsw_thermal_set_cur_state(struct thermal_cooling_device *cdev,
				       unsigned long state)

{
	struct mlxsw_thermal *thermal = cdev->devdata;
	struct device *dev = thermal->bus_info->dev;
	char mfsc_pl[MLXSW_REG_MFSC_LEN];
	unsigned long cur_state, i;
	int idx;
	u8 duty;
	int err;

	idx = mlxsw_get_cooling_device_idx(thermal, cdev);
	if (idx < 0)
		return idx;

	/* Verify if this request is for changing allowed fan dynamical
	 * minimum. If it is - update cooling levels accordingly and update
	 * state, if current state is below the newly requested minimum state.
	 * For example, if current state is 5, and minimal state is to be
	 * changed from 4 to 6, thermal->cooling_levels[0 to 5] will be changed
	 * all from 4 to 6. And state 5 (thermal->cooling_levels[4]) should be
	 * overwritten.
	 */
	if (state >= MLXSW_THERMAL_SPEED_MIN &&
	    state <= MLXSW_THERMAL_SPEED_MAX) {
		/* Convert the request into the new minimum level. */
		state -= MLXSW_THERMAL_MAX_STATE;
		for (i = 0; i <= MLXSW_THERMAL_MAX_STATE; i++)
			thermal->cooling_levels[i] = max(state, i);

		/* Read back the state the fan is currently running at. */
		mlxsw_reg_mfsc_pack(mfsc_pl, idx, 0);
		err = mlxsw_reg_query(thermal->core, MLXSW_REG(mfsc), mfsc_pl);
		if (err)
			return err;

		duty = mlxsw_reg_mfsc_pwm_duty_cycle_get(mfsc_pl);
		cur_state = mlxsw_duty_to_state(duty);

		/* If current fan state is lower than requested dynamical
		 * minimum, increase fan speed up to dynamical minimum.
		 */
		if (state < cur_state)
			return 0;

		/* Re-apply the current state; the lookup below raises it to
		 * the new minimum level.
		 */
		state = cur_state;
	}

	if (state > MLXSW_THERMAL_MAX_STATE)
		return -EINVAL;

	/* Normalize the state to the valid speed range. */
	state = thermal->cooling_levels[state];
	mlxsw_reg_mfsc_pack(mfsc_pl, idx, mlxsw_state_to_duty(state));
	err = mlxsw_reg_write(thermal->core, MLXSW_REG(mfsc), mfsc_pl);
	if (err) {
		dev_err(dev, "Failed to write PWM duty\n");
		return err;
	}
	return 0;
}
688 
/* Callbacks of the "mlxsw_fan" cooling devices. */
static const struct thermal_cooling_device_ops mlxsw_cooling_ops = {
	.get_max_state	= mlxsw_thermal_get_max_state,
	.get_cur_state	= mlxsw_thermal_get_cur_state,
	.set_cur_state	= mlxsw_thermal_set_cur_state,
};
694 
695 static int
696 mlxsw_thermal_module_tz_init(struct mlxsw_thermal_module *module_tz)
697 {
698 	char tz_name[MLXSW_THERMAL_ZONE_MAX_NAME];
699 	int err;
700 
701 	snprintf(tz_name, sizeof(tz_name), "mlxsw-module%d",
702 		 module_tz->module + 1);
703 	module_tz->tzdev = thermal_zone_device_register(tz_name,
704 							MLXSW_THERMAL_NUM_TRIPS,
705 							MLXSW_THERMAL_TRIP_MASK,
706 							module_tz,
707 							&mlxsw_thermal_module_ops,
708 							NULL, 0, 0);
709 	if (IS_ERR(module_tz->tzdev)) {
710 		err = PTR_ERR(module_tz->tzdev);
711 		return err;
712 	}
713 
714 	err = thermal_zone_device_enable(module_tz->tzdev);
715 	if (err)
716 		thermal_zone_device_unregister(module_tz->tzdev);
717 
718 	return err;
719 }
720 
/* Unregister the thermal zone of a module. */
static void mlxsw_thermal_module_tz_fini(struct thermal_zone_device *tzdev)
{
	struct thermal_zone_device *zone = tzdev;

	thermal_zone_device_unregister(zone);
}
725 
726 static int
727 mlxsw_thermal_module_init(struct device *dev, struct mlxsw_core *core,
728 			  struct mlxsw_thermal *thermal, u8 module)
729 {
730 	struct mlxsw_thermal_module *module_tz;
731 
732 	module_tz = &thermal->tz_module_arr[module];
733 	/* Skip if parent is already set (case of port split). */
734 	if (module_tz->parent)
735 		return 0;
736 	module_tz->module = module;
737 	module_tz->parent = thermal;
738 	memcpy(module_tz->trips, default_thermal_trips,
739 	       sizeof(thermal->trips));
740 	/* Initialize all trip point. */
741 	mlxsw_thermal_module_trips_reset(module_tz);
742 	/* Update trip point according to the module data. */
743 	return mlxsw_thermal_module_trips_update(dev, core, module_tz);
744 }
745 
746 static void mlxsw_thermal_module_fini(struct mlxsw_thermal_module *module_tz)
747 {
748 	if (module_tz && module_tz->tzdev) {
749 		mlxsw_thermal_module_tz_fini(module_tz->tzdev);
750 		module_tz->tzdev = NULL;
751 		module_tz->parent = NULL;
752 	}
753 }
754 
755 static int
756 mlxsw_thermal_modules_init(struct device *dev, struct mlxsw_core *core,
757 			   struct mlxsw_thermal *thermal)
758 {
759 	struct mlxsw_thermal_module *module_tz;
760 	char mgpir_pl[MLXSW_REG_MGPIR_LEN];
761 	int i, err;
762 
763 	if (!mlxsw_core_res_query_enabled(core))
764 		return 0;
765 
766 	mlxsw_reg_mgpir_pack(mgpir_pl);
767 	err = mlxsw_reg_query(core, MLXSW_REG(mgpir), mgpir_pl);
768 	if (err)
769 		return err;
770 
771 	mlxsw_reg_mgpir_unpack(mgpir_pl, NULL, NULL, NULL,
772 			       &thermal->tz_module_num);
773 
774 	thermal->tz_module_arr = kcalloc(thermal->tz_module_num,
775 					 sizeof(*thermal->tz_module_arr),
776 					 GFP_KERNEL);
777 	if (!thermal->tz_module_arr)
778 		return -ENOMEM;
779 
780 	for (i = 0; i < thermal->tz_module_num; i++) {
781 		err = mlxsw_thermal_module_init(dev, core, thermal, i);
782 		if (err)
783 			goto err_unreg_tz_module_arr;
784 	}
785 
786 	for (i = 0; i < thermal->tz_module_num; i++) {
787 		module_tz = &thermal->tz_module_arr[i];
788 		if (!module_tz->parent)
789 			continue;
790 		err = mlxsw_thermal_module_tz_init(module_tz);
791 		if (err)
792 			goto err_unreg_tz_module_arr;
793 	}
794 
795 	return 0;
796 
797 err_unreg_tz_module_arr:
798 	for (i = thermal->tz_module_num - 1; i >= 0; i--)
799 		mlxsw_thermal_module_fini(&thermal->tz_module_arr[i]);
800 	kfree(thermal->tz_module_arr);
801 	return err;
802 }
803 
804 static void
805 mlxsw_thermal_modules_fini(struct mlxsw_thermal *thermal)
806 {
807 	int i;
808 
809 	if (!mlxsw_core_res_query_enabled(thermal->core))
810 		return;
811 
812 	for (i = thermal->tz_module_num - 1; i >= 0; i--)
813 		mlxsw_thermal_module_fini(&thermal->tz_module_arr[i]);
814 	kfree(thermal->tz_module_arr);
815 }
816 
817 static int
818 mlxsw_thermal_gearbox_tz_init(struct mlxsw_thermal_module *gearbox_tz)
819 {
820 	char tz_name[MLXSW_THERMAL_ZONE_MAX_NAME];
821 	int ret;
822 
823 	snprintf(tz_name, sizeof(tz_name), "mlxsw-gearbox%d",
824 		 gearbox_tz->module + 1);
825 	gearbox_tz->tzdev = thermal_zone_device_register(tz_name,
826 						MLXSW_THERMAL_NUM_TRIPS,
827 						MLXSW_THERMAL_TRIP_MASK,
828 						gearbox_tz,
829 						&mlxsw_thermal_gearbox_ops,
830 						NULL, 0, 0);
831 	if (IS_ERR(gearbox_tz->tzdev))
832 		return PTR_ERR(gearbox_tz->tzdev);
833 
834 	ret = thermal_zone_device_enable(gearbox_tz->tzdev);
835 	if (ret)
836 		thermal_zone_device_unregister(gearbox_tz->tzdev);
837 
838 	return ret;
839 }
840 
841 static void
842 mlxsw_thermal_gearbox_tz_fini(struct mlxsw_thermal_module *gearbox_tz)
843 {
844 	thermal_zone_device_unregister(gearbox_tz->tzdev);
845 }
846 
847 static int
848 mlxsw_thermal_gearboxes_init(struct device *dev, struct mlxsw_core *core,
849 			     struct mlxsw_thermal *thermal)
850 {
851 	enum mlxsw_reg_mgpir_device_type device_type;
852 	struct mlxsw_thermal_module *gearbox_tz;
853 	char mgpir_pl[MLXSW_REG_MGPIR_LEN];
854 	u8 gbox_num;
855 	int i;
856 	int err;
857 
858 	if (!mlxsw_core_res_query_enabled(core))
859 		return 0;
860 
861 	mlxsw_reg_mgpir_pack(mgpir_pl);
862 	err = mlxsw_reg_query(core, MLXSW_REG(mgpir), mgpir_pl);
863 	if (err)
864 		return err;
865 
866 	mlxsw_reg_mgpir_unpack(mgpir_pl, &gbox_num, &device_type, NULL,
867 			       NULL);
868 	if (device_type != MLXSW_REG_MGPIR_DEVICE_TYPE_GEARBOX_DIE ||
869 	    !gbox_num)
870 		return 0;
871 
872 	thermal->tz_gearbox_num = gbox_num;
873 	thermal->tz_gearbox_arr = kcalloc(thermal->tz_gearbox_num,
874 					  sizeof(*thermal->tz_gearbox_arr),
875 					  GFP_KERNEL);
876 	if (!thermal->tz_gearbox_arr)
877 		return -ENOMEM;
878 
879 	for (i = 0; i < thermal->tz_gearbox_num; i++) {
880 		gearbox_tz = &thermal->tz_gearbox_arr[i];
881 		memcpy(gearbox_tz->trips, default_thermal_trips,
882 		       sizeof(thermal->trips));
883 		gearbox_tz->module = i;
884 		gearbox_tz->parent = thermal;
885 		err = mlxsw_thermal_gearbox_tz_init(gearbox_tz);
886 		if (err)
887 			goto err_unreg_tz_gearbox;
888 	}
889 
890 	return 0;
891 
892 err_unreg_tz_gearbox:
893 	for (i--; i >= 0; i--)
894 		mlxsw_thermal_gearbox_tz_fini(&thermal->tz_gearbox_arr[i]);
895 	kfree(thermal->tz_gearbox_arr);
896 	return err;
897 }
898 
899 static void
900 mlxsw_thermal_gearboxes_fini(struct mlxsw_thermal *thermal)
901 {
902 	int i;
903 
904 	if (!mlxsw_core_res_query_enabled(thermal->core))
905 		return;
906 
907 	for (i = thermal->tz_gearbox_num - 1; i >= 0; i--)
908 		mlxsw_thermal_gearbox_tz_fini(&thermal->tz_gearbox_arr[i]);
909 	kfree(thermal->tz_gearbox_arr);
910 }
911 
912 int mlxsw_thermal_init(struct mlxsw_core *core,
913 		       const struct mlxsw_bus_info *bus_info,
914 		       struct mlxsw_thermal **p_thermal)
915 {
916 	char mfcr_pl[MLXSW_REG_MFCR_LEN] = { 0 };
917 	enum mlxsw_reg_mfcr_pwm_frequency freq;
918 	struct device *dev = bus_info->dev;
919 	struct mlxsw_thermal *thermal;
920 	u16 tacho_active;
921 	u8 pwm_active;
922 	int err, i;
923 
924 	thermal = devm_kzalloc(dev, sizeof(*thermal),
925 			       GFP_KERNEL);
926 	if (!thermal)
927 		return -ENOMEM;
928 
929 	thermal->core = core;
930 	thermal->bus_info = bus_info;
931 	memcpy(thermal->trips, default_thermal_trips, sizeof(thermal->trips));
932 
933 	err = mlxsw_reg_query(thermal->core, MLXSW_REG(mfcr), mfcr_pl);
934 	if (err) {
935 		dev_err(dev, "Failed to probe PWMs\n");
936 		goto err_free_thermal;
937 	}
938 	mlxsw_reg_mfcr_unpack(mfcr_pl, &freq, &tacho_active, &pwm_active);
939 
940 	for (i = 0; i < MLXSW_MFCR_TACHOS_MAX; i++) {
941 		if (tacho_active & BIT(i)) {
942 			char mfsl_pl[MLXSW_REG_MFSL_LEN];
943 
944 			mlxsw_reg_mfsl_pack(mfsl_pl, i, 0, 0);
945 
946 			/* We need to query the register to preserve maximum */
947 			err = mlxsw_reg_query(thermal->core, MLXSW_REG(mfsl),
948 					      mfsl_pl);
949 			if (err)
950 				goto err_free_thermal;
951 
952 			/* set the minimal RPMs to 0 */
953 			mlxsw_reg_mfsl_tach_min_set(mfsl_pl, 0);
954 			err = mlxsw_reg_write(thermal->core, MLXSW_REG(mfsl),
955 					      mfsl_pl);
956 			if (err)
957 				goto err_free_thermal;
958 		}
959 	}
960 	for (i = 0; i < MLXSW_MFCR_PWMS_MAX; i++) {
961 		if (pwm_active & BIT(i)) {
962 			struct thermal_cooling_device *cdev;
963 
964 			cdev = thermal_cooling_device_register("mlxsw_fan",
965 							       thermal,
966 							       &mlxsw_cooling_ops);
967 			if (IS_ERR(cdev)) {
968 				err = PTR_ERR(cdev);
969 				dev_err(dev, "Failed to register cooling device\n");
970 				goto err_unreg_cdevs;
971 			}
972 			thermal->cdevs[i] = cdev;
973 		}
974 	}
975 
976 	/* Initialize cooling levels per PWM state. */
977 	for (i = 0; i < MLXSW_THERMAL_MAX_STATE; i++)
978 		thermal->cooling_levels[i] = max(MLXSW_THERMAL_SPEED_MIN_LEVEL,
979 						 i);
980 
981 	thermal->polling_delay = bus_info->low_frequency ?
982 				 MLXSW_THERMAL_SLOW_POLL_INT :
983 				 MLXSW_THERMAL_POLL_INT;
984 
985 	thermal->tzdev = thermal_zone_device_register("mlxsw",
986 						      MLXSW_THERMAL_NUM_TRIPS,
987 						      MLXSW_THERMAL_TRIP_MASK,
988 						      thermal,
989 						      &mlxsw_thermal_ops,
990 						      NULL, 0,
991 						      thermal->polling_delay);
992 	if (IS_ERR(thermal->tzdev)) {
993 		err = PTR_ERR(thermal->tzdev);
994 		dev_err(dev, "Failed to register thermal zone\n");
995 		goto err_unreg_cdevs;
996 	}
997 
998 	err = mlxsw_thermal_modules_init(dev, core, thermal);
999 	if (err)
1000 		goto err_unreg_tzdev;
1001 
1002 	err = mlxsw_thermal_gearboxes_init(dev, core, thermal);
1003 	if (err)
1004 		goto err_unreg_modules_tzdev;
1005 
1006 	err = thermal_zone_device_enable(thermal->tzdev);
1007 	if (err)
1008 		goto err_unreg_gearboxes;
1009 
1010 	*p_thermal = thermal;
1011 	return 0;
1012 
1013 err_unreg_gearboxes:
1014 	mlxsw_thermal_gearboxes_fini(thermal);
1015 err_unreg_modules_tzdev:
1016 	mlxsw_thermal_modules_fini(thermal);
1017 err_unreg_tzdev:
1018 	if (thermal->tzdev) {
1019 		thermal_zone_device_unregister(thermal->tzdev);
1020 		thermal->tzdev = NULL;
1021 	}
1022 err_unreg_cdevs:
1023 	for (i = 0; i < MLXSW_MFCR_PWMS_MAX; i++)
1024 		if (thermal->cdevs[i])
1025 			thermal_cooling_device_unregister(thermal->cdevs[i]);
1026 err_free_thermal:
1027 	devm_kfree(dev, thermal);
1028 	return err;
1029 }
1030 
1031 void mlxsw_thermal_fini(struct mlxsw_thermal *thermal)
1032 {
1033 	int i;
1034 
1035 	mlxsw_thermal_gearboxes_fini(thermal);
1036 	mlxsw_thermal_modules_fini(thermal);
1037 	if (thermal->tzdev) {
1038 		thermal_zone_device_unregister(thermal->tzdev);
1039 		thermal->tzdev = NULL;
1040 	}
1041 
1042 	for (i = 0; i < MLXSW_MFCR_PWMS_MAX; i++) {
1043 		if (thermal->cdevs[i]) {
1044 			thermal_cooling_device_unregister(thermal->cdevs[i]);
1045 			thermal->cdevs[i] = NULL;
1046 		}
1047 	}
1048 
1049 	devm_kfree(thermal->bus_info->dev, thermal);
1050 }
1051