1 // SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
2 /* Copyright (c) 2016-2018 Mellanox Technologies. All rights reserved
3  * Copyright (c) 2016 Ivan Vecera <cera@cera.cz>
4  */
5 
6 #include <linux/kernel.h>
7 #include <linux/types.h>
8 #include <linux/device.h>
9 #include <linux/sysfs.h>
10 #include <linux/thermal.h>
11 #include <linux/err.h>
12 #include <linux/sfp.h>
13 
14 #include "core.h"
15 #include "core_env.h"
16 
/* Thermal zone polling periods, in milliseconds. */
#define MLXSW_THERMAL_POLL_INT		1000
#define MLXSW_THERMAL_SLOW_POLL_INT	20000

/* ASIC trip temperatures and hysteresis, in millidegrees Celsius. */
#define MLXSW_THERMAL_ASIC_TEMP_NORM	75000	/* 75C */
#define MLXSW_THERMAL_ASIC_TEMP_HIGH	85000	/* 85C */
#define MLXSW_THERMAL_ASIC_TEMP_HOT	105000	/* 105C */
#define MLXSW_THERMAL_ASIC_TEMP_CRIT	110000	/* 110C */
#define MLXSW_THERMAL_HYSTERESIS_TEMP	5000	/* 5C */

/* Distance between the thresholds reported by a module and the trip points
 * derived from them: twice the hysteresis.
 */
#define MLXSW_THERMAL_MODULE_TEMP_SHIFT	(MLXSW_THERMAL_HYSTERESIS_TEMP * 2)

#define MLXSW_THERMAL_ZONE_MAX_NAME	16
#define MLXSW_THERMAL_MAX_STATE	10
#define MLXSW_THERMAL_MAX_DUTY	255

/* Minimum and maximum allowed fan speed in percent: from 20% to 100%.
 * Values MLXSW_THERMAL_MAX_STATE + x, where x is between 2 and 10, are used
 * for setting the dynamic minimum of the fan speed. For example, if the value
 * is set to 14 (40%), the cooling levels vector is set to
 * 4, 4, 4, 4, 4, 5, 6, 7, 8, 9, 10, which gives the PWM speed in percent:
 * 40, 40, 40, 40, 40, 50, 60, 70, 80, 90, 100.
 */
#define MLXSW_THERMAL_SPEED_MIN		(MLXSW_THERMAL_MAX_STATE + 2)
#define MLXSW_THERMAL_SPEED_MAX		(MLXSW_THERMAL_MAX_STATE * 2)
#define MLXSW_THERMAL_SPEED_MIN_LEVEL	2		/* 20% */
37 
/* Type-name fragments of the external cooling devices that mlxsw thermal
 * zones are allowed to bind to.
 */
static char * const mlxsw_thermal_external_allowed_cdev[] = {
	"mlxreg_fan",
};
42 
/* Indices of the trip points inside a trips[] array; the order matches
 * default_thermal_trips[].
 */
enum mlxsw_thermal_trips {
	MLXSW_THERMAL_TEMP_TRIP_NORM,	/* first "active" trip */
	MLXSW_THERMAL_TEMP_TRIP_HIGH,	/* second "active" trip */
	MLXSW_THERMAL_TEMP_TRIP_HOT,	/* "hot" trip */
	MLXSW_THERMAL_TEMP_TRIP_CRIT,	/* "critical" trip */
};
49 
/* Description of a single trip point of an mlxsw thermal zone. */
struct mlxsw_thermal_trip {
	int type;	/* THERMAL_TRIP_* value reported for the trip */
	int temp;	/* trip temperature */
	int hyst;	/* trip hysteresis */
	int min_state;	/* lower cooling state used when binding a cdev */
	int max_state;	/* upper cooling state used when binding a cdev */
};
57 
58 static const struct mlxsw_thermal_trip default_thermal_trips[] = {
59 	{	/* In range - 0-40% PWM */
60 		.type		= THERMAL_TRIP_ACTIVE,
61 		.temp		= MLXSW_THERMAL_ASIC_TEMP_NORM,
62 		.hyst		= MLXSW_THERMAL_HYSTERESIS_TEMP,
63 		.min_state	= 0,
64 		.max_state	= (4 * MLXSW_THERMAL_MAX_STATE) / 10,
65 	},
66 	{
67 		/* In range - 40-100% PWM */
68 		.type		= THERMAL_TRIP_ACTIVE,
69 		.temp		= MLXSW_THERMAL_ASIC_TEMP_HIGH,
70 		.hyst		= MLXSW_THERMAL_HYSTERESIS_TEMP,
71 		.min_state	= (4 * MLXSW_THERMAL_MAX_STATE) / 10,
72 		.max_state	= MLXSW_THERMAL_MAX_STATE,
73 	},
74 	{	/* Warning */
75 		.type		= THERMAL_TRIP_HOT,
76 		.temp		= MLXSW_THERMAL_ASIC_TEMP_HOT,
77 		.hyst		= MLXSW_THERMAL_HYSTERESIS_TEMP,
78 		.min_state	= MLXSW_THERMAL_MAX_STATE,
79 		.max_state	= MLXSW_THERMAL_MAX_STATE,
80 	},
81 	{	/* Critical - soft poweroff */
82 		.type		= THERMAL_TRIP_CRITICAL,
83 		.temp		= MLXSW_THERMAL_ASIC_TEMP_CRIT,
84 		.min_state	= MLXSW_THERMAL_MAX_STATE,
85 		.max_state	= MLXSW_THERMAL_MAX_STATE,
86 	}
87 };
88 
/* Number of trip points of every mlxsw thermal zone. */
#define MLXSW_THERMAL_NUM_TRIPS	ARRAY_SIZE(default_thermal_trips)

/* One bit per trip point, so that all trips are writable. */
#define MLXSW_THERMAL_TRIP_MASK	(BIT(MLXSW_THERMAL_NUM_TRIPS) - 1)
93 
94 struct mlxsw_thermal;
95 
96 struct mlxsw_thermal_module {
97 	struct mlxsw_thermal *parent;
98 	struct thermal_zone_device *tzdev;
99 	struct mlxsw_thermal_trip trips[MLXSW_THERMAL_NUM_TRIPS];
100 	enum thermal_device_mode mode;
101 	int module;
102 };
103 
104 struct mlxsw_thermal {
105 	struct mlxsw_core *core;
106 	const struct mlxsw_bus_info *bus_info;
107 	struct thermal_zone_device *tzdev;
108 	int polling_delay;
109 	struct thermal_cooling_device *cdevs[MLXSW_MFCR_PWMS_MAX];
110 	u8 cooling_levels[MLXSW_THERMAL_MAX_STATE + 1];
111 	struct mlxsw_thermal_trip trips[MLXSW_THERMAL_NUM_TRIPS];
112 	enum thermal_device_mode mode;
113 	struct mlxsw_thermal_module *tz_module_arr;
114 };
115 
116 static inline u8 mlxsw_state_to_duty(int state)
117 {
118 	return DIV_ROUND_CLOSEST(state * MLXSW_THERMAL_MAX_DUTY,
119 				 MLXSW_THERMAL_MAX_STATE);
120 }
121 
122 static inline int mlxsw_duty_to_state(u8 duty)
123 {
124 	return DIV_ROUND_CLOSEST(duty * MLXSW_THERMAL_MAX_STATE,
125 				 MLXSW_THERMAL_MAX_DUTY);
126 }
127 
128 static int mlxsw_get_cooling_device_idx(struct mlxsw_thermal *thermal,
129 					struct thermal_cooling_device *cdev)
130 {
131 	int i;
132 
133 	for (i = 0; i < MLXSW_MFCR_PWMS_MAX; i++)
134 		if (thermal->cdevs[i] == cdev)
135 			return i;
136 
137 	/* Allow mlxsw thermal zone binding to an external cooling device */
138 	for (i = 0; i < ARRAY_SIZE(mlxsw_thermal_external_allowed_cdev); i++) {
139 		if (strnstr(cdev->type, mlxsw_thermal_external_allowed_cdev[i],
140 			    sizeof(cdev->type)))
141 			return 0;
142 	}
143 
144 	return -ENODEV;
145 }
146 
147 static void
148 mlxsw_thermal_module_trips_reset(struct mlxsw_thermal_module *tz)
149 {
150 	tz->trips[MLXSW_THERMAL_TEMP_TRIP_NORM].temp = 0;
151 	tz->trips[MLXSW_THERMAL_TEMP_TRIP_HIGH].temp = 0;
152 	tz->trips[MLXSW_THERMAL_TEMP_TRIP_HOT].temp = 0;
153 	tz->trips[MLXSW_THERMAL_TEMP_TRIP_CRIT].temp = 0;
154 }
155 
156 static int
157 mlxsw_thermal_module_trips_update(struct device *dev, struct mlxsw_core *core,
158 				  struct mlxsw_thermal_module *tz)
159 {
160 	int crit_temp, emerg_temp;
161 	int err;
162 
163 	err = mlxsw_env_module_temp_thresholds_get(core, tz->module,
164 						   SFP_TEMP_HIGH_WARN,
165 						   &crit_temp);
166 	if (err)
167 		return err;
168 
169 	err = mlxsw_env_module_temp_thresholds_get(core, tz->module,
170 						   SFP_TEMP_HIGH_ALARM,
171 						   &emerg_temp);
172 	if (err)
173 		return err;
174 
175 	/* According to the system thermal requirements, the thermal zones are
176 	 * defined with four trip points. The critical and emergency
177 	 * temperature thresholds, provided by QSFP module are set as "active"
178 	 * and "hot" trip points, "normal" and "critical" trip points are
179 	 * derived from "active" and "hot" by subtracting or adding double
180 	 * hysteresis value.
181 	 */
182 	if (crit_temp >= MLXSW_THERMAL_MODULE_TEMP_SHIFT)
183 		tz->trips[MLXSW_THERMAL_TEMP_TRIP_NORM].temp = crit_temp -
184 					MLXSW_THERMAL_MODULE_TEMP_SHIFT;
185 	else
186 		tz->trips[MLXSW_THERMAL_TEMP_TRIP_NORM].temp = crit_temp;
187 	tz->trips[MLXSW_THERMAL_TEMP_TRIP_HIGH].temp = crit_temp;
188 	tz->trips[MLXSW_THERMAL_TEMP_TRIP_HOT].temp = emerg_temp;
189 	if (emerg_temp > crit_temp)
190 		tz->trips[MLXSW_THERMAL_TEMP_TRIP_CRIT].temp = emerg_temp +
191 					MLXSW_THERMAL_MODULE_TEMP_SHIFT;
192 	else
193 		tz->trips[MLXSW_THERMAL_TEMP_TRIP_CRIT].temp = emerg_temp;
194 
195 	return 0;
196 }
197 
198 static int mlxsw_thermal_bind(struct thermal_zone_device *tzdev,
199 			      struct thermal_cooling_device *cdev)
200 {
201 	struct mlxsw_thermal *thermal = tzdev->devdata;
202 	struct device *dev = thermal->bus_info->dev;
203 	int i, err;
204 
205 	/* If the cooling device is one of ours bind it */
206 	if (mlxsw_get_cooling_device_idx(thermal, cdev) < 0)
207 		return 0;
208 
209 	for (i = 0; i < MLXSW_THERMAL_NUM_TRIPS; i++) {
210 		const struct mlxsw_thermal_trip *trip = &thermal->trips[i];
211 
212 		err = thermal_zone_bind_cooling_device(tzdev, i, cdev,
213 						       trip->max_state,
214 						       trip->min_state,
215 						       THERMAL_WEIGHT_DEFAULT);
216 		if (err < 0) {
217 			dev_err(dev, "Failed to bind cooling device to trip %d\n", i);
218 			return err;
219 		}
220 	}
221 	return 0;
222 }
223 
224 static int mlxsw_thermal_unbind(struct thermal_zone_device *tzdev,
225 				struct thermal_cooling_device *cdev)
226 {
227 	struct mlxsw_thermal *thermal = tzdev->devdata;
228 	struct device *dev = thermal->bus_info->dev;
229 	int i;
230 	int err;
231 
232 	/* If the cooling device is our one unbind it */
233 	if (mlxsw_get_cooling_device_idx(thermal, cdev) < 0)
234 		return 0;
235 
236 	for (i = 0; i < MLXSW_THERMAL_NUM_TRIPS; i++) {
237 		err = thermal_zone_unbind_cooling_device(tzdev, i, cdev);
238 		if (err < 0) {
239 			dev_err(dev, "Failed to unbind cooling device\n");
240 			return err;
241 		}
242 	}
243 	return 0;
244 }
245 
246 static int mlxsw_thermal_get_mode(struct thermal_zone_device *tzdev,
247 				  enum thermal_device_mode *mode)
248 {
249 	struct mlxsw_thermal *thermal = tzdev->devdata;
250 
251 	*mode = thermal->mode;
252 
253 	return 0;
254 }
255 
256 static int mlxsw_thermal_set_mode(struct thermal_zone_device *tzdev,
257 				  enum thermal_device_mode mode)
258 {
259 	struct mlxsw_thermal *thermal = tzdev->devdata;
260 
261 	mutex_lock(&tzdev->lock);
262 
263 	if (mode == THERMAL_DEVICE_ENABLED)
264 		tzdev->polling_delay = thermal->polling_delay;
265 	else
266 		tzdev->polling_delay = 0;
267 
268 	mutex_unlock(&tzdev->lock);
269 
270 	thermal->mode = mode;
271 	thermal_zone_device_update(tzdev, THERMAL_EVENT_UNSPECIFIED);
272 
273 	return 0;
274 }
275 
276 static int mlxsw_thermal_get_temp(struct thermal_zone_device *tzdev,
277 				  int *p_temp)
278 {
279 	struct mlxsw_thermal *thermal = tzdev->devdata;
280 	struct device *dev = thermal->bus_info->dev;
281 	char mtmp_pl[MLXSW_REG_MTMP_LEN];
282 	unsigned int temp;
283 	int err;
284 
285 	mlxsw_reg_mtmp_pack(mtmp_pl, 0, false, false);
286 
287 	err = mlxsw_reg_query(thermal->core, MLXSW_REG(mtmp), mtmp_pl);
288 	if (err) {
289 		dev_err(dev, "Failed to query temp sensor\n");
290 		return err;
291 	}
292 	mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL);
293 
294 	*p_temp = (int) temp;
295 	return 0;
296 }
297 
298 static int mlxsw_thermal_get_trip_type(struct thermal_zone_device *tzdev,
299 				       int trip,
300 				       enum thermal_trip_type *p_type)
301 {
302 	struct mlxsw_thermal *thermal = tzdev->devdata;
303 
304 	if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS)
305 		return -EINVAL;
306 
307 	*p_type = thermal->trips[trip].type;
308 	return 0;
309 }
310 
311 static int mlxsw_thermal_get_trip_temp(struct thermal_zone_device *tzdev,
312 				       int trip, int *p_temp)
313 {
314 	struct mlxsw_thermal *thermal = tzdev->devdata;
315 
316 	if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS)
317 		return -EINVAL;
318 
319 	*p_temp = thermal->trips[trip].temp;
320 	return 0;
321 }
322 
323 static int mlxsw_thermal_set_trip_temp(struct thermal_zone_device *tzdev,
324 				       int trip, int temp)
325 {
326 	struct mlxsw_thermal *thermal = tzdev->devdata;
327 
328 	if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS ||
329 	    temp > MLXSW_THERMAL_ASIC_TEMP_CRIT)
330 		return -EINVAL;
331 
332 	thermal->trips[trip].temp = temp;
333 	return 0;
334 }
335 
336 static int mlxsw_thermal_get_trip_hyst(struct thermal_zone_device *tzdev,
337 				       int trip, int *p_hyst)
338 {
339 	struct mlxsw_thermal *thermal = tzdev->devdata;
340 
341 	*p_hyst = thermal->trips[trip].hyst;
342 	return 0;
343 }
344 
345 static int mlxsw_thermal_set_trip_hyst(struct thermal_zone_device *tzdev,
346 				       int trip, int hyst)
347 {
348 	struct mlxsw_thermal *thermal = tzdev->devdata;
349 
350 	thermal->trips[trip].hyst = hyst;
351 	return 0;
352 }
353 
354 static struct thermal_zone_device_ops mlxsw_thermal_ops = {
355 	.bind = mlxsw_thermal_bind,
356 	.unbind = mlxsw_thermal_unbind,
357 	.get_mode = mlxsw_thermal_get_mode,
358 	.set_mode = mlxsw_thermal_set_mode,
359 	.get_temp = mlxsw_thermal_get_temp,
360 	.get_trip_type	= mlxsw_thermal_get_trip_type,
361 	.get_trip_temp	= mlxsw_thermal_get_trip_temp,
362 	.set_trip_temp	= mlxsw_thermal_set_trip_temp,
363 	.get_trip_hyst	= mlxsw_thermal_get_trip_hyst,
364 	.set_trip_hyst	= mlxsw_thermal_set_trip_hyst,
365 };
366 
367 static int mlxsw_thermal_module_bind(struct thermal_zone_device *tzdev,
368 				     struct thermal_cooling_device *cdev)
369 {
370 	struct mlxsw_thermal_module *tz = tzdev->devdata;
371 	struct mlxsw_thermal *thermal = tz->parent;
372 	int i, j, err;
373 
374 	/* If the cooling device is one of ours bind it */
375 	if (mlxsw_get_cooling_device_idx(thermal, cdev) < 0)
376 		return 0;
377 
378 	for (i = 0; i < MLXSW_THERMAL_NUM_TRIPS; i++) {
379 		const struct mlxsw_thermal_trip *trip = &tz->trips[i];
380 
381 		err = thermal_zone_bind_cooling_device(tzdev, i, cdev,
382 						       trip->max_state,
383 						       trip->min_state,
384 						       THERMAL_WEIGHT_DEFAULT);
385 		if (err < 0)
386 			goto err_bind_cooling_device;
387 	}
388 	return 0;
389 
390 err_bind_cooling_device:
391 	for (j = i - 1; j >= 0; j--)
392 		thermal_zone_unbind_cooling_device(tzdev, j, cdev);
393 	return err;
394 }
395 
396 static int mlxsw_thermal_module_unbind(struct thermal_zone_device *tzdev,
397 				       struct thermal_cooling_device *cdev)
398 {
399 	struct mlxsw_thermal_module *tz = tzdev->devdata;
400 	struct mlxsw_thermal *thermal = tz->parent;
401 	int i;
402 	int err;
403 
404 	/* If the cooling device is one of ours unbind it */
405 	if (mlxsw_get_cooling_device_idx(thermal, cdev) < 0)
406 		return 0;
407 
408 	for (i = 0; i < MLXSW_THERMAL_NUM_TRIPS; i++) {
409 		err = thermal_zone_unbind_cooling_device(tzdev, i, cdev);
410 		WARN_ON(err);
411 	}
412 	return err;
413 }
414 
415 static int mlxsw_thermal_module_mode_get(struct thermal_zone_device *tzdev,
416 					 enum thermal_device_mode *mode)
417 {
418 	struct mlxsw_thermal_module *tz = tzdev->devdata;
419 
420 	*mode = tz->mode;
421 
422 	return 0;
423 }
424 
425 static int mlxsw_thermal_module_mode_set(struct thermal_zone_device *tzdev,
426 					 enum thermal_device_mode mode)
427 {
428 	struct mlxsw_thermal_module *tz = tzdev->devdata;
429 	struct mlxsw_thermal *thermal = tz->parent;
430 
431 	mutex_lock(&tzdev->lock);
432 
433 	if (mode == THERMAL_DEVICE_ENABLED)
434 		tzdev->polling_delay = thermal->polling_delay;
435 	else
436 		tzdev->polling_delay = 0;
437 
438 	mutex_unlock(&tzdev->lock);
439 
440 	tz->mode = mode;
441 	thermal_zone_device_update(tzdev, THERMAL_EVENT_UNSPECIFIED);
442 
443 	return 0;
444 }
445 
446 static int mlxsw_thermal_module_temp_get(struct thermal_zone_device *tzdev,
447 					 int *p_temp)
448 {
449 	struct mlxsw_thermal_module *tz = tzdev->devdata;
450 	struct mlxsw_thermal *thermal = tz->parent;
451 	struct device *dev = thermal->bus_info->dev;
452 	char mtbr_pl[MLXSW_REG_MTBR_LEN];
453 	u16 temp;
454 	int err;
455 
456 	/* Read module temperature. */
457 	mlxsw_reg_mtbr_pack(mtbr_pl, MLXSW_REG_MTBR_BASE_MODULE_INDEX +
458 			    tz->module, 1);
459 	err = mlxsw_reg_query(thermal->core, MLXSW_REG(mtbr), mtbr_pl);
460 	if (err)
461 		return err;
462 
463 	mlxsw_reg_mtbr_temp_unpack(mtbr_pl, 0, &temp, NULL);
464 	/* Update temperature. */
465 	switch (temp) {
466 	case MLXSW_REG_MTBR_NO_CONN: /* fall-through */
467 	case MLXSW_REG_MTBR_NO_TEMP_SENS: /* fall-through */
468 	case MLXSW_REG_MTBR_INDEX_NA: /* fall-through */
469 	case MLXSW_REG_MTBR_BAD_SENS_INFO:
470 		temp = 0;
471 		break;
472 	default:
473 		temp = MLXSW_REG_MTMP_TEMP_TO_MC(temp);
474 		/* Reset all trip point. */
475 		mlxsw_thermal_module_trips_reset(tz);
476 		/* Update trip points. */
477 		err = mlxsw_thermal_module_trips_update(dev, thermal->core,
478 							tz);
479 		if (err)
480 			return err;
481 		break;
482 	}
483 
484 	*p_temp = (int) temp;
485 	return 0;
486 }
487 
488 static int
489 mlxsw_thermal_module_trip_type_get(struct thermal_zone_device *tzdev, int trip,
490 				   enum thermal_trip_type *p_type)
491 {
492 	struct mlxsw_thermal_module *tz = tzdev->devdata;
493 
494 	if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS)
495 		return -EINVAL;
496 
497 	*p_type = tz->trips[trip].type;
498 	return 0;
499 }
500 
501 static int
502 mlxsw_thermal_module_trip_temp_get(struct thermal_zone_device *tzdev,
503 				   int trip, int *p_temp)
504 {
505 	struct mlxsw_thermal_module *tz = tzdev->devdata;
506 
507 	if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS)
508 		return -EINVAL;
509 
510 	*p_temp = tz->trips[trip].temp;
511 	return 0;
512 }
513 
514 static int
515 mlxsw_thermal_module_trip_temp_set(struct thermal_zone_device *tzdev,
516 				   int trip, int temp)
517 {
518 	struct mlxsw_thermal_module *tz = tzdev->devdata;
519 
520 	if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS ||
521 	    temp > tz->trips[MLXSW_THERMAL_TEMP_TRIP_CRIT].temp)
522 		return -EINVAL;
523 
524 	tz->trips[trip].temp = temp;
525 	return 0;
526 }
527 
528 static int
529 mlxsw_thermal_module_trip_hyst_get(struct thermal_zone_device *tzdev, int trip,
530 				   int *p_hyst)
531 {
532 	struct mlxsw_thermal_module *tz = tzdev->devdata;
533 
534 	*p_hyst = tz->trips[trip].hyst;
535 	return 0;
536 }
537 
538 static int
539 mlxsw_thermal_module_trip_hyst_set(struct thermal_zone_device *tzdev, int trip,
540 				   int hyst)
541 {
542 	struct mlxsw_thermal_module *tz = tzdev->devdata;
543 
544 	tz->trips[trip].hyst = hyst;
545 	return 0;
546 }
547 
548 static struct thermal_zone_params mlxsw_thermal_module_params = {
549 	.governor_name = "user_space",
550 };
551 
552 static struct thermal_zone_device_ops mlxsw_thermal_module_ops = {
553 	.bind		= mlxsw_thermal_module_bind,
554 	.unbind		= mlxsw_thermal_module_unbind,
555 	.get_mode	= mlxsw_thermal_module_mode_get,
556 	.set_mode	= mlxsw_thermal_module_mode_set,
557 	.get_temp	= mlxsw_thermal_module_temp_get,
558 	.get_trip_type	= mlxsw_thermal_module_trip_type_get,
559 	.get_trip_temp	= mlxsw_thermal_module_trip_temp_get,
560 	.set_trip_temp	= mlxsw_thermal_module_trip_temp_set,
561 	.get_trip_hyst	= mlxsw_thermal_module_trip_hyst_get,
562 	.set_trip_hyst	= mlxsw_thermal_module_trip_hyst_set,
563 };
564 
565 static int mlxsw_thermal_get_max_state(struct thermal_cooling_device *cdev,
566 				       unsigned long *p_state)
567 {
568 	*p_state = MLXSW_THERMAL_MAX_STATE;
569 	return 0;
570 }
571 
572 static int mlxsw_thermal_get_cur_state(struct thermal_cooling_device *cdev,
573 				       unsigned long *p_state)
574 
575 {
576 	struct mlxsw_thermal *thermal = cdev->devdata;
577 	struct device *dev = thermal->bus_info->dev;
578 	char mfsc_pl[MLXSW_REG_MFSC_LEN];
579 	int err, idx;
580 	u8 duty;
581 
582 	idx = mlxsw_get_cooling_device_idx(thermal, cdev);
583 	if (idx < 0)
584 		return idx;
585 
586 	mlxsw_reg_mfsc_pack(mfsc_pl, idx, 0);
587 	err = mlxsw_reg_query(thermal->core, MLXSW_REG(mfsc), mfsc_pl);
588 	if (err) {
589 		dev_err(dev, "Failed to query PWM duty\n");
590 		return err;
591 	}
592 
593 	duty = mlxsw_reg_mfsc_pwm_duty_cycle_get(mfsc_pl);
594 	*p_state = mlxsw_duty_to_state(duty);
595 	return 0;
596 }
597 
598 static int mlxsw_thermal_set_cur_state(struct thermal_cooling_device *cdev,
599 				       unsigned long state)
600 
601 {
602 	struct mlxsw_thermal *thermal = cdev->devdata;
603 	struct device *dev = thermal->bus_info->dev;
604 	char mfsc_pl[MLXSW_REG_MFSC_LEN];
605 	unsigned long cur_state, i;
606 	int idx;
607 	u8 duty;
608 	int err;
609 
610 	idx = mlxsw_get_cooling_device_idx(thermal, cdev);
611 	if (idx < 0)
612 		return idx;
613 
614 	/* Verify if this request is for changing allowed fan dynamical
615 	 * minimum. If it is - update cooling levels accordingly and update
616 	 * state, if current state is below the newly requested minimum state.
617 	 * For example, if current state is 5, and minimal state is to be
618 	 * changed from 4 to 6, thermal->cooling_levels[0 to 5] will be changed
619 	 * all from 4 to 6. And state 5 (thermal->cooling_levels[4]) should be
620 	 * overwritten.
621 	 */
622 	if (state >= MLXSW_THERMAL_SPEED_MIN &&
623 	    state <= MLXSW_THERMAL_SPEED_MAX) {
624 		state -= MLXSW_THERMAL_MAX_STATE;
625 		for (i = 0; i <= MLXSW_THERMAL_MAX_STATE; i++)
626 			thermal->cooling_levels[i] = max(state, i);
627 
628 		mlxsw_reg_mfsc_pack(mfsc_pl, idx, 0);
629 		err = mlxsw_reg_query(thermal->core, MLXSW_REG(mfsc), mfsc_pl);
630 		if (err)
631 			return err;
632 
633 		duty = mlxsw_reg_mfsc_pwm_duty_cycle_get(mfsc_pl);
634 		cur_state = mlxsw_duty_to_state(duty);
635 
636 		/* If current fan state is lower than requested dynamical
637 		 * minimum, increase fan speed up to dynamical minimum.
638 		 */
639 		if (state < cur_state)
640 			return 0;
641 
642 		state = cur_state;
643 	}
644 
645 	if (state > MLXSW_THERMAL_MAX_STATE)
646 		return -EINVAL;
647 
648 	/* Normalize the state to the valid speed range. */
649 	state = thermal->cooling_levels[state];
650 	mlxsw_reg_mfsc_pack(mfsc_pl, idx, mlxsw_state_to_duty(state));
651 	err = mlxsw_reg_write(thermal->core, MLXSW_REG(mfsc), mfsc_pl);
652 	if (err) {
653 		dev_err(dev, "Failed to write PWM duty\n");
654 		return err;
655 	}
656 	return 0;
657 }
658 
659 static const struct thermal_cooling_device_ops mlxsw_cooling_ops = {
660 	.get_max_state	= mlxsw_thermal_get_max_state,
661 	.get_cur_state	= mlxsw_thermal_get_cur_state,
662 	.set_cur_state	= mlxsw_thermal_set_cur_state,
663 };
664 
665 static int
666 mlxsw_thermal_module_tz_init(struct mlxsw_thermal_module *module_tz)
667 {
668 	char tz_name[MLXSW_THERMAL_ZONE_MAX_NAME];
669 	int err;
670 
671 	snprintf(tz_name, sizeof(tz_name), "mlxsw-module%d",
672 		 module_tz->module + 1);
673 	module_tz->tzdev = thermal_zone_device_register(tz_name,
674 							MLXSW_THERMAL_NUM_TRIPS,
675 							MLXSW_THERMAL_TRIP_MASK,
676 							module_tz,
677 							&mlxsw_thermal_module_ops,
678 							&mlxsw_thermal_module_params,
679 							0, 0);
680 	if (IS_ERR(module_tz->tzdev)) {
681 		err = PTR_ERR(module_tz->tzdev);
682 		return err;
683 	}
684 
685 	return 0;
686 }
687 
/* Unregister the thermal zone of a module. */
static void mlxsw_thermal_module_tz_fini(struct thermal_zone_device *tzdev)
{
	thermal_zone_device_unregister(tzdev);
}
692 
693 static int
694 mlxsw_thermal_module_init(struct device *dev, struct mlxsw_core *core,
695 			  struct mlxsw_thermal *thermal, u8 local_port)
696 {
697 	struct mlxsw_thermal_module *module_tz;
698 	char pmlp_pl[MLXSW_REG_PMLP_LEN];
699 	u8 width, module;
700 	int err;
701 
702 	mlxsw_reg_pmlp_pack(pmlp_pl, local_port);
703 	err = mlxsw_reg_query(core, MLXSW_REG(pmlp), pmlp_pl);
704 	if (err)
705 		return err;
706 
707 	width = mlxsw_reg_pmlp_width_get(pmlp_pl);
708 	if (!width)
709 		return 0;
710 
711 	module = mlxsw_reg_pmlp_module_get(pmlp_pl, 0);
712 	module_tz = &thermal->tz_module_arr[module];
713 	/* Skip if parent is already set (case of port split). */
714 	if (module_tz->parent)
715 		return 0;
716 	module_tz->module = module;
717 	module_tz->parent = thermal;
718 	memcpy(module_tz->trips, default_thermal_trips,
719 	       sizeof(thermal->trips));
720 	/* Initialize all trip point. */
721 	mlxsw_thermal_module_trips_reset(module_tz);
722 	/* Update trip point according to the module data. */
723 	return mlxsw_thermal_module_trips_update(dev, core, module_tz);
724 }
725 
726 static void mlxsw_thermal_module_fini(struct mlxsw_thermal_module *module_tz)
727 {
728 	if (module_tz && module_tz->tzdev) {
729 		mlxsw_thermal_module_tz_fini(module_tz->tzdev);
730 		module_tz->tzdev = NULL;
731 		module_tz->parent = NULL;
732 	}
733 }
734 
735 static int
736 mlxsw_thermal_modules_init(struct device *dev, struct mlxsw_core *core,
737 			   struct mlxsw_thermal *thermal)
738 {
739 	unsigned int module_count = mlxsw_core_max_ports(core);
740 	struct mlxsw_thermal_module *module_tz;
741 	int i, err;
742 
743 	if (!mlxsw_core_res_query_enabled(core))
744 		return 0;
745 
746 	thermal->tz_module_arr = kcalloc(module_count,
747 					 sizeof(*thermal->tz_module_arr),
748 					 GFP_KERNEL);
749 	if (!thermal->tz_module_arr)
750 		return -ENOMEM;
751 
752 	for (i = 1; i < module_count; i++) {
753 		err = mlxsw_thermal_module_init(dev, core, thermal, i);
754 		if (err)
755 			goto err_unreg_tz_module_arr;
756 	}
757 
758 	for (i = 0; i < module_count - 1; i++) {
759 		module_tz = &thermal->tz_module_arr[i];
760 		if (!module_tz->parent)
761 			continue;
762 		err = mlxsw_thermal_module_tz_init(module_tz);
763 		if (err)
764 			goto err_unreg_tz_module_arr;
765 	}
766 
767 	return 0;
768 
769 err_unreg_tz_module_arr:
770 	for (i = module_count - 1; i >= 0; i--)
771 		mlxsw_thermal_module_fini(&thermal->tz_module_arr[i]);
772 	kfree(thermal->tz_module_arr);
773 	return err;
774 }
775 
776 static void
777 mlxsw_thermal_modules_fini(struct mlxsw_thermal *thermal)
778 {
779 	unsigned int module_count = mlxsw_core_max_ports(thermal->core);
780 	int i;
781 
782 	if (!mlxsw_core_res_query_enabled(thermal->core))
783 		return;
784 
785 	for (i = module_count - 1; i >= 0; i--)
786 		mlxsw_thermal_module_fini(&thermal->tz_module_arr[i]);
787 	kfree(thermal->tz_module_arr);
788 }
789 
790 int mlxsw_thermal_init(struct mlxsw_core *core,
791 		       const struct mlxsw_bus_info *bus_info,
792 		       struct mlxsw_thermal **p_thermal)
793 {
794 	char mfcr_pl[MLXSW_REG_MFCR_LEN] = { 0 };
795 	enum mlxsw_reg_mfcr_pwm_frequency freq;
796 	struct device *dev = bus_info->dev;
797 	struct mlxsw_thermal *thermal;
798 	u16 tacho_active;
799 	u8 pwm_active;
800 	int err, i;
801 
802 	thermal = devm_kzalloc(dev, sizeof(*thermal),
803 			       GFP_KERNEL);
804 	if (!thermal)
805 		return -ENOMEM;
806 
807 	thermal->core = core;
808 	thermal->bus_info = bus_info;
809 	memcpy(thermal->trips, default_thermal_trips, sizeof(thermal->trips));
810 
811 	err = mlxsw_reg_query(thermal->core, MLXSW_REG(mfcr), mfcr_pl);
812 	if (err) {
813 		dev_err(dev, "Failed to probe PWMs\n");
814 		goto err_free_thermal;
815 	}
816 	mlxsw_reg_mfcr_unpack(mfcr_pl, &freq, &tacho_active, &pwm_active);
817 
818 	for (i = 0; i < MLXSW_MFCR_TACHOS_MAX; i++) {
819 		if (tacho_active & BIT(i)) {
820 			char mfsl_pl[MLXSW_REG_MFSL_LEN];
821 
822 			mlxsw_reg_mfsl_pack(mfsl_pl, i, 0, 0);
823 
824 			/* We need to query the register to preserve maximum */
825 			err = mlxsw_reg_query(thermal->core, MLXSW_REG(mfsl),
826 					      mfsl_pl);
827 			if (err)
828 				goto err_free_thermal;
829 
830 			/* set the minimal RPMs to 0 */
831 			mlxsw_reg_mfsl_tach_min_set(mfsl_pl, 0);
832 			err = mlxsw_reg_write(thermal->core, MLXSW_REG(mfsl),
833 					      mfsl_pl);
834 			if (err)
835 				goto err_free_thermal;
836 		}
837 	}
838 	for (i = 0; i < MLXSW_MFCR_PWMS_MAX; i++) {
839 		if (pwm_active & BIT(i)) {
840 			struct thermal_cooling_device *cdev;
841 
842 			cdev = thermal_cooling_device_register("mlxsw_fan",
843 							       thermal,
844 							       &mlxsw_cooling_ops);
845 			if (IS_ERR(cdev)) {
846 				err = PTR_ERR(cdev);
847 				dev_err(dev, "Failed to register cooling device\n");
848 				goto err_unreg_cdevs;
849 			}
850 			thermal->cdevs[i] = cdev;
851 		}
852 	}
853 
854 	/* Initialize cooling levels per PWM state. */
855 	for (i = 0; i < MLXSW_THERMAL_MAX_STATE; i++)
856 		thermal->cooling_levels[i] = max(MLXSW_THERMAL_SPEED_MIN_LEVEL,
857 						 i);
858 
859 	thermal->polling_delay = bus_info->low_frequency ?
860 				 MLXSW_THERMAL_SLOW_POLL_INT :
861 				 MLXSW_THERMAL_POLL_INT;
862 
863 	thermal->tzdev = thermal_zone_device_register("mlxsw",
864 						      MLXSW_THERMAL_NUM_TRIPS,
865 						      MLXSW_THERMAL_TRIP_MASK,
866 						      thermal,
867 						      &mlxsw_thermal_ops,
868 						      NULL, 0,
869 						      thermal->polling_delay);
870 	if (IS_ERR(thermal->tzdev)) {
871 		err = PTR_ERR(thermal->tzdev);
872 		dev_err(dev, "Failed to register thermal zone\n");
873 		goto err_unreg_cdevs;
874 	}
875 
876 	err = mlxsw_thermal_modules_init(dev, core, thermal);
877 	if (err)
878 		goto err_unreg_tzdev;
879 
880 	thermal->mode = THERMAL_DEVICE_ENABLED;
881 	*p_thermal = thermal;
882 	return 0;
883 
884 err_unreg_tzdev:
885 	if (thermal->tzdev) {
886 		thermal_zone_device_unregister(thermal->tzdev);
887 		thermal->tzdev = NULL;
888 	}
889 err_unreg_cdevs:
890 	for (i = 0; i < MLXSW_MFCR_PWMS_MAX; i++)
891 		if (thermal->cdevs[i])
892 			thermal_cooling_device_unregister(thermal->cdevs[i]);
893 err_free_thermal:
894 	devm_kfree(dev, thermal);
895 	return err;
896 }
897 
898 void mlxsw_thermal_fini(struct mlxsw_thermal *thermal)
899 {
900 	int i;
901 
902 	mlxsw_thermal_modules_fini(thermal);
903 	if (thermal->tzdev) {
904 		thermal_zone_device_unregister(thermal->tzdev);
905 		thermal->tzdev = NULL;
906 	}
907 
908 	for (i = 0; i < MLXSW_MFCR_PWMS_MAX; i++) {
909 		if (thermal->cdevs[i]) {
910 			thermal_cooling_device_unregister(thermal->cdevs[i]);
911 			thermal->cdevs[i] = NULL;
912 		}
913 	}
914 
915 	devm_kfree(thermal->bus_info->dev, thermal);
916 }
917