xref: /openbmc/linux/drivers/net/dsa/sja1105/sja1105_tas.c (revision a89aa749ece9c6fee7932163472d2ee0efd6ddd3)
1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (c) 2019, Vladimir Oltean <olteanv@gmail.com>
3  */
4 #include "sja1105.h"
5 
/* Values accepted by the "clksrc" field of the Schedule Entry Points
 * Parameters table. This file only ever programs SJA1105_TAS_CLKSRC_PTP.
 */
#define SJA1105_TAS_CLKSRC_DISABLED	0
#define SJA1105_TAS_CLKSRC_STANDALONE	1
#define SJA1105_TAS_CLKSRC_AS6802	2
#define SJA1105_TAS_CLKSRC_PTP		3

/* Exclusive upper bound for a gate interval expressed in 200 ns deltas; also
 * used as the maximum tolerated distance between the base times of two ports.
 */
#define SJA1105_TAS_MAX_DELTA		BIT(19)

/* One bit per traffic class; applied to the inverted taprio gate mask */
#define SJA1105_GATE_MASK		GENMASK_ULL(SJA1105_NUM_TC - 1, 0)

/* Get from the embedded work item to the TAS data that contains it */
#define work_to_sja1105_tas(w) \
	container_of((w), struct sja1105_tas_data, tas_work)

/* Get from the embedded TAS data to the switch private structure */
#define tas_to_sja1105(t) \
	container_of((t), struct sja1105_private, tas_data)
17 
18 /* This is not a preprocessor macro because the "ns" argument may or may not be
19  * s64 at caller side. This ensures it is properly type-cast before div_s64.
20  */
21 static s64 ns_to_sja1105_delta(s64 ns)
22 {
23 	return div_s64(ns, 200);
24 }
25 
26 static s64 sja1105_delta_to_ns(s64 delta)
27 {
28 	return delta * 200;
29 }
30 
31 static int sja1105_tas_set_runtime_params(struct sja1105_private *priv)
32 {
33 	struct sja1105_tas_data *tas_data = &priv->tas_data;
34 	struct dsa_switch *ds = priv->ds;
35 	s64 earliest_base_time = S64_MAX;
36 	s64 latest_base_time = 0;
37 	s64 its_cycle_time = 0;
38 	s64 max_cycle_time = 0;
39 	int port;
40 
41 	tas_data->enabled = false;
42 
43 	for (port = 0; port < SJA1105_NUM_PORTS; port++) {
44 		const struct tc_taprio_qopt_offload *offload;
45 
46 		offload = tas_data->offload[port];
47 		if (!offload)
48 			continue;
49 
50 		tas_data->enabled = true;
51 
52 		if (max_cycle_time < offload->cycle_time)
53 			max_cycle_time = offload->cycle_time;
54 		if (latest_base_time < offload->base_time)
55 			latest_base_time = offload->base_time;
56 		if (earliest_base_time > offload->base_time) {
57 			earliest_base_time = offload->base_time;
58 			its_cycle_time = offload->cycle_time;
59 		}
60 	}
61 
62 	if (!tas_data->enabled)
63 		return 0;
64 
65 	/* Roll the earliest base time over until it is in a comparable
66 	 * time base with the latest, then compare their deltas.
67 	 * We want to enforce that all ports' base times are within
68 	 * SJA1105_TAS_MAX_DELTA 200ns cycles of one another.
69 	 */
70 	earliest_base_time = future_base_time(earliest_base_time,
71 					      its_cycle_time,
72 					      latest_base_time);
73 	while (earliest_base_time > latest_base_time)
74 		earliest_base_time -= its_cycle_time;
75 	if (latest_base_time - earliest_base_time >
76 	    sja1105_delta_to_ns(SJA1105_TAS_MAX_DELTA)) {
77 		dev_err(ds->dev,
78 			"Base times too far apart: min %llu max %llu\n",
79 			earliest_base_time, latest_base_time);
80 		return -ERANGE;
81 	}
82 
83 	tas_data->earliest_base_time = earliest_base_time;
84 	tas_data->max_cycle_time = max_cycle_time;
85 
86 	dev_dbg(ds->dev, "earliest base time %lld ns\n", earliest_base_time);
87 	dev_dbg(ds->dev, "latest base time %lld ns\n", latest_base_time);
88 	dev_dbg(ds->dev, "longest cycle time %lld ns\n", max_cycle_time);
89 
90 	return 0;
91 }
92 
93 /* Lo and behold: the egress scheduler from hell.
94  *
95  * At the hardware level, the Time-Aware Shaper holds a global linear array of
96  * all schedule entries for all ports. These are the Gate Control List (GCL)
97  * entries, let's call them "timeslots" for short. This linear array of
98  * timeslots is held in BLK_IDX_SCHEDULE.
99  *
100  * Then there are a maximum of 8 "execution threads" inside the switch, which
101  * iterate cyclically through the "schedule". Each "cycle" has an entry point
102  * and an exit point, both being timeslot indices in the schedule table. The
103  * hardware calls each cycle a "subschedule".
104  *
105  * Subschedule (cycle) i starts when
106  *   ptpclkval >= ptpschtm + BLK_IDX_SCHEDULE_ENTRY_POINTS[i].delta.
107  *
108  * The hardware scheduler iterates BLK_IDX_SCHEDULE with a k ranging from
109  *   k = BLK_IDX_SCHEDULE_ENTRY_POINTS[i].address to
110  *   k = BLK_IDX_SCHEDULE_PARAMS.subscheind[i]
111  *
112  * For each schedule entry (timeslot) k, the engine executes the gate control
113  * list entry for the duration of BLK_IDX_SCHEDULE[k].delta.
114  *
115  *         +---------+
116  *         |         | BLK_IDX_SCHEDULE_ENTRY_POINTS_PARAMS
117  *         +---------+
118  *              |
119  *              +-----------------+
120  *                                | .actsubsch
121  *  BLK_IDX_SCHEDULE_ENTRY_POINTS v
122  *                 +-------+-------+
123  *                 |cycle 0|cycle 1|
124  *                 +-------+-------+
125  *                   |  |      |  |
126  *  +----------------+  |      |  +-------------------------------------+
127  *  |   .subschindx     |      |             .subschindx                |
128  *  |                   |      +---------------+                        |
129  *  |          .address |        .address      |                        |
130  *  |                   |                      |                        |
131  *  |                   |                      |                        |
132  *  |  BLK_IDX_SCHEDULE v                      v                        |
133  *  |              +-------+-------+-------+-------+-------+------+     |
134  *  |              |entry 0|entry 1|entry 2|entry 3|entry 4|entry5|     |
135  *  |              +-------+-------+-------+-------+-------+------+     |
136  *  |                                  ^                    ^  ^  ^     |
137  *  |                                  |                    |  |  |     |
138  *  |        +-------------------------+                    |  |  |     |
139  *  |        |              +-------------------------------+  |  |     |
140  *  |        |              |              +-------------------+  |     |
141  *  |        |              |              |                      |     |
142  *  | +---------------------------------------------------------------+ |
143  *  | |subscheind[0]<=subscheind[1]<=subscheind[2]<=...<=subscheind[7]| |
144  *  | +---------------------------------------------------------------+ |
145  *  |        ^              ^                BLK_IDX_SCHEDULE_PARAMS    |
146  *  |        |              |                                           |
147  *  +--------+              +-------------------------------------------+
148  *
149  *  In the above picture there are two subschedules (cycles):
150  *
151  *  - cycle 0: iterates the schedule table from 0 to 2 (and back)
152  *  - cycle 1: iterates the schedule table from 3 to 5 (and back)
153  *
154  *  All other possible execution threads must be marked as unused by making
155  *  their "subschedule end index" (subscheind) equal to the last valid
156  *  subschedule's end index (in this case 5).
157  */
158 static int sja1105_init_scheduling(struct sja1105_private *priv)
159 {
160 	struct sja1105_schedule_entry_points_entry *schedule_entry_points;
161 	struct sja1105_schedule_entry_points_params_entry
162 					*schedule_entry_points_params;
163 	struct sja1105_schedule_params_entry *schedule_params;
164 	struct sja1105_tas_data *tas_data = &priv->tas_data;
165 	struct sja1105_schedule_entry *schedule;
166 	struct sja1105_table *table;
167 	int schedule_start_idx;
168 	s64 entry_point_delta;
169 	int schedule_end_idx;
170 	int num_entries = 0;
171 	int num_cycles = 0;
172 	int cycle = 0;
173 	int i, k = 0;
174 	int port, rc;
175 
176 	rc = sja1105_tas_set_runtime_params(priv);
177 	if (rc < 0)
178 		return rc;
179 
180 	/* Discard previous Schedule Table */
181 	table = &priv->static_config.tables[BLK_IDX_SCHEDULE];
182 	if (table->entry_count) {
183 		kfree(table->entries);
184 		table->entry_count = 0;
185 	}
186 
187 	/* Discard previous Schedule Entry Points Parameters Table */
188 	table = &priv->static_config.tables[BLK_IDX_SCHEDULE_ENTRY_POINTS_PARAMS];
189 	if (table->entry_count) {
190 		kfree(table->entries);
191 		table->entry_count = 0;
192 	}
193 
194 	/* Discard previous Schedule Parameters Table */
195 	table = &priv->static_config.tables[BLK_IDX_SCHEDULE_PARAMS];
196 	if (table->entry_count) {
197 		kfree(table->entries);
198 		table->entry_count = 0;
199 	}
200 
201 	/* Discard previous Schedule Entry Points Table */
202 	table = &priv->static_config.tables[BLK_IDX_SCHEDULE_ENTRY_POINTS];
203 	if (table->entry_count) {
204 		kfree(table->entries);
205 		table->entry_count = 0;
206 	}
207 
208 	/* Figure out the dimensioning of the problem */
209 	for (port = 0; port < SJA1105_NUM_PORTS; port++) {
210 		if (tas_data->offload[port]) {
211 			num_entries += tas_data->offload[port]->num_entries;
212 			num_cycles++;
213 		}
214 	}
215 
216 	/* Nothing to do */
217 	if (!num_cycles)
218 		return 0;
219 
220 	/* Pre-allocate space in the static config tables */
221 
222 	/* Schedule Table */
223 	table = &priv->static_config.tables[BLK_IDX_SCHEDULE];
224 	table->entries = kcalloc(num_entries, table->ops->unpacked_entry_size,
225 				 GFP_KERNEL);
226 	if (!table->entries)
227 		return -ENOMEM;
228 	table->entry_count = num_entries;
229 	schedule = table->entries;
230 
231 	/* Schedule Points Parameters Table */
232 	table = &priv->static_config.tables[BLK_IDX_SCHEDULE_ENTRY_POINTS_PARAMS];
233 	table->entries = kcalloc(SJA1105_MAX_SCHEDULE_ENTRY_POINTS_PARAMS_COUNT,
234 				 table->ops->unpacked_entry_size, GFP_KERNEL);
235 	if (!table->entries)
236 		/* Previously allocated memory will be freed automatically in
237 		 * sja1105_static_config_free. This is true for all early
238 		 * returns below.
239 		 */
240 		return -ENOMEM;
241 	table->entry_count = SJA1105_MAX_SCHEDULE_ENTRY_POINTS_PARAMS_COUNT;
242 	schedule_entry_points_params = table->entries;
243 
244 	/* Schedule Parameters Table */
245 	table = &priv->static_config.tables[BLK_IDX_SCHEDULE_PARAMS];
246 	table->entries = kcalloc(SJA1105_MAX_SCHEDULE_PARAMS_COUNT,
247 				 table->ops->unpacked_entry_size, GFP_KERNEL);
248 	if (!table->entries)
249 		return -ENOMEM;
250 	table->entry_count = SJA1105_MAX_SCHEDULE_PARAMS_COUNT;
251 	schedule_params = table->entries;
252 
253 	/* Schedule Entry Points Table */
254 	table = &priv->static_config.tables[BLK_IDX_SCHEDULE_ENTRY_POINTS];
255 	table->entries = kcalloc(num_cycles, table->ops->unpacked_entry_size,
256 				 GFP_KERNEL);
257 	if (!table->entries)
258 		return -ENOMEM;
259 	table->entry_count = num_cycles;
260 	schedule_entry_points = table->entries;
261 
262 	/* Finally start populating the static config tables */
263 	schedule_entry_points_params->clksrc = SJA1105_TAS_CLKSRC_PTP;
264 	schedule_entry_points_params->actsubsch = num_cycles - 1;
265 
266 	for (port = 0; port < SJA1105_NUM_PORTS; port++) {
267 		const struct tc_taprio_qopt_offload *offload;
268 		/* Relative base time */
269 		s64 rbt;
270 
271 		offload = tas_data->offload[port];
272 		if (!offload)
273 			continue;
274 
275 		schedule_start_idx = k;
276 		schedule_end_idx = k + offload->num_entries - 1;
277 		/* This is the base time expressed as a number of TAS ticks
278 		 * relative to PTPSCHTM, which we'll (perhaps improperly) call
279 		 * the operational base time.
280 		 */
281 		rbt = future_base_time(offload->base_time,
282 				       offload->cycle_time,
283 				       tas_data->earliest_base_time);
284 		rbt -= tas_data->earliest_base_time;
285 		/* UM10944.pdf 4.2.2. Schedule Entry Points table says that
286 		 * delta cannot be zero, which is shitty. Advance all relative
287 		 * base times by 1 TAS delta, so that even the earliest base
288 		 * time becomes 1 in relative terms. Then start the operational
289 		 * base time (PTPSCHTM) one TAS delta earlier than planned.
290 		 */
291 		entry_point_delta = ns_to_sja1105_delta(rbt) + 1;
292 
293 		schedule_entry_points[cycle].subschindx = cycle;
294 		schedule_entry_points[cycle].delta = entry_point_delta;
295 		schedule_entry_points[cycle].address = schedule_start_idx;
296 
297 		/* The subschedule end indices need to be
298 		 * monotonically increasing.
299 		 */
300 		for (i = cycle; i < 8; i++)
301 			schedule_params->subscheind[i] = schedule_end_idx;
302 
303 		for (i = 0; i < offload->num_entries; i++, k++) {
304 			s64 delta_ns = offload->entries[i].interval;
305 
306 			schedule[k].delta = ns_to_sja1105_delta(delta_ns);
307 			schedule[k].destports = BIT(port);
308 			schedule[k].resmedia_en = true;
309 			schedule[k].resmedia = SJA1105_GATE_MASK &
310 					~offload->entries[i].gate_mask;
311 		}
312 		cycle++;
313 	}
314 
315 	return 0;
316 }
317 
318 /* Be there 2 port subschedules, each executing an arbitrary number of gate
319  * open/close events cyclically.
320  * None of those gate events must ever occur at the exact same time, otherwise
321  * the switch is known to act in exotically strange ways.
322  * However the hardware doesn't bother performing these integrity checks.
323  * So here we are with the task of validating whether the new @admin offload
324  * has any conflict with the already established TAS configuration in
325  * tas_data->offload.  We already know the other ports are in harmony with one
326  * another, otherwise we wouldn't have saved them.
327  * Each gate event executes periodically, with a period of @cycle_time and a
328  * phase given by its cycle's @base_time plus its offset within the cycle
329  * (which in turn is given by the length of the events prior to it).
330  * There are two aspects to possible collisions:
331  * - Collisions within one cycle's (actually the longest cycle's) time frame.
332  *   For that, we need to compare the cartesian product of each possible
333  *   occurrence of each event within one cycle time.
334  * - Collisions in the future. Events may not collide within one cycle time,
335  *   but if two port schedules don't have the same periodicity (aka the cycle
336  *   times aren't multiples of one another), they surely will some time in the
337  *   future (actually they will collide an infinite amount of times).
338  */
339 static bool
340 sja1105_tas_check_conflicts(struct sja1105_private *priv, int port,
341 			    const struct tc_taprio_qopt_offload *admin)
342 {
343 	struct sja1105_tas_data *tas_data = &priv->tas_data;
344 	const struct tc_taprio_qopt_offload *offload;
345 	s64 max_cycle_time, min_cycle_time;
346 	s64 delta1, delta2;
347 	s64 rbt1, rbt2;
348 	s64 stop_time;
349 	s64 t1, t2;
350 	int i, j;
351 	s32 rem;
352 
353 	offload = tas_data->offload[port];
354 	if (!offload)
355 		return false;
356 
357 	/* Check if the two cycle times are multiples of one another.
358 	 * If they aren't, then they will surely collide.
359 	 */
360 	max_cycle_time = max(offload->cycle_time, admin->cycle_time);
361 	min_cycle_time = min(offload->cycle_time, admin->cycle_time);
362 	div_s64_rem(max_cycle_time, min_cycle_time, &rem);
363 	if (rem)
364 		return true;
365 
366 	/* Calculate the "reduced" base time of each of the two cycles
367 	 * (transposed back as close to 0 as possible) by dividing to
368 	 * the cycle time.
369 	 */
370 	div_s64_rem(offload->base_time, offload->cycle_time, &rem);
371 	rbt1 = rem;
372 
373 	div_s64_rem(admin->base_time, admin->cycle_time, &rem);
374 	rbt2 = rem;
375 
376 	stop_time = max_cycle_time + max(rbt1, rbt2);
377 
378 	/* delta1 is the relative base time of each GCL entry within
379 	 * the established ports' TAS config.
380 	 */
381 	for (i = 0, delta1 = 0;
382 	     i < offload->num_entries;
383 	     delta1 += offload->entries[i].interval, i++) {
384 		/* delta2 is the relative base time of each GCL entry
385 		 * within the newly added TAS config.
386 		 */
387 		for (j = 0, delta2 = 0;
388 		     j < admin->num_entries;
389 		     delta2 += admin->entries[j].interval, j++) {
390 			/* t1 follows all possible occurrences of the
391 			 * established ports' GCL entry i within the
392 			 * first cycle time.
393 			 */
394 			for (t1 = rbt1 + delta1;
395 			     t1 <= stop_time;
396 			     t1 += offload->cycle_time) {
397 				/* t2 follows all possible occurrences
398 				 * of the newly added GCL entry j
399 				 * within the first cycle time.
400 				 */
401 				for (t2 = rbt2 + delta2;
402 				     t2 <= stop_time;
403 				     t2 += admin->cycle_time) {
404 					if (t1 == t2) {
405 						dev_warn(priv->ds->dev,
406 							 "GCL entry %d collides with entry %d of port %d\n",
407 							 j, i, port);
408 						return true;
409 					}
410 				}
411 			}
412 		}
413 	}
414 
415 	return false;
416 }
417 
418 int sja1105_setup_tc_taprio(struct dsa_switch *ds, int port,
419 			    struct tc_taprio_qopt_offload *admin)
420 {
421 	struct sja1105_private *priv = ds->priv;
422 	struct sja1105_tas_data *tas_data = &priv->tas_data;
423 	int other_port, rc, i;
424 
425 	/* Can't change an already configured port (must delete qdisc first).
426 	 * Can't delete the qdisc from an unconfigured port.
427 	 */
428 	if (!!tas_data->offload[port] == admin->enable)
429 		return -EINVAL;
430 
431 	if (!admin->enable) {
432 		taprio_offload_free(tas_data->offload[port]);
433 		tas_data->offload[port] = NULL;
434 
435 		rc = sja1105_init_scheduling(priv);
436 		if (rc < 0)
437 			return rc;
438 
439 		return sja1105_static_config_reload(priv, SJA1105_SCHEDULING);
440 	}
441 
442 	/* The cycle time extension is the amount of time the last cycle from
443 	 * the old OPER needs to be extended in order to phase-align with the
444 	 * base time of the ADMIN when that becomes the new OPER.
445 	 * But of course our switch needs to be reset to switch-over between
446 	 * the ADMIN and the OPER configs - so much for a seamless transition.
447 	 * So don't add insult over injury and just say we don't support cycle
448 	 * time extension.
449 	 */
450 	if (admin->cycle_time_extension)
451 		return -ENOTSUPP;
452 
453 	for (i = 0; i < admin->num_entries; i++) {
454 		s64 delta_ns = admin->entries[i].interval;
455 		s64 delta_cycles = ns_to_sja1105_delta(delta_ns);
456 		bool too_long, too_short;
457 
458 		too_long = (delta_cycles >= SJA1105_TAS_MAX_DELTA);
459 		too_short = (delta_cycles == 0);
460 		if (too_long || too_short) {
461 			dev_err(priv->ds->dev,
462 				"Interval %llu too %s for GCL entry %d\n",
463 				delta_ns, too_long ? "long" : "short", i);
464 			return -ERANGE;
465 		}
466 	}
467 
468 	for (other_port = 0; other_port < SJA1105_NUM_PORTS; other_port++) {
469 		if (other_port == port)
470 			continue;
471 
472 		if (sja1105_tas_check_conflicts(priv, other_port, admin))
473 			return -ERANGE;
474 	}
475 
476 	tas_data->offload[port] = taprio_offload_get(admin);
477 
478 	rc = sja1105_init_scheduling(priv);
479 	if (rc < 0)
480 		return rc;
481 
482 	return sja1105_static_config_reload(priv, SJA1105_SCHEDULING);
483 }
484 
485 static int sja1105_tas_check_running(struct sja1105_private *priv)
486 {
487 	struct sja1105_tas_data *tas_data = &priv->tas_data;
488 	struct dsa_switch *ds = priv->ds;
489 	struct sja1105_ptp_cmd cmd = {0};
490 	int rc;
491 
492 	rc = sja1105_ptp_commit(ds, &cmd, SPI_READ);
493 	if (rc < 0)
494 		return rc;
495 
496 	if (cmd.ptpstrtsch == 1)
497 		/* Schedule successfully started */
498 		tas_data->state = SJA1105_TAS_STATE_RUNNING;
499 	else if (cmd.ptpstopsch == 1)
500 		/* Schedule is stopped */
501 		tas_data->state = SJA1105_TAS_STATE_DISABLED;
502 	else
503 		/* Schedule is probably not configured with PTP clock source */
504 		rc = -EINVAL;
505 
506 	return rc;
507 }
508 
509 /* Write to PTPCLKCORP */
510 static int sja1105_tas_adjust_drift(struct sja1105_private *priv,
511 				    u64 correction)
512 {
513 	const struct sja1105_regs *regs = priv->info->regs;
514 	u32 ptpclkcorp = ns_to_sja1105_ticks(correction);
515 
516 	return sja1105_xfer_u32(priv, SPI_WRITE, regs->ptpclkcorp,
517 				&ptpclkcorp, NULL);
518 }
519 
520 /* Write to PTPSCHTM */
521 static int sja1105_tas_set_base_time(struct sja1105_private *priv,
522 				     u64 base_time)
523 {
524 	const struct sja1105_regs *regs = priv->info->regs;
525 	u64 ptpschtm = ns_to_sja1105_ticks(base_time);
526 
527 	return sja1105_xfer_u64(priv, SPI_WRITE, regs->ptpschtm,
528 				&ptpschtm, NULL);
529 }
530 
531 static int sja1105_tas_start(struct sja1105_private *priv)
532 {
533 	struct sja1105_tas_data *tas_data = &priv->tas_data;
534 	struct sja1105_ptp_cmd *cmd = &priv->ptp_data.cmd;
535 	struct dsa_switch *ds = priv->ds;
536 	int rc;
537 
538 	dev_dbg(ds->dev, "Starting the TAS\n");
539 
540 	if (tas_data->state == SJA1105_TAS_STATE_ENABLED_NOT_RUNNING ||
541 	    tas_data->state == SJA1105_TAS_STATE_RUNNING) {
542 		dev_err(ds->dev, "TAS already started\n");
543 		return -EINVAL;
544 	}
545 
546 	cmd->ptpstrtsch = 1;
547 	cmd->ptpstopsch = 0;
548 
549 	rc = sja1105_ptp_commit(ds, cmd, SPI_WRITE);
550 	if (rc < 0)
551 		return rc;
552 
553 	tas_data->state = SJA1105_TAS_STATE_ENABLED_NOT_RUNNING;
554 
555 	return 0;
556 }
557 
558 static int sja1105_tas_stop(struct sja1105_private *priv)
559 {
560 	struct sja1105_tas_data *tas_data = &priv->tas_data;
561 	struct sja1105_ptp_cmd *cmd = &priv->ptp_data.cmd;
562 	struct dsa_switch *ds = priv->ds;
563 	int rc;
564 
565 	dev_dbg(ds->dev, "Stopping the TAS\n");
566 
567 	if (tas_data->state == SJA1105_TAS_STATE_DISABLED) {
568 		dev_err(ds->dev, "TAS already disabled\n");
569 		return -EINVAL;
570 	}
571 
572 	cmd->ptpstopsch = 1;
573 	cmd->ptpstrtsch = 0;
574 
575 	rc = sja1105_ptp_commit(ds, cmd, SPI_WRITE);
576 	if (rc < 0)
577 		return rc;
578 
579 	tas_data->state = SJA1105_TAS_STATE_DISABLED;
580 
581 	return 0;
582 }
583 
584 /* The schedule engine and the PTP clock are driven by the same oscillator, and
585  * they run in parallel. But whilst the PTP clock can keep an absolute
586  * time-of-day, the schedule engine is only running in 'ticks' (25 ticks make
587  * up a delta, which is 200ns), and wrapping around at the end of each cycle.
588  * The schedule engine is started when the PTP clock reaches the PTPSCHTM time
589  * (in PTP domain).
590  * Because the PTP clock can be rate-corrected (accelerated or slowed down) by
591  * a software servo, and the schedule engine clock runs in parallel to the PTP
592  * clock, there is logic internal to the switch that periodically keeps the
593  * schedule engine from drifting away. The frequency with which this internal
594  * syntonization happens is the PTP clock correction period (PTPCLKCORP). It is
595  * a value also in the PTP clock domain, and is also rate-corrected.
596  * To be precise, during a correction period, there is logic to determine by
597  * how many scheduler clock ticks has the PTP clock drifted. At the end of each
598  * correction period/beginning of new one, the length of a delta is shrunk or
599  * expanded with an integer number of ticks, compared with the typical 25.
600  * So a delta lasts for 200ns (or 25 ticks) only on average.
601  * Sometimes it is longer, sometimes it is shorter. The internal syntonization
602  * logic can adjust for at most 5 ticks each 20 ticks.
603  *
604  * The first implication is that you should choose your schedule correction
605  * period to be an integer multiple of the schedule length. Preferably one.
606  * In case there are schedules of multiple ports active, then the correction
607  * period needs to be a multiple of them all. Given the restriction that the
608  * cycle times have to be multiples of one another anyway, this means the
609  * correction period can simply be the largest cycle time, hence the current
610  * choice. This way, the updates are always synchronous to the transmission
611  * cycle, and therefore predictable.
612  *
613  * The second implication is that at the beginning of a correction period, the
614  * first few deltas will be modulated in time, until the schedule engine is
615  * properly phase-aligned with the PTP clock. For this reason, you should place
616  * your best-effort traffic at the beginning of a cycle, and your
617  * time-triggered traffic afterwards.
618  *
619  * The third implication is that once the schedule engine is started, it can
620  * only adjust for so much drift within a correction period. In the servo you
621  * can only change the PTPCLKRATE, but not step the clock (PTPCLKADD). If you
622  * want to do the latter, you need to stop and restart the schedule engine,
623  * which is what the state machine handles.
624  */
625 static void sja1105_tas_state_machine(struct work_struct *work)
626 {
627 	struct sja1105_tas_data *tas_data = work_to_sja1105_tas(work);
628 	struct sja1105_private *priv = tas_to_sja1105(tas_data);
629 	struct sja1105_ptp_data *ptp_data = &priv->ptp_data;
630 	struct timespec64 base_time_ts, now_ts;
631 	struct dsa_switch *ds = priv->ds;
632 	struct timespec64 diff;
633 	s64 base_time, now;
634 	int rc = 0;
635 
636 	mutex_lock(&ptp_data->lock);
637 
638 	switch (tas_data->state) {
639 	case SJA1105_TAS_STATE_DISABLED:
640 		/* Can't do anything at all if clock is still being stepped */
641 		if (tas_data->last_op != SJA1105_PTP_ADJUSTFREQ)
642 			break;
643 
644 		rc = sja1105_tas_adjust_drift(priv, tas_data->max_cycle_time);
645 		if (rc < 0)
646 			break;
647 
648 		rc = __sja1105_ptp_gettimex(ds, &now, NULL);
649 		if (rc < 0)
650 			break;
651 
652 		/* Plan to start the earliest schedule first. The others
653 		 * will be started in hardware, by way of their respective
654 		 * entry points delta.
655 		 * Try our best to avoid fringe cases (race condition between
656 		 * ptpschtm and ptpstrtsch) by pushing the oper_base_time at
657 		 * least one second in the future from now. This is not ideal,
658 		 * but this only needs to buy us time until the
659 		 * sja1105_tas_start command below gets executed.
660 		 */
661 		base_time = future_base_time(tas_data->earliest_base_time,
662 					     tas_data->max_cycle_time,
663 					     now + 1ull * NSEC_PER_SEC);
664 		base_time -= sja1105_delta_to_ns(1);
665 
666 		rc = sja1105_tas_set_base_time(priv, base_time);
667 		if (rc < 0)
668 			break;
669 
670 		tas_data->oper_base_time = base_time;
671 
672 		rc = sja1105_tas_start(priv);
673 		if (rc < 0)
674 			break;
675 
676 		base_time_ts = ns_to_timespec64(base_time);
677 		now_ts = ns_to_timespec64(now);
678 
679 		dev_dbg(ds->dev, "OPER base time %lld.%09ld (now %lld.%09ld)\n",
680 			base_time_ts.tv_sec, base_time_ts.tv_nsec,
681 			now_ts.tv_sec, now_ts.tv_nsec);
682 
683 		break;
684 
685 	case SJA1105_TAS_STATE_ENABLED_NOT_RUNNING:
686 		if (tas_data->last_op != SJA1105_PTP_ADJUSTFREQ) {
687 			/* Clock was stepped.. bad news for TAS */
688 			sja1105_tas_stop(priv);
689 			break;
690 		}
691 
692 		/* Check if TAS has actually started, by comparing the
693 		 * scheduled start time with the SJA1105 PTP clock
694 		 */
695 		rc = __sja1105_ptp_gettimex(ds, &now, NULL);
696 		if (rc < 0)
697 			break;
698 
699 		if (now < tas_data->oper_base_time) {
700 			/* TAS has not started yet */
701 			diff = ns_to_timespec64(tas_data->oper_base_time - now);
702 			dev_dbg(ds->dev, "time to start: [%lld.%09ld]",
703 				diff.tv_sec, diff.tv_nsec);
704 			break;
705 		}
706 
707 		/* Time elapsed, what happened? */
708 		rc = sja1105_tas_check_running(priv);
709 		if (rc < 0)
710 			break;
711 
712 		if (tas_data->state != SJA1105_TAS_STATE_RUNNING)
713 			/* TAS has started */
714 			dev_err(ds->dev,
715 				"TAS not started despite time elapsed\n");
716 
717 		break;
718 
719 	case SJA1105_TAS_STATE_RUNNING:
720 		/* Clock was stepped.. bad news for TAS */
721 		if (tas_data->last_op != SJA1105_PTP_ADJUSTFREQ) {
722 			sja1105_tas_stop(priv);
723 			break;
724 		}
725 
726 		rc = sja1105_tas_check_running(priv);
727 		if (rc < 0)
728 			break;
729 
730 		if (tas_data->state != SJA1105_TAS_STATE_RUNNING)
731 			dev_err(ds->dev, "TAS surprisingly stopped\n");
732 
733 		break;
734 
735 	default:
736 		if (net_ratelimit())
737 			dev_err(ds->dev, "TAS in an invalid state (incorrect use of API)!\n");
738 	}
739 
740 	if (rc && net_ratelimit())
741 		dev_err(ds->dev, "An operation returned %d\n", rc);
742 
743 	mutex_unlock(&ptp_data->lock);
744 }
745 
746 void sja1105_tas_clockstep(struct dsa_switch *ds)
747 {
748 	struct sja1105_private *priv = ds->priv;
749 	struct sja1105_tas_data *tas_data = &priv->tas_data;
750 
751 	if (!tas_data->enabled)
752 		return;
753 
754 	tas_data->last_op = SJA1105_PTP_CLOCKSTEP;
755 	schedule_work(&tas_data->tas_work);
756 }
757 
758 void sja1105_tas_adjfreq(struct dsa_switch *ds)
759 {
760 	struct sja1105_private *priv = ds->priv;
761 	struct sja1105_tas_data *tas_data = &priv->tas_data;
762 
763 	if (!tas_data->enabled)
764 		return;
765 
766 	/* No reason to schedule the workqueue, nothing changed */
767 	if (tas_data->state == SJA1105_TAS_STATE_RUNNING)
768 		return;
769 
770 	tas_data->last_op = SJA1105_PTP_ADJUSTFREQ;
771 	schedule_work(&tas_data->tas_work);
772 }
773 
774 void sja1105_tas_setup(struct dsa_switch *ds)
775 {
776 	struct sja1105_private *priv = ds->priv;
777 	struct sja1105_tas_data *tas_data = &priv->tas_data;
778 
779 	INIT_WORK(&tas_data->tas_work, sja1105_tas_state_machine);
780 	tas_data->state = SJA1105_TAS_STATE_DISABLED;
781 	tas_data->last_op = SJA1105_PTP_NONE;
782 }
783 
784 void sja1105_tas_teardown(struct dsa_switch *ds)
785 {
786 	struct sja1105_private *priv = ds->priv;
787 	struct tc_taprio_qopt_offload *offload;
788 	int port;
789 
790 	cancel_work_sync(&priv->tas_data.tas_work);
791 
792 	for (port = 0; port < SJA1105_NUM_PORTS; port++) {
793 		offload = priv->tas_data.offload[port];
794 		if (!offload)
795 			continue;
796 
797 		taprio_offload_free(offload);
798 	}
799 }
800