xref: /openbmc/linux/drivers/net/dsa/sja1105/sja1105_tas.c (revision f8523d0e83613ab8d082cd504dc53a09fbba4889)
1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (c) 2019, Vladimir Oltean <olteanv@gmail.com>
3  */
4 #include "sja1105.h"
5 
/* Values for the clksrc field of the Schedule Entry Points Parameters table.
 * Only SJA1105_TAS_CLKSRC_PTP is programmed by this file.
 */
enum {
	SJA1105_TAS_CLKSRC_DISABLED	= 0,
	SJA1105_TAS_CLKSRC_STANDALONE	= 1,
	SJA1105_TAS_CLKSRC_AS6802	= 2,
	SJA1105_TAS_CLKSRC_PTP		= 3,
};

/* Bitmask with the SJA1105_NUM_TC least significant bits set, i.e. one bit
 * per traffic class gate.
 */
#define SJA1105_GATE_MASK		GENMASK_ULL(SJA1105_NUM_TC - 1, 0)

/* Get the enclosing struct sja1105_tas_data from its tas_work member */
#define work_to_sja1105_tas(work_ptr) \
	container_of((work_ptr), struct sja1105_tas_data, tas_work)

/* Get the enclosing struct sja1105_private from its tas_data member */
#define tas_to_sja1105(tas_ptr) \
	container_of((tas_ptr), struct sja1105_private, tas_data)
16 
17 static int sja1105_tas_set_runtime_params(struct sja1105_private *priv)
18 {
19 	struct sja1105_tas_data *tas_data = &priv->tas_data;
20 	struct sja1105_gating_config *gating_cfg = &tas_data->gating_cfg;
21 	struct dsa_switch *ds = priv->ds;
22 	s64 earliest_base_time = S64_MAX;
23 	s64 latest_base_time = 0;
24 	s64 its_cycle_time = 0;
25 	s64 max_cycle_time = 0;
26 	int port;
27 
28 	tas_data->enabled = false;
29 
30 	for (port = 0; port < SJA1105_NUM_PORTS; port++) {
31 		const struct tc_taprio_qopt_offload *offload;
32 
33 		offload = tas_data->offload[port];
34 		if (!offload)
35 			continue;
36 
37 		tas_data->enabled = true;
38 
39 		if (max_cycle_time < offload->cycle_time)
40 			max_cycle_time = offload->cycle_time;
41 		if (latest_base_time < offload->base_time)
42 			latest_base_time = offload->base_time;
43 		if (earliest_base_time > offload->base_time) {
44 			earliest_base_time = offload->base_time;
45 			its_cycle_time = offload->cycle_time;
46 		}
47 	}
48 
49 	if (!list_empty(&gating_cfg->entries)) {
50 		tas_data->enabled = true;
51 
52 		if (max_cycle_time < gating_cfg->cycle_time)
53 			max_cycle_time = gating_cfg->cycle_time;
54 		if (latest_base_time < gating_cfg->base_time)
55 			latest_base_time = gating_cfg->base_time;
56 		if (earliest_base_time > gating_cfg->base_time) {
57 			earliest_base_time = gating_cfg->base_time;
58 			its_cycle_time = gating_cfg->cycle_time;
59 		}
60 	}
61 
62 	if (!tas_data->enabled)
63 		return 0;
64 
65 	/* Roll the earliest base time over until it is in a comparable
66 	 * time base with the latest, then compare their deltas.
67 	 * We want to enforce that all ports' base times are within
68 	 * SJA1105_TAS_MAX_DELTA 200ns cycles of one another.
69 	 */
70 	earliest_base_time = future_base_time(earliest_base_time,
71 					      its_cycle_time,
72 					      latest_base_time);
73 	while (earliest_base_time > latest_base_time)
74 		earliest_base_time -= its_cycle_time;
75 	if (latest_base_time - earliest_base_time >
76 	    sja1105_delta_to_ns(SJA1105_TAS_MAX_DELTA)) {
77 		dev_err(ds->dev,
78 			"Base times too far apart: min %llu max %llu\n",
79 			earliest_base_time, latest_base_time);
80 		return -ERANGE;
81 	}
82 
83 	tas_data->earliest_base_time = earliest_base_time;
84 	tas_data->max_cycle_time = max_cycle_time;
85 
86 	dev_dbg(ds->dev, "earliest base time %lld ns\n", earliest_base_time);
87 	dev_dbg(ds->dev, "latest base time %lld ns\n", latest_base_time);
88 	dev_dbg(ds->dev, "longest cycle time %lld ns\n", max_cycle_time);
89 
90 	return 0;
91 }
92 
93 /* Lo and behold: the egress scheduler from hell.
94  *
95  * At the hardware level, the Time-Aware Shaper holds a global linear arrray of
96  * all schedule entries for all ports. These are the Gate Control List (GCL)
97  * entries, let's call them "timeslots" for short. This linear array of
98  * timeslots is held in BLK_IDX_SCHEDULE.
99  *
100  * Then there are a maximum of 8 "execution threads" inside the switch, which
101  * iterate cyclically through the "schedule". Each "cycle" has an entry point
102  * and an exit point, both being timeslot indices in the schedule table. The
103  * hardware calls each cycle a "subschedule".
104  *
105  * Subschedule (cycle) i starts when
106  *   ptpclkval >= ptpschtm + BLK_IDX_SCHEDULE_ENTRY_POINTS[i].delta.
107  *
108  * The hardware scheduler iterates BLK_IDX_SCHEDULE with a k ranging from
109  *   k = BLK_IDX_SCHEDULE_ENTRY_POINTS[i].address to
110  *   k = BLK_IDX_SCHEDULE_PARAMS.subscheind[i]
111  *
112  * For each schedule entry (timeslot) k, the engine executes the gate control
113  * list entry for the duration of BLK_IDX_SCHEDULE[k].delta.
114  *
115  *         +---------+
116  *         |         | BLK_IDX_SCHEDULE_ENTRY_POINTS_PARAMS
117  *         +---------+
118  *              |
119  *              +-----------------+
120  *                                | .actsubsch
121  *  BLK_IDX_SCHEDULE_ENTRY_POINTS v
122  *                 +-------+-------+
123  *                 |cycle 0|cycle 1|
124  *                 +-------+-------+
125  *                   |  |      |  |
126  *  +----------------+  |      |  +-------------------------------------+
127  *  |   .subschindx     |      |             .subschindx                |
128  *  |                   |      +---------------+                        |
129  *  |          .address |        .address      |                        |
130  *  |                   |                      |                        |
131  *  |                   |                      |                        |
132  *  |  BLK_IDX_SCHEDULE v                      v                        |
133  *  |              +-------+-------+-------+-------+-------+------+     |
134  *  |              |entry 0|entry 1|entry 2|entry 3|entry 4|entry5|     |
135  *  |              +-------+-------+-------+-------+-------+------+     |
136  *  |                                  ^                    ^  ^  ^     |
137  *  |                                  |                    |  |  |     |
138  *  |        +-------------------------+                    |  |  |     |
139  *  |        |              +-------------------------------+  |  |     |
140  *  |        |              |              +-------------------+  |     |
141  *  |        |              |              |                      |     |
142  *  | +---------------------------------------------------------------+ |
143  *  | |subscheind[0]<=subscheind[1]<=subscheind[2]<=...<=subscheind[7]| |
144  *  | +---------------------------------------------------------------+ |
145  *  |        ^              ^                BLK_IDX_SCHEDULE_PARAMS    |
146  *  |        |              |                                           |
147  *  +--------+              +-------------------------------------------+
148  *
149  *  In the above picture there are two subschedules (cycles):
150  *
151  *  - cycle 0: iterates the schedule table from 0 to 2 (and back)
152  *  - cycle 1: iterates the schedule table from 3 to 5 (and back)
153  *
154  *  All other possible execution threads must be marked as unused by making
155  *  their "subschedule end index" (subscheind) equal to the last valid
156  *  subschedule's end index (in this case 5).
157  */
158 int sja1105_init_scheduling(struct sja1105_private *priv)
159 {
160 	struct sja1105_schedule_entry_points_entry *schedule_entry_points;
161 	struct sja1105_schedule_entry_points_params_entry
162 					*schedule_entry_points_params;
163 	struct sja1105_schedule_params_entry *schedule_params;
164 	struct sja1105_tas_data *tas_data = &priv->tas_data;
165 	struct sja1105_gating_config *gating_cfg = &tas_data->gating_cfg;
166 	struct sja1105_schedule_entry *schedule;
167 	struct sja1105_table *table;
168 	int schedule_start_idx;
169 	s64 entry_point_delta;
170 	int schedule_end_idx;
171 	int num_entries = 0;
172 	int num_cycles = 0;
173 	int cycle = 0;
174 	int i, k = 0;
175 	int port, rc;
176 
177 	rc = sja1105_tas_set_runtime_params(priv);
178 	if (rc < 0)
179 		return rc;
180 
181 	/* Discard previous Schedule Table */
182 	table = &priv->static_config.tables[BLK_IDX_SCHEDULE];
183 	if (table->entry_count) {
184 		kfree(table->entries);
185 		table->entry_count = 0;
186 	}
187 
188 	/* Discard previous Schedule Entry Points Parameters Table */
189 	table = &priv->static_config.tables[BLK_IDX_SCHEDULE_ENTRY_POINTS_PARAMS];
190 	if (table->entry_count) {
191 		kfree(table->entries);
192 		table->entry_count = 0;
193 	}
194 
195 	/* Discard previous Schedule Parameters Table */
196 	table = &priv->static_config.tables[BLK_IDX_SCHEDULE_PARAMS];
197 	if (table->entry_count) {
198 		kfree(table->entries);
199 		table->entry_count = 0;
200 	}
201 
202 	/* Discard previous Schedule Entry Points Table */
203 	table = &priv->static_config.tables[BLK_IDX_SCHEDULE_ENTRY_POINTS];
204 	if (table->entry_count) {
205 		kfree(table->entries);
206 		table->entry_count = 0;
207 	}
208 
209 	/* Figure out the dimensioning of the problem */
210 	for (port = 0; port < SJA1105_NUM_PORTS; port++) {
211 		if (tas_data->offload[port]) {
212 			num_entries += tas_data->offload[port]->num_entries;
213 			num_cycles++;
214 		}
215 	}
216 
217 	if (!list_empty(&gating_cfg->entries)) {
218 		num_entries += gating_cfg->num_entries;
219 		num_cycles++;
220 	}
221 
222 	/* Nothing to do */
223 	if (!num_cycles)
224 		return 0;
225 
226 	/* Pre-allocate space in the static config tables */
227 
228 	/* Schedule Table */
229 	table = &priv->static_config.tables[BLK_IDX_SCHEDULE];
230 	table->entries = kcalloc(num_entries, table->ops->unpacked_entry_size,
231 				 GFP_KERNEL);
232 	if (!table->entries)
233 		return -ENOMEM;
234 	table->entry_count = num_entries;
235 	schedule = table->entries;
236 
237 	/* Schedule Points Parameters Table */
238 	table = &priv->static_config.tables[BLK_IDX_SCHEDULE_ENTRY_POINTS_PARAMS];
239 	table->entries = kcalloc(SJA1105_MAX_SCHEDULE_ENTRY_POINTS_PARAMS_COUNT,
240 				 table->ops->unpacked_entry_size, GFP_KERNEL);
241 	if (!table->entries)
242 		/* Previously allocated memory will be freed automatically in
243 		 * sja1105_static_config_free. This is true for all early
244 		 * returns below.
245 		 */
246 		return -ENOMEM;
247 	table->entry_count = SJA1105_MAX_SCHEDULE_ENTRY_POINTS_PARAMS_COUNT;
248 	schedule_entry_points_params = table->entries;
249 
250 	/* Schedule Parameters Table */
251 	table = &priv->static_config.tables[BLK_IDX_SCHEDULE_PARAMS];
252 	table->entries = kcalloc(SJA1105_MAX_SCHEDULE_PARAMS_COUNT,
253 				 table->ops->unpacked_entry_size, GFP_KERNEL);
254 	if (!table->entries)
255 		return -ENOMEM;
256 	table->entry_count = SJA1105_MAX_SCHEDULE_PARAMS_COUNT;
257 	schedule_params = table->entries;
258 
259 	/* Schedule Entry Points Table */
260 	table = &priv->static_config.tables[BLK_IDX_SCHEDULE_ENTRY_POINTS];
261 	table->entries = kcalloc(num_cycles, table->ops->unpacked_entry_size,
262 				 GFP_KERNEL);
263 	if (!table->entries)
264 		return -ENOMEM;
265 	table->entry_count = num_cycles;
266 	schedule_entry_points = table->entries;
267 
268 	/* Finally start populating the static config tables */
269 	schedule_entry_points_params->clksrc = SJA1105_TAS_CLKSRC_PTP;
270 	schedule_entry_points_params->actsubsch = num_cycles - 1;
271 
272 	for (port = 0; port < SJA1105_NUM_PORTS; port++) {
273 		const struct tc_taprio_qopt_offload *offload;
274 		/* Relative base time */
275 		s64 rbt;
276 
277 		offload = tas_data->offload[port];
278 		if (!offload)
279 			continue;
280 
281 		schedule_start_idx = k;
282 		schedule_end_idx = k + offload->num_entries - 1;
283 		/* This is the base time expressed as a number of TAS ticks
284 		 * relative to PTPSCHTM, which we'll (perhaps improperly) call
285 		 * the operational base time.
286 		 */
287 		rbt = future_base_time(offload->base_time,
288 				       offload->cycle_time,
289 				       tas_data->earliest_base_time);
290 		rbt -= tas_data->earliest_base_time;
291 		/* UM10944.pdf 4.2.2. Schedule Entry Points table says that
292 		 * delta cannot be zero, which is shitty. Advance all relative
293 		 * base times by 1 TAS delta, so that even the earliest base
294 		 * time becomes 1 in relative terms. Then start the operational
295 		 * base time (PTPSCHTM) one TAS delta earlier than planned.
296 		 */
297 		entry_point_delta = ns_to_sja1105_delta(rbt) + 1;
298 
299 		schedule_entry_points[cycle].subschindx = cycle;
300 		schedule_entry_points[cycle].delta = entry_point_delta;
301 		schedule_entry_points[cycle].address = schedule_start_idx;
302 
303 		/* The subschedule end indices need to be
304 		 * monotonically increasing.
305 		 */
306 		for (i = cycle; i < 8; i++)
307 			schedule_params->subscheind[i] = schedule_end_idx;
308 
309 		for (i = 0; i < offload->num_entries; i++, k++) {
310 			s64 delta_ns = offload->entries[i].interval;
311 
312 			schedule[k].delta = ns_to_sja1105_delta(delta_ns);
313 			schedule[k].destports = BIT(port);
314 			schedule[k].resmedia_en = true;
315 			schedule[k].resmedia = SJA1105_GATE_MASK &
316 					~offload->entries[i].gate_mask;
317 		}
318 		cycle++;
319 	}
320 
321 	if (!list_empty(&gating_cfg->entries)) {
322 		struct sja1105_gate_entry *e;
323 
324 		/* Relative base time */
325 		s64 rbt;
326 
327 		schedule_start_idx = k;
328 		schedule_end_idx = k + gating_cfg->num_entries - 1;
329 		rbt = future_base_time(gating_cfg->base_time,
330 				       gating_cfg->cycle_time,
331 				       tas_data->earliest_base_time);
332 		rbt -= tas_data->earliest_base_time;
333 		entry_point_delta = ns_to_sja1105_delta(rbt) + 1;
334 
335 		schedule_entry_points[cycle].subschindx = cycle;
336 		schedule_entry_points[cycle].delta = entry_point_delta;
337 		schedule_entry_points[cycle].address = schedule_start_idx;
338 
339 		for (i = cycle; i < 8; i++)
340 			schedule_params->subscheind[i] = schedule_end_idx;
341 
342 		list_for_each_entry(e, &gating_cfg->entries, list) {
343 			schedule[k].delta = ns_to_sja1105_delta(e->interval);
344 			schedule[k].destports = e->rule->vl.destports;
345 			schedule[k].setvalid = true;
346 			schedule[k].txen = true;
347 			schedule[k].vlindex = e->rule->vl.sharindx;
348 			schedule[k].winstindex = e->rule->vl.sharindx;
349 			if (e->gate_state) /* Gate open */
350 				schedule[k].winst = true;
351 			else /* Gate closed */
352 				schedule[k].winend = true;
353 			k++;
354 		}
355 	}
356 
357 	return 0;
358 }
359 
360 /* Be there 2 port subschedules, each executing an arbitrary number of gate
361  * open/close events cyclically.
362  * None of those gate events must ever occur at the exact same time, otherwise
363  * the switch is known to act in exotically strange ways.
364  * However the hardware doesn't bother performing these integrity checks.
365  * So here we are with the task of validating whether the new @admin offload
366  * has any conflict with the already established TAS configuration in
367  * tas_data->offload.  We already know the other ports are in harmony with one
368  * another, otherwise we wouldn't have saved them.
369  * Each gate event executes periodically, with a period of @cycle_time and a
370  * phase given by its cycle's @base_time plus its offset within the cycle
371  * (which in turn is given by the length of the events prior to it).
372  * There are two aspects to possible collisions:
373  * - Collisions within one cycle's (actually the longest cycle's) time frame.
374  *   For that, we need to compare the cartesian product of each possible
375  *   occurrence of each event within one cycle time.
376  * - Collisions in the future. Events may not collide within one cycle time,
377  *   but if two port schedules don't have the same periodicity (aka the cycle
378  *   times aren't multiples of one another), they surely will some time in the
379  *   future (actually they will collide an infinite amount of times).
380  */
381 static bool
382 sja1105_tas_check_conflicts(struct sja1105_private *priv, int port,
383 			    const struct tc_taprio_qopt_offload *admin)
384 {
385 	struct sja1105_tas_data *tas_data = &priv->tas_data;
386 	const struct tc_taprio_qopt_offload *offload;
387 	s64 max_cycle_time, min_cycle_time;
388 	s64 delta1, delta2;
389 	s64 rbt1, rbt2;
390 	s64 stop_time;
391 	s64 t1, t2;
392 	int i, j;
393 	s32 rem;
394 
395 	offload = tas_data->offload[port];
396 	if (!offload)
397 		return false;
398 
399 	/* Check if the two cycle times are multiples of one another.
400 	 * If they aren't, then they will surely collide.
401 	 */
402 	max_cycle_time = max(offload->cycle_time, admin->cycle_time);
403 	min_cycle_time = min(offload->cycle_time, admin->cycle_time);
404 	div_s64_rem(max_cycle_time, min_cycle_time, &rem);
405 	if (rem)
406 		return true;
407 
408 	/* Calculate the "reduced" base time of each of the two cycles
409 	 * (transposed back as close to 0 as possible) by dividing to
410 	 * the cycle time.
411 	 */
412 	div_s64_rem(offload->base_time, offload->cycle_time, &rem);
413 	rbt1 = rem;
414 
415 	div_s64_rem(admin->base_time, admin->cycle_time, &rem);
416 	rbt2 = rem;
417 
418 	stop_time = max_cycle_time + max(rbt1, rbt2);
419 
420 	/* delta1 is the relative base time of each GCL entry within
421 	 * the established ports' TAS config.
422 	 */
423 	for (i = 0, delta1 = 0;
424 	     i < offload->num_entries;
425 	     delta1 += offload->entries[i].interval, i++) {
426 		/* delta2 is the relative base time of each GCL entry
427 		 * within the newly added TAS config.
428 		 */
429 		for (j = 0, delta2 = 0;
430 		     j < admin->num_entries;
431 		     delta2 += admin->entries[j].interval, j++) {
432 			/* t1 follows all possible occurrences of the
433 			 * established ports' GCL entry i within the
434 			 * first cycle time.
435 			 */
436 			for (t1 = rbt1 + delta1;
437 			     t1 <= stop_time;
438 			     t1 += offload->cycle_time) {
439 				/* t2 follows all possible occurrences
440 				 * of the newly added GCL entry j
441 				 * within the first cycle time.
442 				 */
443 				for (t2 = rbt2 + delta2;
444 				     t2 <= stop_time;
445 				     t2 += admin->cycle_time) {
446 					if (t1 == t2) {
447 						dev_warn(priv->ds->dev,
448 							 "GCL entry %d collides with entry %d of port %d\n",
449 							 j, i, port);
450 						return true;
451 					}
452 				}
453 			}
454 		}
455 	}
456 
457 	return false;
458 }
459 
460 /* Check the tc-taprio configuration on @port for conflicts with the tc-gate
461  * global subschedule. If @port is -1, check it against all ports.
462  * To reuse the sja1105_tas_check_conflicts logic without refactoring it,
463  * convert the gating configuration to a dummy tc-taprio offload structure.
464  */
465 bool sja1105_gating_check_conflicts(struct sja1105_private *priv, int port,
466 				    struct netlink_ext_ack *extack)
467 {
468 	struct sja1105_gating_config *gating_cfg = &priv->tas_data.gating_cfg;
469 	size_t num_entries = gating_cfg->num_entries;
470 	struct tc_taprio_qopt_offload *dummy;
471 	struct sja1105_gate_entry *e;
472 	bool conflict;
473 	int i = 0;
474 
475 	if (list_empty(&gating_cfg->entries))
476 		return false;
477 
478 	dummy = kzalloc(sizeof(struct tc_taprio_sched_entry) * num_entries +
479 			sizeof(struct tc_taprio_qopt_offload), GFP_KERNEL);
480 	if (!dummy) {
481 		NL_SET_ERR_MSG_MOD(extack, "Failed to allocate memory");
482 		return true;
483 	}
484 
485 	dummy->num_entries = num_entries;
486 	dummy->base_time = gating_cfg->base_time;
487 	dummy->cycle_time = gating_cfg->cycle_time;
488 
489 	list_for_each_entry(e, &gating_cfg->entries, list)
490 		dummy->entries[i++].interval = e->interval;
491 
492 	if (port != -1) {
493 		conflict = sja1105_tas_check_conflicts(priv, port, dummy);
494 	} else {
495 		for (port = 0; port < SJA1105_NUM_PORTS; port++) {
496 			conflict = sja1105_tas_check_conflicts(priv, port,
497 							       dummy);
498 			if (conflict)
499 				break;
500 		}
501 	}
502 
503 	kfree(dummy);
504 
505 	return conflict;
506 }
507 
508 int sja1105_setup_tc_taprio(struct dsa_switch *ds, int port,
509 			    struct tc_taprio_qopt_offload *admin)
510 {
511 	struct sja1105_private *priv = ds->priv;
512 	struct sja1105_tas_data *tas_data = &priv->tas_data;
513 	int other_port, rc, i;
514 
515 	/* Can't change an already configured port (must delete qdisc first).
516 	 * Can't delete the qdisc from an unconfigured port.
517 	 */
518 	if (!!tas_data->offload[port] == admin->enable)
519 		return -EINVAL;
520 
521 	if (!admin->enable) {
522 		taprio_offload_free(tas_data->offload[port]);
523 		tas_data->offload[port] = NULL;
524 
525 		rc = sja1105_init_scheduling(priv);
526 		if (rc < 0)
527 			return rc;
528 
529 		return sja1105_static_config_reload(priv, SJA1105_SCHEDULING);
530 	}
531 
532 	/* The cycle time extension is the amount of time the last cycle from
533 	 * the old OPER needs to be extended in order to phase-align with the
534 	 * base time of the ADMIN when that becomes the new OPER.
535 	 * But of course our switch needs to be reset to switch-over between
536 	 * the ADMIN and the OPER configs - so much for a seamless transition.
537 	 * So don't add insult over injury and just say we don't support cycle
538 	 * time extension.
539 	 */
540 	if (admin->cycle_time_extension)
541 		return -ENOTSUPP;
542 
543 	for (i = 0; i < admin->num_entries; i++) {
544 		s64 delta_ns = admin->entries[i].interval;
545 		s64 delta_cycles = ns_to_sja1105_delta(delta_ns);
546 		bool too_long, too_short;
547 
548 		too_long = (delta_cycles >= SJA1105_TAS_MAX_DELTA);
549 		too_short = (delta_cycles == 0);
550 		if (too_long || too_short) {
551 			dev_err(priv->ds->dev,
552 				"Interval %llu too %s for GCL entry %d\n",
553 				delta_ns, too_long ? "long" : "short", i);
554 			return -ERANGE;
555 		}
556 	}
557 
558 	for (other_port = 0; other_port < SJA1105_NUM_PORTS; other_port++) {
559 		if (other_port == port)
560 			continue;
561 
562 		if (sja1105_tas_check_conflicts(priv, other_port, admin))
563 			return -ERANGE;
564 	}
565 
566 	if (sja1105_gating_check_conflicts(priv, port, NULL)) {
567 		dev_err(ds->dev, "Conflict with tc-gate schedule\n");
568 		return -ERANGE;
569 	}
570 
571 	tas_data->offload[port] = taprio_offload_get(admin);
572 
573 	rc = sja1105_init_scheduling(priv);
574 	if (rc < 0)
575 		return rc;
576 
577 	return sja1105_static_config_reload(priv, SJA1105_SCHEDULING);
578 }
579 
580 static int sja1105_tas_check_running(struct sja1105_private *priv)
581 {
582 	struct sja1105_tas_data *tas_data = &priv->tas_data;
583 	struct dsa_switch *ds = priv->ds;
584 	struct sja1105_ptp_cmd cmd = {0};
585 	int rc;
586 
587 	rc = sja1105_ptp_commit(ds, &cmd, SPI_READ);
588 	if (rc < 0)
589 		return rc;
590 
591 	if (cmd.ptpstrtsch == 1)
592 		/* Schedule successfully started */
593 		tas_data->state = SJA1105_TAS_STATE_RUNNING;
594 	else if (cmd.ptpstopsch == 1)
595 		/* Schedule is stopped */
596 		tas_data->state = SJA1105_TAS_STATE_DISABLED;
597 	else
598 		/* Schedule is probably not configured with PTP clock source */
599 		rc = -EINVAL;
600 
601 	return rc;
602 }
603 
604 /* Write to PTPCLKCORP */
605 static int sja1105_tas_adjust_drift(struct sja1105_private *priv,
606 				    u64 correction)
607 {
608 	const struct sja1105_regs *regs = priv->info->regs;
609 	u32 ptpclkcorp = ns_to_sja1105_ticks(correction);
610 
611 	return sja1105_xfer_u32(priv, SPI_WRITE, regs->ptpclkcorp,
612 				&ptpclkcorp, NULL);
613 }
614 
615 /* Write to PTPSCHTM */
616 static int sja1105_tas_set_base_time(struct sja1105_private *priv,
617 				     u64 base_time)
618 {
619 	const struct sja1105_regs *regs = priv->info->regs;
620 	u64 ptpschtm = ns_to_sja1105_ticks(base_time);
621 
622 	return sja1105_xfer_u64(priv, SPI_WRITE, regs->ptpschtm,
623 				&ptpschtm, NULL);
624 }
625 
626 static int sja1105_tas_start(struct sja1105_private *priv)
627 {
628 	struct sja1105_tas_data *tas_data = &priv->tas_data;
629 	struct sja1105_ptp_cmd *cmd = &priv->ptp_data.cmd;
630 	struct dsa_switch *ds = priv->ds;
631 	int rc;
632 
633 	dev_dbg(ds->dev, "Starting the TAS\n");
634 
635 	if (tas_data->state == SJA1105_TAS_STATE_ENABLED_NOT_RUNNING ||
636 	    tas_data->state == SJA1105_TAS_STATE_RUNNING) {
637 		dev_err(ds->dev, "TAS already started\n");
638 		return -EINVAL;
639 	}
640 
641 	cmd->ptpstrtsch = 1;
642 	cmd->ptpstopsch = 0;
643 
644 	rc = sja1105_ptp_commit(ds, cmd, SPI_WRITE);
645 	if (rc < 0)
646 		return rc;
647 
648 	tas_data->state = SJA1105_TAS_STATE_ENABLED_NOT_RUNNING;
649 
650 	return 0;
651 }
652 
653 static int sja1105_tas_stop(struct sja1105_private *priv)
654 {
655 	struct sja1105_tas_data *tas_data = &priv->tas_data;
656 	struct sja1105_ptp_cmd *cmd = &priv->ptp_data.cmd;
657 	struct dsa_switch *ds = priv->ds;
658 	int rc;
659 
660 	dev_dbg(ds->dev, "Stopping the TAS\n");
661 
662 	if (tas_data->state == SJA1105_TAS_STATE_DISABLED) {
663 		dev_err(ds->dev, "TAS already disabled\n");
664 		return -EINVAL;
665 	}
666 
667 	cmd->ptpstopsch = 1;
668 	cmd->ptpstrtsch = 0;
669 
670 	rc = sja1105_ptp_commit(ds, cmd, SPI_WRITE);
671 	if (rc < 0)
672 		return rc;
673 
674 	tas_data->state = SJA1105_TAS_STATE_DISABLED;
675 
676 	return 0;
677 }
678 
679 /* The schedule engine and the PTP clock are driven by the same oscillator, and
680  * they run in parallel. But whilst the PTP clock can keep an absolute
681  * time-of-day, the schedule engine is only running in 'ticks' (25 ticks make
682  * up a delta, which is 200ns), and wrapping around at the end of each cycle.
683  * The schedule engine is started when the PTP clock reaches the PTPSCHTM time
684  * (in PTP domain).
685  * Because the PTP clock can be rate-corrected (accelerated or slowed down) by
686  * a software servo, and the schedule engine clock runs in parallel to the PTP
687  * clock, there is logic internal to the switch that periodically keeps the
688  * schedule engine from drifting away. The frequency with which this internal
689  * syntonization happens is the PTP clock correction period (PTPCLKCORP). It is
690  * a value also in the PTP clock domain, and is also rate-corrected.
691  * To be precise, during a correction period, there is logic to determine by
692  * how many scheduler clock ticks has the PTP clock drifted. At the end of each
693  * correction period/beginning of new one, the length of a delta is shrunk or
694  * expanded with an integer number of ticks, compared with the typical 25.
695  * So a delta lasts for 200ns (or 25 ticks) only on average.
696  * Sometimes it is longer, sometimes it is shorter. The internal syntonization
697  * logic can adjust for at most 5 ticks each 20 ticks.
698  *
699  * The first implication is that you should choose your schedule correction
700  * period to be an integer multiple of the schedule length. Preferably one.
701  * In case there are schedules of multiple ports active, then the correction
702  * period needs to be a multiple of them all. Given the restriction that the
703  * cycle times have to be multiples of one another anyway, this means the
704  * correction period can simply be the largest cycle time, hence the current
705  * choice. This way, the updates are always synchronous to the transmission
706  * cycle, and therefore predictable.
707  *
708  * The second implication is that at the beginning of a correction period, the
709  * first few deltas will be modulated in time, until the schedule engine is
710  * properly phase-aligned with the PTP clock. For this reason, you should place
711  * your best-effort traffic at the beginning of a cycle, and your
712  * time-triggered traffic afterwards.
713  *
714  * The third implication is that once the schedule engine is started, it can
715  * only adjust for so much drift within a correction period. In the servo you
716  * can only change the PTPCLKRATE, but not step the clock (PTPCLKADD). If you
717  * want to do the latter, you need to stop and restart the schedule engine,
718  * which is what the state machine handles.
719  */
720 static void sja1105_tas_state_machine(struct work_struct *work)
721 {
722 	struct sja1105_tas_data *tas_data = work_to_sja1105_tas(work);
723 	struct sja1105_private *priv = tas_to_sja1105(tas_data);
724 	struct sja1105_ptp_data *ptp_data = &priv->ptp_data;
725 	struct timespec64 base_time_ts, now_ts;
726 	struct dsa_switch *ds = priv->ds;
727 	struct timespec64 diff;
728 	s64 base_time, now;
729 	int rc = 0;
730 
731 	mutex_lock(&ptp_data->lock);
732 
733 	switch (tas_data->state) {
734 	case SJA1105_TAS_STATE_DISABLED:
735 		/* Can't do anything at all if clock is still being stepped */
736 		if (tas_data->last_op != SJA1105_PTP_ADJUSTFREQ)
737 			break;
738 
739 		rc = sja1105_tas_adjust_drift(priv, tas_data->max_cycle_time);
740 		if (rc < 0)
741 			break;
742 
743 		rc = __sja1105_ptp_gettimex(ds, &now, NULL);
744 		if (rc < 0)
745 			break;
746 
747 		/* Plan to start the earliest schedule first. The others
748 		 * will be started in hardware, by way of their respective
749 		 * entry points delta.
750 		 * Try our best to avoid fringe cases (race condition between
751 		 * ptpschtm and ptpstrtsch) by pushing the oper_base_time at
752 		 * least one second in the future from now. This is not ideal,
753 		 * but this only needs to buy us time until the
754 		 * sja1105_tas_start command below gets executed.
755 		 */
756 		base_time = future_base_time(tas_data->earliest_base_time,
757 					     tas_data->max_cycle_time,
758 					     now + 1ull * NSEC_PER_SEC);
759 		base_time -= sja1105_delta_to_ns(1);
760 
761 		rc = sja1105_tas_set_base_time(priv, base_time);
762 		if (rc < 0)
763 			break;
764 
765 		tas_data->oper_base_time = base_time;
766 
767 		rc = sja1105_tas_start(priv);
768 		if (rc < 0)
769 			break;
770 
771 		base_time_ts = ns_to_timespec64(base_time);
772 		now_ts = ns_to_timespec64(now);
773 
774 		dev_dbg(ds->dev, "OPER base time %lld.%09ld (now %lld.%09ld)\n",
775 			base_time_ts.tv_sec, base_time_ts.tv_nsec,
776 			now_ts.tv_sec, now_ts.tv_nsec);
777 
778 		break;
779 
780 	case SJA1105_TAS_STATE_ENABLED_NOT_RUNNING:
781 		if (tas_data->last_op != SJA1105_PTP_ADJUSTFREQ) {
782 			/* Clock was stepped.. bad news for TAS */
783 			sja1105_tas_stop(priv);
784 			break;
785 		}
786 
787 		/* Check if TAS has actually started, by comparing the
788 		 * scheduled start time with the SJA1105 PTP clock
789 		 */
790 		rc = __sja1105_ptp_gettimex(ds, &now, NULL);
791 		if (rc < 0)
792 			break;
793 
794 		if (now < tas_data->oper_base_time) {
795 			/* TAS has not started yet */
796 			diff = ns_to_timespec64(tas_data->oper_base_time - now);
797 			dev_dbg(ds->dev, "time to start: [%lld.%09ld]",
798 				diff.tv_sec, diff.tv_nsec);
799 			break;
800 		}
801 
802 		/* Time elapsed, what happened? */
803 		rc = sja1105_tas_check_running(priv);
804 		if (rc < 0)
805 			break;
806 
807 		if (tas_data->state != SJA1105_TAS_STATE_RUNNING)
808 			/* TAS has started */
809 			dev_err(ds->dev,
810 				"TAS not started despite time elapsed\n");
811 
812 		break;
813 
814 	case SJA1105_TAS_STATE_RUNNING:
815 		/* Clock was stepped.. bad news for TAS */
816 		if (tas_data->last_op != SJA1105_PTP_ADJUSTFREQ) {
817 			sja1105_tas_stop(priv);
818 			break;
819 		}
820 
821 		rc = sja1105_tas_check_running(priv);
822 		if (rc < 0)
823 			break;
824 
825 		if (tas_data->state != SJA1105_TAS_STATE_RUNNING)
826 			dev_err(ds->dev, "TAS surprisingly stopped\n");
827 
828 		break;
829 
830 	default:
831 		if (net_ratelimit())
832 			dev_err(ds->dev, "TAS in an invalid state (incorrect use of API)!\n");
833 	}
834 
835 	if (rc && net_ratelimit())
836 		dev_err(ds->dev, "An operation returned %d\n", rc);
837 
838 	mutex_unlock(&ptp_data->lock);
839 }
840 
841 void sja1105_tas_clockstep(struct dsa_switch *ds)
842 {
843 	struct sja1105_private *priv = ds->priv;
844 	struct sja1105_tas_data *tas_data = &priv->tas_data;
845 
846 	if (!tas_data->enabled)
847 		return;
848 
849 	tas_data->last_op = SJA1105_PTP_CLOCKSTEP;
850 	schedule_work(&tas_data->tas_work);
851 }
852 
853 void sja1105_tas_adjfreq(struct dsa_switch *ds)
854 {
855 	struct sja1105_private *priv = ds->priv;
856 	struct sja1105_tas_data *tas_data = &priv->tas_data;
857 
858 	if (!tas_data->enabled)
859 		return;
860 
861 	/* No reason to schedule the workqueue, nothing changed */
862 	if (tas_data->state == SJA1105_TAS_STATE_RUNNING)
863 		return;
864 
865 	tas_data->last_op = SJA1105_PTP_ADJUSTFREQ;
866 	schedule_work(&tas_data->tas_work);
867 }
868 
869 void sja1105_tas_setup(struct dsa_switch *ds)
870 {
871 	struct sja1105_private *priv = ds->priv;
872 	struct sja1105_tas_data *tas_data = &priv->tas_data;
873 
874 	INIT_WORK(&tas_data->tas_work, sja1105_tas_state_machine);
875 	tas_data->state = SJA1105_TAS_STATE_DISABLED;
876 	tas_data->last_op = SJA1105_PTP_NONE;
877 
878 	INIT_LIST_HEAD(&tas_data->gating_cfg.entries);
879 }
880 
881 void sja1105_tas_teardown(struct dsa_switch *ds)
882 {
883 	struct sja1105_private *priv = ds->priv;
884 	struct tc_taprio_qopt_offload *offload;
885 	int port;
886 
887 	cancel_work_sync(&priv->tas_data.tas_work);
888 
889 	for (port = 0; port < SJA1105_NUM_PORTS; port++) {
890 		offload = priv->tas_data.offload[port];
891 		if (!offload)
892 			continue;
893 
894 		taprio_offload_free(offload);
895 	}
896 }
897