xref: /openbmc/linux/drivers/net/dsa/sja1105/sja1105_tas.c (revision 5fa1f7680f2728d62561db6d4a9282c4d21f2324)
1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (c) 2019, Vladimir Oltean <olteanv@gmail.com>
3  */
4 #include "sja1105.h"
5 
/* Values for the clksrc field of the Schedule Entry Points Parameters table.
 * The scheduling setup in this file programs SJA1105_TAS_CLKSRC_PTP.
 */
#define SJA1105_TAS_CLKSRC_DISABLED	0
#define SJA1105_TAS_CLKSRC_STANDALONE	1
#define SJA1105_TAS_CLKSRC_AS6802	2
#define SJA1105_TAS_CLKSRC_PTP		3

/* Bitmask with bits 0 .. SJA1105_NUM_TC - 1 set (one per traffic class) */
#define SJA1105_GATE_MASK		GENMASK_ULL(SJA1105_NUM_TC - 1, 0)

/* Map a pointer to the embedded tas_work member back to its containing
 * struct sja1105_tas_data.
 */
#define work_to_sja1105_tas(ptr) \
	container_of((ptr), struct sja1105_tas_data, tas_work)

/* Map a pointer to the embedded tas_data member back to its containing
 * struct sja1105_private.
 */
#define tas_to_sja1105(ptr) \
	container_of((ptr), struct sja1105_private, tas_data)
16 
17 static int sja1105_tas_set_runtime_params(struct sja1105_private *priv)
18 {
19 	struct sja1105_tas_data *tas_data = &priv->tas_data;
20 	struct sja1105_gating_config *gating_cfg = &tas_data->gating_cfg;
21 	struct dsa_switch *ds = priv->ds;
22 	s64 earliest_base_time = S64_MAX;
23 	s64 latest_base_time = 0;
24 	s64 its_cycle_time = 0;
25 	s64 max_cycle_time = 0;
26 	int port;
27 
28 	tas_data->enabled = false;
29 
30 	for (port = 0; port < SJA1105_NUM_PORTS; port++) {
31 		const struct tc_taprio_qopt_offload *offload;
32 
33 		offload = tas_data->offload[port];
34 		if (!offload)
35 			continue;
36 
37 		tas_data->enabled = true;
38 
39 		if (max_cycle_time < offload->cycle_time)
40 			max_cycle_time = offload->cycle_time;
41 		if (latest_base_time < offload->base_time)
42 			latest_base_time = offload->base_time;
43 		if (earliest_base_time > offload->base_time) {
44 			earliest_base_time = offload->base_time;
45 			its_cycle_time = offload->cycle_time;
46 		}
47 	}
48 
49 	if (!list_empty(&gating_cfg->entries)) {
50 		tas_data->enabled = true;
51 
52 		if (max_cycle_time < gating_cfg->cycle_time)
53 			max_cycle_time = gating_cfg->cycle_time;
54 		if (latest_base_time < gating_cfg->base_time)
55 			latest_base_time = gating_cfg->base_time;
56 		if (earliest_base_time > gating_cfg->base_time) {
57 			earliest_base_time = gating_cfg->base_time;
58 			its_cycle_time = gating_cfg->cycle_time;
59 		}
60 	}
61 
62 	if (!tas_data->enabled)
63 		return 0;
64 
65 	/* Roll the earliest base time over until it is in a comparable
66 	 * time base with the latest, then compare their deltas.
67 	 * We want to enforce that all ports' base times are within
68 	 * SJA1105_TAS_MAX_DELTA 200ns cycles of one another.
69 	 */
70 	earliest_base_time = future_base_time(earliest_base_time,
71 					      its_cycle_time,
72 					      latest_base_time);
73 	while (earliest_base_time > latest_base_time)
74 		earliest_base_time -= its_cycle_time;
75 	if (latest_base_time - earliest_base_time >
76 	    sja1105_delta_to_ns(SJA1105_TAS_MAX_DELTA)) {
77 		dev_err(ds->dev,
78 			"Base times too far apart: min %llu max %llu\n",
79 			earliest_base_time, latest_base_time);
80 		return -ERANGE;
81 	}
82 
83 	tas_data->earliest_base_time = earliest_base_time;
84 	tas_data->max_cycle_time = max_cycle_time;
85 
86 	dev_dbg(ds->dev, "earliest base time %lld ns\n", earliest_base_time);
87 	dev_dbg(ds->dev, "latest base time %lld ns\n", latest_base_time);
88 	dev_dbg(ds->dev, "longest cycle time %lld ns\n", max_cycle_time);
89 
90 	return 0;
91 }
92 
93 /* Lo and behold: the egress scheduler from hell.
94  *
95  * At the hardware level, the Time-Aware Shaper holds a global linear arrray of
96  * all schedule entries for all ports. These are the Gate Control List (GCL)
97  * entries, let's call them "timeslots" for short. This linear array of
98  * timeslots is held in BLK_IDX_SCHEDULE.
99  *
100  * Then there are a maximum of 8 "execution threads" inside the switch, which
101  * iterate cyclically through the "schedule". Each "cycle" has an entry point
102  * and an exit point, both being timeslot indices in the schedule table. The
103  * hardware calls each cycle a "subschedule".
104  *
105  * Subschedule (cycle) i starts when
106  *   ptpclkval >= ptpschtm + BLK_IDX_SCHEDULE_ENTRY_POINTS[i].delta.
107  *
108  * The hardware scheduler iterates BLK_IDX_SCHEDULE with a k ranging from
109  *   k = BLK_IDX_SCHEDULE_ENTRY_POINTS[i].address to
110  *   k = BLK_IDX_SCHEDULE_PARAMS.subscheind[i]
111  *
112  * For each schedule entry (timeslot) k, the engine executes the gate control
113  * list entry for the duration of BLK_IDX_SCHEDULE[k].delta.
114  *
115  *         +---------+
116  *         |         | BLK_IDX_SCHEDULE_ENTRY_POINTS_PARAMS
117  *         +---------+
118  *              |
119  *              +-----------------+
120  *                                | .actsubsch
121  *  BLK_IDX_SCHEDULE_ENTRY_POINTS v
122  *                 +-------+-------+
123  *                 |cycle 0|cycle 1|
124  *                 +-------+-------+
125  *                   |  |      |  |
126  *  +----------------+  |      |  +-------------------------------------+
127  *  |   .subschindx     |      |             .subschindx                |
128  *  |                   |      +---------------+                        |
129  *  |          .address |        .address      |                        |
130  *  |                   |                      |                        |
131  *  |                   |                      |                        |
132  *  |  BLK_IDX_SCHEDULE v                      v                        |
133  *  |              +-------+-------+-------+-------+-------+------+     |
134  *  |              |entry 0|entry 1|entry 2|entry 3|entry 4|entry5|     |
135  *  |              +-------+-------+-------+-------+-------+------+     |
136  *  |                                  ^                    ^  ^  ^     |
137  *  |                                  |                    |  |  |     |
138  *  |        +-------------------------+                    |  |  |     |
139  *  |        |              +-------------------------------+  |  |     |
140  *  |        |              |              +-------------------+  |     |
141  *  |        |              |              |                      |     |
142  *  | +---------------------------------------------------------------+ |
143  *  | |subscheind[0]<=subscheind[1]<=subscheind[2]<=...<=subscheind[7]| |
144  *  | +---------------------------------------------------------------+ |
145  *  |        ^              ^                BLK_IDX_SCHEDULE_PARAMS    |
146  *  |        |              |                                           |
147  *  +--------+              +-------------------------------------------+
148  *
149  *  In the above picture there are two subschedules (cycles):
150  *
151  *  - cycle 0: iterates the schedule table from 0 to 2 (and back)
152  *  - cycle 1: iterates the schedule table from 3 to 5 (and back)
153  *
154  *  All other possible execution threads must be marked as unused by making
155  *  their "subschedule end index" (subscheind) equal to the last valid
156  *  subschedule's end index (in this case 5).
157  */
158 int sja1105_init_scheduling(struct sja1105_private *priv)
159 {
160 	struct sja1105_schedule_entry_points_entry *schedule_entry_points;
161 	struct sja1105_schedule_entry_points_params_entry
162 					*schedule_entry_points_params;
163 	struct sja1105_schedule_params_entry *schedule_params;
164 	struct sja1105_tas_data *tas_data = &priv->tas_data;
165 	struct sja1105_gating_config *gating_cfg = &tas_data->gating_cfg;
166 	struct sja1105_schedule_entry *schedule;
167 	struct sja1105_table *table;
168 	int schedule_start_idx;
169 	s64 entry_point_delta;
170 	int schedule_end_idx;
171 	int num_entries = 0;
172 	int num_cycles = 0;
173 	int cycle = 0;
174 	int i, k = 0;
175 	int port, rc;
176 
177 	rc = sja1105_tas_set_runtime_params(priv);
178 	if (rc < 0)
179 		return rc;
180 
181 	/* Discard previous Schedule Table */
182 	table = &priv->static_config.tables[BLK_IDX_SCHEDULE];
183 	if (table->entry_count) {
184 		kfree(table->entries);
185 		table->entry_count = 0;
186 	}
187 
188 	/* Discard previous Schedule Entry Points Parameters Table */
189 	table = &priv->static_config.tables[BLK_IDX_SCHEDULE_ENTRY_POINTS_PARAMS];
190 	if (table->entry_count) {
191 		kfree(table->entries);
192 		table->entry_count = 0;
193 	}
194 
195 	/* Discard previous Schedule Parameters Table */
196 	table = &priv->static_config.tables[BLK_IDX_SCHEDULE_PARAMS];
197 	if (table->entry_count) {
198 		kfree(table->entries);
199 		table->entry_count = 0;
200 	}
201 
202 	/* Discard previous Schedule Entry Points Table */
203 	table = &priv->static_config.tables[BLK_IDX_SCHEDULE_ENTRY_POINTS];
204 	if (table->entry_count) {
205 		kfree(table->entries);
206 		table->entry_count = 0;
207 	}
208 
209 	/* Figure out the dimensioning of the problem */
210 	for (port = 0; port < SJA1105_NUM_PORTS; port++) {
211 		if (tas_data->offload[port]) {
212 			num_entries += tas_data->offload[port]->num_entries;
213 			num_cycles++;
214 		}
215 	}
216 
217 	if (!list_empty(&gating_cfg->entries)) {
218 		num_entries += gating_cfg->num_entries;
219 		num_cycles++;
220 	}
221 
222 	/* Nothing to do */
223 	if (!num_cycles)
224 		return 0;
225 
226 	/* Pre-allocate space in the static config tables */
227 
228 	/* Schedule Table */
229 	table = &priv->static_config.tables[BLK_IDX_SCHEDULE];
230 	table->entries = kcalloc(num_entries, table->ops->unpacked_entry_size,
231 				 GFP_KERNEL);
232 	if (!table->entries)
233 		return -ENOMEM;
234 	table->entry_count = num_entries;
235 	schedule = table->entries;
236 
237 	/* Schedule Points Parameters Table */
238 	table = &priv->static_config.tables[BLK_IDX_SCHEDULE_ENTRY_POINTS_PARAMS];
239 	table->entries = kcalloc(SJA1105_MAX_SCHEDULE_ENTRY_POINTS_PARAMS_COUNT,
240 				 table->ops->unpacked_entry_size, GFP_KERNEL);
241 	if (!table->entries)
242 		/* Previously allocated memory will be freed automatically in
243 		 * sja1105_static_config_free. This is true for all early
244 		 * returns below.
245 		 */
246 		return -ENOMEM;
247 	table->entry_count = SJA1105_MAX_SCHEDULE_ENTRY_POINTS_PARAMS_COUNT;
248 	schedule_entry_points_params = table->entries;
249 
250 	/* Schedule Parameters Table */
251 	table = &priv->static_config.tables[BLK_IDX_SCHEDULE_PARAMS];
252 	table->entries = kcalloc(SJA1105_MAX_SCHEDULE_PARAMS_COUNT,
253 				 table->ops->unpacked_entry_size, GFP_KERNEL);
254 	if (!table->entries)
255 		return -ENOMEM;
256 	table->entry_count = SJA1105_MAX_SCHEDULE_PARAMS_COUNT;
257 	schedule_params = table->entries;
258 
259 	/* Schedule Entry Points Table */
260 	table = &priv->static_config.tables[BLK_IDX_SCHEDULE_ENTRY_POINTS];
261 	table->entries = kcalloc(num_cycles, table->ops->unpacked_entry_size,
262 				 GFP_KERNEL);
263 	if (!table->entries)
264 		return -ENOMEM;
265 	table->entry_count = num_cycles;
266 	schedule_entry_points = table->entries;
267 
268 	/* Finally start populating the static config tables */
269 	schedule_entry_points_params->clksrc = SJA1105_TAS_CLKSRC_PTP;
270 	schedule_entry_points_params->actsubsch = num_cycles - 1;
271 
272 	for (port = 0; port < SJA1105_NUM_PORTS; port++) {
273 		const struct tc_taprio_qopt_offload *offload;
274 		/* Relative base time */
275 		s64 rbt;
276 
277 		offload = tas_data->offload[port];
278 		if (!offload)
279 			continue;
280 
281 		schedule_start_idx = k;
282 		schedule_end_idx = k + offload->num_entries - 1;
283 		/* This is the base time expressed as a number of TAS ticks
284 		 * relative to PTPSCHTM, which we'll (perhaps improperly) call
285 		 * the operational base time.
286 		 */
287 		rbt = future_base_time(offload->base_time,
288 				       offload->cycle_time,
289 				       tas_data->earliest_base_time);
290 		rbt -= tas_data->earliest_base_time;
291 		/* UM10944.pdf 4.2.2. Schedule Entry Points table says that
292 		 * delta cannot be zero, which is shitty. Advance all relative
293 		 * base times by 1 TAS delta, so that even the earliest base
294 		 * time becomes 1 in relative terms. Then start the operational
295 		 * base time (PTPSCHTM) one TAS delta earlier than planned.
296 		 */
297 		entry_point_delta = ns_to_sja1105_delta(rbt) + 1;
298 
299 		schedule_entry_points[cycle].subschindx = cycle;
300 		schedule_entry_points[cycle].delta = entry_point_delta;
301 		schedule_entry_points[cycle].address = schedule_start_idx;
302 
303 		/* The subschedule end indices need to be
304 		 * monotonically increasing.
305 		 */
306 		for (i = cycle; i < 8; i++)
307 			schedule_params->subscheind[i] = schedule_end_idx;
308 
309 		for (i = 0; i < offload->num_entries; i++, k++) {
310 			s64 delta_ns = offload->entries[i].interval;
311 
312 			schedule[k].delta = ns_to_sja1105_delta(delta_ns);
313 			schedule[k].destports = BIT(port);
314 			schedule[k].resmedia_en = true;
315 			schedule[k].resmedia = SJA1105_GATE_MASK &
316 					~offload->entries[i].gate_mask;
317 		}
318 		cycle++;
319 	}
320 
321 	if (!list_empty(&gating_cfg->entries)) {
322 		struct sja1105_gate_entry *e;
323 
324 		/* Relative base time */
325 		s64 rbt;
326 
327 		schedule_start_idx = k;
328 		schedule_end_idx = k + gating_cfg->num_entries - 1;
329 		rbt = future_base_time(gating_cfg->base_time,
330 				       gating_cfg->cycle_time,
331 				       tas_data->earliest_base_time);
332 		rbt -= tas_data->earliest_base_time;
333 		entry_point_delta = ns_to_sja1105_delta(rbt) + 1;
334 
335 		schedule_entry_points[cycle].subschindx = cycle;
336 		schedule_entry_points[cycle].delta = entry_point_delta;
337 		schedule_entry_points[cycle].address = schedule_start_idx;
338 
339 		for (i = cycle; i < 8; i++)
340 			schedule_params->subscheind[i] = schedule_end_idx;
341 
342 		list_for_each_entry(e, &gating_cfg->entries, list) {
343 			schedule[k].delta = ns_to_sja1105_delta(e->interval);
344 			schedule[k].destports = e->rule->vl.destports;
345 			schedule[k].setvalid = true;
346 			schedule[k].txen = true;
347 			schedule[k].vlindex = e->rule->vl.sharindx;
348 			schedule[k].winstindex = e->rule->vl.sharindx;
349 			if (e->gate_state) /* Gate open */
350 				schedule[k].winst = true;
351 			else /* Gate closed */
352 				schedule[k].winend = true;
353 			k++;
354 		}
355 	}
356 
357 	return 0;
358 }
359 
360 /* Be there 2 port subschedules, each executing an arbitrary number of gate
361  * open/close events cyclically.
362  * None of those gate events must ever occur at the exact same time, otherwise
363  * the switch is known to act in exotically strange ways.
364  * However the hardware doesn't bother performing these integrity checks.
365  * So here we are with the task of validating whether the new @admin offload
366  * has any conflict with the already established TAS configuration in
367  * tas_data->offload.  We already know the other ports are in harmony with one
368  * another, otherwise we wouldn't have saved them.
369  * Each gate event executes periodically, with a period of @cycle_time and a
370  * phase given by its cycle's @base_time plus its offset within the cycle
371  * (which in turn is given by the length of the events prior to it).
372  * There are two aspects to possible collisions:
373  * - Collisions within one cycle's (actually the longest cycle's) time frame.
374  *   For that, we need to compare the cartesian product of each possible
375  *   occurrence of each event within one cycle time.
376  * - Collisions in the future. Events may not collide within one cycle time,
377  *   but if two port schedules don't have the same periodicity (aka the cycle
378  *   times aren't multiples of one another), they surely will some time in the
379  *   future (actually they will collide an infinite amount of times).
380  */
381 static bool
382 sja1105_tas_check_conflicts(struct sja1105_private *priv, int port,
383 			    const struct tc_taprio_qopt_offload *admin)
384 {
385 	struct sja1105_tas_data *tas_data = &priv->tas_data;
386 	const struct tc_taprio_qopt_offload *offload;
387 	s64 max_cycle_time, min_cycle_time;
388 	s64 delta1, delta2;
389 	s64 rbt1, rbt2;
390 	s64 stop_time;
391 	s64 t1, t2;
392 	int i, j;
393 	s32 rem;
394 
395 	offload = tas_data->offload[port];
396 	if (!offload)
397 		return false;
398 
399 	/* Check if the two cycle times are multiples of one another.
400 	 * If they aren't, then they will surely collide.
401 	 */
402 	max_cycle_time = max(offload->cycle_time, admin->cycle_time);
403 	min_cycle_time = min(offload->cycle_time, admin->cycle_time);
404 	div_s64_rem(max_cycle_time, min_cycle_time, &rem);
405 	if (rem)
406 		return true;
407 
408 	/* Calculate the "reduced" base time of each of the two cycles
409 	 * (transposed back as close to 0 as possible) by dividing to
410 	 * the cycle time.
411 	 */
412 	div_s64_rem(offload->base_time, offload->cycle_time, &rem);
413 	rbt1 = rem;
414 
415 	div_s64_rem(admin->base_time, admin->cycle_time, &rem);
416 	rbt2 = rem;
417 
418 	stop_time = max_cycle_time + max(rbt1, rbt2);
419 
420 	/* delta1 is the relative base time of each GCL entry within
421 	 * the established ports' TAS config.
422 	 */
423 	for (i = 0, delta1 = 0;
424 	     i < offload->num_entries;
425 	     delta1 += offload->entries[i].interval, i++) {
426 		/* delta2 is the relative base time of each GCL entry
427 		 * within the newly added TAS config.
428 		 */
429 		for (j = 0, delta2 = 0;
430 		     j < admin->num_entries;
431 		     delta2 += admin->entries[j].interval, j++) {
432 			/* t1 follows all possible occurrences of the
433 			 * established ports' GCL entry i within the
434 			 * first cycle time.
435 			 */
436 			for (t1 = rbt1 + delta1;
437 			     t1 <= stop_time;
438 			     t1 += offload->cycle_time) {
439 				/* t2 follows all possible occurrences
440 				 * of the newly added GCL entry j
441 				 * within the first cycle time.
442 				 */
443 				for (t2 = rbt2 + delta2;
444 				     t2 <= stop_time;
445 				     t2 += admin->cycle_time) {
446 					if (t1 == t2) {
447 						dev_warn(priv->ds->dev,
448 							 "GCL entry %d collides with entry %d of port %d\n",
449 							 j, i, port);
450 						return true;
451 					}
452 				}
453 			}
454 		}
455 	}
456 
457 	return false;
458 }
459 
460 /* Check the tc-taprio configuration on @port for conflicts with the tc-gate
461  * global subschedule. If @port is -1, check it against all ports.
462  * To reuse the sja1105_tas_check_conflicts logic without refactoring it,
463  * convert the gating configuration to a dummy tc-taprio offload structure.
464  */
465 bool sja1105_gating_check_conflicts(struct sja1105_private *priv, int port,
466 				    struct netlink_ext_ack *extack)
467 {
468 	struct sja1105_gating_config *gating_cfg = &priv->tas_data.gating_cfg;
469 	size_t num_entries = gating_cfg->num_entries;
470 	struct tc_taprio_qopt_offload *dummy;
471 	struct sja1105_gate_entry *e;
472 	bool conflict;
473 	int i = 0;
474 
475 	if (list_empty(&gating_cfg->entries))
476 		return false;
477 
478 	dummy = kzalloc(struct_size(dummy, entries, num_entries), GFP_KERNEL);
479 	if (!dummy) {
480 		NL_SET_ERR_MSG_MOD(extack, "Failed to allocate memory");
481 		return true;
482 	}
483 
484 	dummy->num_entries = num_entries;
485 	dummy->base_time = gating_cfg->base_time;
486 	dummy->cycle_time = gating_cfg->cycle_time;
487 
488 	list_for_each_entry(e, &gating_cfg->entries, list)
489 		dummy->entries[i++].interval = e->interval;
490 
491 	if (port != -1) {
492 		conflict = sja1105_tas_check_conflicts(priv, port, dummy);
493 	} else {
494 		for (port = 0; port < SJA1105_NUM_PORTS; port++) {
495 			conflict = sja1105_tas_check_conflicts(priv, port,
496 							       dummy);
497 			if (conflict)
498 				break;
499 		}
500 	}
501 
502 	kfree(dummy);
503 
504 	return conflict;
505 }
506 
507 int sja1105_setup_tc_taprio(struct dsa_switch *ds, int port,
508 			    struct tc_taprio_qopt_offload *admin)
509 {
510 	struct sja1105_private *priv = ds->priv;
511 	struct sja1105_tas_data *tas_data = &priv->tas_data;
512 	int other_port, rc, i;
513 
514 	/* Can't change an already configured port (must delete qdisc first).
515 	 * Can't delete the qdisc from an unconfigured port.
516 	 */
517 	if (!!tas_data->offload[port] == admin->enable)
518 		return -EINVAL;
519 
520 	if (!admin->enable) {
521 		taprio_offload_free(tas_data->offload[port]);
522 		tas_data->offload[port] = NULL;
523 
524 		rc = sja1105_init_scheduling(priv);
525 		if (rc < 0)
526 			return rc;
527 
528 		return sja1105_static_config_reload(priv, SJA1105_SCHEDULING);
529 	}
530 
531 	/* The cycle time extension is the amount of time the last cycle from
532 	 * the old OPER needs to be extended in order to phase-align with the
533 	 * base time of the ADMIN when that becomes the new OPER.
534 	 * But of course our switch needs to be reset to switch-over between
535 	 * the ADMIN and the OPER configs - so much for a seamless transition.
536 	 * So don't add insult over injury and just say we don't support cycle
537 	 * time extension.
538 	 */
539 	if (admin->cycle_time_extension)
540 		return -ENOTSUPP;
541 
542 	for (i = 0; i < admin->num_entries; i++) {
543 		s64 delta_ns = admin->entries[i].interval;
544 		s64 delta_cycles = ns_to_sja1105_delta(delta_ns);
545 		bool too_long, too_short;
546 
547 		too_long = (delta_cycles >= SJA1105_TAS_MAX_DELTA);
548 		too_short = (delta_cycles == 0);
549 		if (too_long || too_short) {
550 			dev_err(priv->ds->dev,
551 				"Interval %llu too %s for GCL entry %d\n",
552 				delta_ns, too_long ? "long" : "short", i);
553 			return -ERANGE;
554 		}
555 	}
556 
557 	for (other_port = 0; other_port < SJA1105_NUM_PORTS; other_port++) {
558 		if (other_port == port)
559 			continue;
560 
561 		if (sja1105_tas_check_conflicts(priv, other_port, admin))
562 			return -ERANGE;
563 	}
564 
565 	if (sja1105_gating_check_conflicts(priv, port, NULL)) {
566 		dev_err(ds->dev, "Conflict with tc-gate schedule\n");
567 		return -ERANGE;
568 	}
569 
570 	tas_data->offload[port] = taprio_offload_get(admin);
571 
572 	rc = sja1105_init_scheduling(priv);
573 	if (rc < 0)
574 		return rc;
575 
576 	return sja1105_static_config_reload(priv, SJA1105_SCHEDULING);
577 }
578 
579 static int sja1105_tas_check_running(struct sja1105_private *priv)
580 {
581 	struct sja1105_tas_data *tas_data = &priv->tas_data;
582 	struct dsa_switch *ds = priv->ds;
583 	struct sja1105_ptp_cmd cmd = {0};
584 	int rc;
585 
586 	rc = sja1105_ptp_commit(ds, &cmd, SPI_READ);
587 	if (rc < 0)
588 		return rc;
589 
590 	if (cmd.ptpstrtsch == 1)
591 		/* Schedule successfully started */
592 		tas_data->state = SJA1105_TAS_STATE_RUNNING;
593 	else if (cmd.ptpstopsch == 1)
594 		/* Schedule is stopped */
595 		tas_data->state = SJA1105_TAS_STATE_DISABLED;
596 	else
597 		/* Schedule is probably not configured with PTP clock source */
598 		rc = -EINVAL;
599 
600 	return rc;
601 }
602 
603 /* Write to PTPCLKCORP */
604 static int sja1105_tas_adjust_drift(struct sja1105_private *priv,
605 				    u64 correction)
606 {
607 	const struct sja1105_regs *regs = priv->info->regs;
608 	u32 ptpclkcorp = ns_to_sja1105_ticks(correction);
609 
610 	return sja1105_xfer_u32(priv, SPI_WRITE, regs->ptpclkcorp,
611 				&ptpclkcorp, NULL);
612 }
613 
614 /* Write to PTPSCHTM */
615 static int sja1105_tas_set_base_time(struct sja1105_private *priv,
616 				     u64 base_time)
617 {
618 	const struct sja1105_regs *regs = priv->info->regs;
619 	u64 ptpschtm = ns_to_sja1105_ticks(base_time);
620 
621 	return sja1105_xfer_u64(priv, SPI_WRITE, regs->ptpschtm,
622 				&ptpschtm, NULL);
623 }
624 
625 static int sja1105_tas_start(struct sja1105_private *priv)
626 {
627 	struct sja1105_tas_data *tas_data = &priv->tas_data;
628 	struct sja1105_ptp_cmd *cmd = &priv->ptp_data.cmd;
629 	struct dsa_switch *ds = priv->ds;
630 	int rc;
631 
632 	dev_dbg(ds->dev, "Starting the TAS\n");
633 
634 	if (tas_data->state == SJA1105_TAS_STATE_ENABLED_NOT_RUNNING ||
635 	    tas_data->state == SJA1105_TAS_STATE_RUNNING) {
636 		dev_err(ds->dev, "TAS already started\n");
637 		return -EINVAL;
638 	}
639 
640 	cmd->ptpstrtsch = 1;
641 	cmd->ptpstopsch = 0;
642 
643 	rc = sja1105_ptp_commit(ds, cmd, SPI_WRITE);
644 	if (rc < 0)
645 		return rc;
646 
647 	tas_data->state = SJA1105_TAS_STATE_ENABLED_NOT_RUNNING;
648 
649 	return 0;
650 }
651 
652 static int sja1105_tas_stop(struct sja1105_private *priv)
653 {
654 	struct sja1105_tas_data *tas_data = &priv->tas_data;
655 	struct sja1105_ptp_cmd *cmd = &priv->ptp_data.cmd;
656 	struct dsa_switch *ds = priv->ds;
657 	int rc;
658 
659 	dev_dbg(ds->dev, "Stopping the TAS\n");
660 
661 	if (tas_data->state == SJA1105_TAS_STATE_DISABLED) {
662 		dev_err(ds->dev, "TAS already disabled\n");
663 		return -EINVAL;
664 	}
665 
666 	cmd->ptpstopsch = 1;
667 	cmd->ptpstrtsch = 0;
668 
669 	rc = sja1105_ptp_commit(ds, cmd, SPI_WRITE);
670 	if (rc < 0)
671 		return rc;
672 
673 	tas_data->state = SJA1105_TAS_STATE_DISABLED;
674 
675 	return 0;
676 }
677 
678 /* The schedule engine and the PTP clock are driven by the same oscillator, and
679  * they run in parallel. But whilst the PTP clock can keep an absolute
680  * time-of-day, the schedule engine is only running in 'ticks' (25 ticks make
681  * up a delta, which is 200ns), and wrapping around at the end of each cycle.
682  * The schedule engine is started when the PTP clock reaches the PTPSCHTM time
683  * (in PTP domain).
684  * Because the PTP clock can be rate-corrected (accelerated or slowed down) by
685  * a software servo, and the schedule engine clock runs in parallel to the PTP
686  * clock, there is logic internal to the switch that periodically keeps the
687  * schedule engine from drifting away. The frequency with which this internal
688  * syntonization happens is the PTP clock correction period (PTPCLKCORP). It is
689  * a value also in the PTP clock domain, and is also rate-corrected.
690  * To be precise, during a correction period, there is logic to determine by
691  * how many scheduler clock ticks has the PTP clock drifted. At the end of each
692  * correction period/beginning of new one, the length of a delta is shrunk or
693  * expanded with an integer number of ticks, compared with the typical 25.
694  * So a delta lasts for 200ns (or 25 ticks) only on average.
695  * Sometimes it is longer, sometimes it is shorter. The internal syntonization
696  * logic can adjust for at most 5 ticks each 20 ticks.
697  *
698  * The first implication is that you should choose your schedule correction
699  * period to be an integer multiple of the schedule length. Preferably one.
700  * In case there are schedules of multiple ports active, then the correction
701  * period needs to be a multiple of them all. Given the restriction that the
702  * cycle times have to be multiples of one another anyway, this means the
703  * correction period can simply be the largest cycle time, hence the current
704  * choice. This way, the updates are always synchronous to the transmission
705  * cycle, and therefore predictable.
706  *
707  * The second implication is that at the beginning of a correction period, the
708  * first few deltas will be modulated in time, until the schedule engine is
709  * properly phase-aligned with the PTP clock. For this reason, you should place
710  * your best-effort traffic at the beginning of a cycle, and your
711  * time-triggered traffic afterwards.
712  *
713  * The third implication is that once the schedule engine is started, it can
714  * only adjust for so much drift within a correction period. In the servo you
715  * can only change the PTPCLKRATE, but not step the clock (PTPCLKADD). If you
716  * want to do the latter, you need to stop and restart the schedule engine,
717  * which is what the state machine handles.
718  */
719 static void sja1105_tas_state_machine(struct work_struct *work)
720 {
721 	struct sja1105_tas_data *tas_data = work_to_sja1105_tas(work);
722 	struct sja1105_private *priv = tas_to_sja1105(tas_data);
723 	struct sja1105_ptp_data *ptp_data = &priv->ptp_data;
724 	struct timespec64 base_time_ts, now_ts;
725 	struct dsa_switch *ds = priv->ds;
726 	struct timespec64 diff;
727 	s64 base_time, now;
728 	int rc = 0;
729 
730 	mutex_lock(&ptp_data->lock);
731 
732 	switch (tas_data->state) {
733 	case SJA1105_TAS_STATE_DISABLED:
734 		/* Can't do anything at all if clock is still being stepped */
735 		if (tas_data->last_op != SJA1105_PTP_ADJUSTFREQ)
736 			break;
737 
738 		rc = sja1105_tas_adjust_drift(priv, tas_data->max_cycle_time);
739 		if (rc < 0)
740 			break;
741 
742 		rc = __sja1105_ptp_gettimex(ds, &now, NULL);
743 		if (rc < 0)
744 			break;
745 
746 		/* Plan to start the earliest schedule first. The others
747 		 * will be started in hardware, by way of their respective
748 		 * entry points delta.
749 		 * Try our best to avoid fringe cases (race condition between
750 		 * ptpschtm and ptpstrtsch) by pushing the oper_base_time at
751 		 * least one second in the future from now. This is not ideal,
752 		 * but this only needs to buy us time until the
753 		 * sja1105_tas_start command below gets executed.
754 		 */
755 		base_time = future_base_time(tas_data->earliest_base_time,
756 					     tas_data->max_cycle_time,
757 					     now + 1ull * NSEC_PER_SEC);
758 		base_time -= sja1105_delta_to_ns(1);
759 
760 		rc = sja1105_tas_set_base_time(priv, base_time);
761 		if (rc < 0)
762 			break;
763 
764 		tas_data->oper_base_time = base_time;
765 
766 		rc = sja1105_tas_start(priv);
767 		if (rc < 0)
768 			break;
769 
770 		base_time_ts = ns_to_timespec64(base_time);
771 		now_ts = ns_to_timespec64(now);
772 
773 		dev_dbg(ds->dev, "OPER base time %lld.%09ld (now %lld.%09ld)\n",
774 			base_time_ts.tv_sec, base_time_ts.tv_nsec,
775 			now_ts.tv_sec, now_ts.tv_nsec);
776 
777 		break;
778 
779 	case SJA1105_TAS_STATE_ENABLED_NOT_RUNNING:
780 		if (tas_data->last_op != SJA1105_PTP_ADJUSTFREQ) {
781 			/* Clock was stepped.. bad news for TAS */
782 			sja1105_tas_stop(priv);
783 			break;
784 		}
785 
786 		/* Check if TAS has actually started, by comparing the
787 		 * scheduled start time with the SJA1105 PTP clock
788 		 */
789 		rc = __sja1105_ptp_gettimex(ds, &now, NULL);
790 		if (rc < 0)
791 			break;
792 
793 		if (now < tas_data->oper_base_time) {
794 			/* TAS has not started yet */
795 			diff = ns_to_timespec64(tas_data->oper_base_time - now);
796 			dev_dbg(ds->dev, "time to start: [%lld.%09ld]",
797 				diff.tv_sec, diff.tv_nsec);
798 			break;
799 		}
800 
801 		/* Time elapsed, what happened? */
802 		rc = sja1105_tas_check_running(priv);
803 		if (rc < 0)
804 			break;
805 
806 		if (tas_data->state != SJA1105_TAS_STATE_RUNNING)
807 			/* TAS has started */
808 			dev_err(ds->dev,
809 				"TAS not started despite time elapsed\n");
810 
811 		break;
812 
813 	case SJA1105_TAS_STATE_RUNNING:
814 		/* Clock was stepped.. bad news for TAS */
815 		if (tas_data->last_op != SJA1105_PTP_ADJUSTFREQ) {
816 			sja1105_tas_stop(priv);
817 			break;
818 		}
819 
820 		rc = sja1105_tas_check_running(priv);
821 		if (rc < 0)
822 			break;
823 
824 		if (tas_data->state != SJA1105_TAS_STATE_RUNNING)
825 			dev_err(ds->dev, "TAS surprisingly stopped\n");
826 
827 		break;
828 
829 	default:
830 		if (net_ratelimit())
831 			dev_err(ds->dev, "TAS in an invalid state (incorrect use of API)!\n");
832 	}
833 
834 	if (rc && net_ratelimit())
835 		dev_err(ds->dev, "An operation returned %d\n", rc);
836 
837 	mutex_unlock(&ptp_data->lock);
838 }
839 
840 void sja1105_tas_clockstep(struct dsa_switch *ds)
841 {
842 	struct sja1105_private *priv = ds->priv;
843 	struct sja1105_tas_data *tas_data = &priv->tas_data;
844 
845 	if (!tas_data->enabled)
846 		return;
847 
848 	tas_data->last_op = SJA1105_PTP_CLOCKSTEP;
849 	schedule_work(&tas_data->tas_work);
850 }
851 
852 void sja1105_tas_adjfreq(struct dsa_switch *ds)
853 {
854 	struct sja1105_private *priv = ds->priv;
855 	struct sja1105_tas_data *tas_data = &priv->tas_data;
856 
857 	if (!tas_data->enabled)
858 		return;
859 
860 	/* No reason to schedule the workqueue, nothing changed */
861 	if (tas_data->state == SJA1105_TAS_STATE_RUNNING)
862 		return;
863 
864 	tas_data->last_op = SJA1105_PTP_ADJUSTFREQ;
865 	schedule_work(&tas_data->tas_work);
866 }
867 
868 void sja1105_tas_setup(struct dsa_switch *ds)
869 {
870 	struct sja1105_private *priv = ds->priv;
871 	struct sja1105_tas_data *tas_data = &priv->tas_data;
872 
873 	INIT_WORK(&tas_data->tas_work, sja1105_tas_state_machine);
874 	tas_data->state = SJA1105_TAS_STATE_DISABLED;
875 	tas_data->last_op = SJA1105_PTP_NONE;
876 
877 	INIT_LIST_HEAD(&tas_data->gating_cfg.entries);
878 }
879 
880 void sja1105_tas_teardown(struct dsa_switch *ds)
881 {
882 	struct sja1105_private *priv = ds->priv;
883 	struct tc_taprio_qopt_offload *offload;
884 	int port;
885 
886 	cancel_work_sync(&priv->tas_data.tas_work);
887 
888 	for (port = 0; port < SJA1105_NUM_PORTS; port++) {
889 		offload = priv->tas_data.offload[port];
890 		if (!offload)
891 			continue;
892 
893 		taprio_offload_free(offload);
894 	}
895 }
896