xref: /openbmc/linux/drivers/net/dsa/sja1105/sja1105_tas.c (revision a03a91bd68cb00c615e602cf605e6be12bedaa90)
1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (c) 2019, Vladimir Oltean <olteanv@gmail.com>
3  */
4 #include "sja1105.h"
5 
/* Clock source selectors for the "clksrc" field of the Schedule Entry Points
 * Parameters table. Only SJA1105_TAS_CLKSRC_PTP is used in this file.
 */
#define SJA1105_TAS_CLKSRC_DISABLED	0
#define SJA1105_TAS_CLKSRC_STANDALONE	1
#define SJA1105_TAS_CLKSRC_AS6802	2
#define SJA1105_TAS_CLKSRC_PTP		3

/* Bitmask with one bit set for each of the SJA1105_NUM_TC traffic classes */
#define SJA1105_GATE_MASK		GENMASK_ULL(SJA1105_NUM_TC - 1, 0)

/* Get the struct sja1105_tas_data that embeds the given tas_work member */
#define work_to_sja1105_tas(work) \
	container_of((work), struct sja1105_tas_data, tas_work)
/* Get the struct sja1105_private that embeds the given tas_data member */
#define tas_to_sja1105(tas) \
	container_of((tas), struct sja1105_private, tas_data)
16 
17 static int sja1105_tas_set_runtime_params(struct sja1105_private *priv)
18 {
19 	struct sja1105_tas_data *tas_data = &priv->tas_data;
20 	struct sja1105_gating_config *gating_cfg = &tas_data->gating_cfg;
21 	struct dsa_switch *ds = priv->ds;
22 	s64 earliest_base_time = S64_MAX;
23 	s64 latest_base_time = 0;
24 	s64 its_cycle_time = 0;
25 	s64 max_cycle_time = 0;
26 	int port;
27 
28 	tas_data->enabled = false;
29 
30 	for (port = 0; port < ds->num_ports; port++) {
31 		const struct tc_taprio_qopt_offload *offload;
32 
33 		offload = tas_data->offload[port];
34 		if (!offload)
35 			continue;
36 
37 		tas_data->enabled = true;
38 
39 		if (max_cycle_time < offload->cycle_time)
40 			max_cycle_time = offload->cycle_time;
41 		if (latest_base_time < offload->base_time)
42 			latest_base_time = offload->base_time;
43 		if (earliest_base_time > offload->base_time) {
44 			earliest_base_time = offload->base_time;
45 			its_cycle_time = offload->cycle_time;
46 		}
47 	}
48 
49 	if (!list_empty(&gating_cfg->entries)) {
50 		tas_data->enabled = true;
51 
52 		if (max_cycle_time < gating_cfg->cycle_time)
53 			max_cycle_time = gating_cfg->cycle_time;
54 		if (latest_base_time < gating_cfg->base_time)
55 			latest_base_time = gating_cfg->base_time;
56 		if (earliest_base_time > gating_cfg->base_time) {
57 			earliest_base_time = gating_cfg->base_time;
58 			its_cycle_time = gating_cfg->cycle_time;
59 		}
60 	}
61 
62 	if (!tas_data->enabled)
63 		return 0;
64 
65 	/* Roll the earliest base time over until it is in a comparable
66 	 * time base with the latest, then compare their deltas.
67 	 * We want to enforce that all ports' base times are within
68 	 * SJA1105_TAS_MAX_DELTA 200ns cycles of one another.
69 	 */
70 	earliest_base_time = future_base_time(earliest_base_time,
71 					      its_cycle_time,
72 					      latest_base_time);
73 	while (earliest_base_time > latest_base_time)
74 		earliest_base_time -= its_cycle_time;
75 	if (latest_base_time - earliest_base_time >
76 	    sja1105_delta_to_ns(SJA1105_TAS_MAX_DELTA)) {
77 		dev_err(ds->dev,
78 			"Base times too far apart: min %llu max %llu\n",
79 			earliest_base_time, latest_base_time);
80 		return -ERANGE;
81 	}
82 
83 	tas_data->earliest_base_time = earliest_base_time;
84 	tas_data->max_cycle_time = max_cycle_time;
85 
86 	dev_dbg(ds->dev, "earliest base time %lld ns\n", earliest_base_time);
87 	dev_dbg(ds->dev, "latest base time %lld ns\n", latest_base_time);
88 	dev_dbg(ds->dev, "longest cycle time %lld ns\n", max_cycle_time);
89 
90 	return 0;
91 }
92 
93 /* Lo and behold: the egress scheduler from hell.
94  *
95  * At the hardware level, the Time-Aware Shaper holds a global linear array of
96  * all schedule entries for all ports. These are the Gate Control List (GCL)
97  * entries, let's call them "timeslots" for short. This linear array of
98  * timeslots is held in BLK_IDX_SCHEDULE.
99  *
100  * Then there are a maximum of 8 "execution threads" inside the switch, which
101  * iterate cyclically through the "schedule". Each "cycle" has an entry point
102  * and an exit point, both being timeslot indices in the schedule table. The
103  * hardware calls each cycle a "subschedule".
104  *
105  * Subschedule (cycle) i starts when
106  *   ptpclkval >= ptpschtm + BLK_IDX_SCHEDULE_ENTRY_POINTS[i].delta.
107  *
108  * The hardware scheduler iterates BLK_IDX_SCHEDULE with a k ranging from
109  *   k = BLK_IDX_SCHEDULE_ENTRY_POINTS[i].address to
110  *   k = BLK_IDX_SCHEDULE_PARAMS.subscheind[i]
111  *
112  * For each schedule entry (timeslot) k, the engine executes the gate control
113  * list entry for the duration of BLK_IDX_SCHEDULE[k].delta.
114  *
115  *         +---------+
116  *         |         | BLK_IDX_SCHEDULE_ENTRY_POINTS_PARAMS
117  *         +---------+
118  *              |
119  *              +-----------------+
120  *                                | .actsubsch
121  *  BLK_IDX_SCHEDULE_ENTRY_POINTS v
122  *                 +-------+-------+
123  *                 |cycle 0|cycle 1|
124  *                 +-------+-------+
125  *                   |  |      |  |
126  *  +----------------+  |      |  +-------------------------------------+
127  *  |   .subschindx     |      |             .subschindx                |
128  *  |                   |      +---------------+                        |
129  *  |          .address |        .address      |                        |
130  *  |                   |                      |                        |
131  *  |                   |                      |                        |
132  *  |  BLK_IDX_SCHEDULE v                      v                        |
133  *  |              +-------+-------+-------+-------+-------+------+     |
134  *  |              |entry 0|entry 1|entry 2|entry 3|entry 4|entry5|     |
135  *  |              +-------+-------+-------+-------+-------+------+     |
136  *  |                                  ^                    ^  ^  ^     |
137  *  |                                  |                    |  |  |     |
138  *  |        +-------------------------+                    |  |  |     |
139  *  |        |              +-------------------------------+  |  |     |
140  *  |        |              |              +-------------------+  |     |
141  *  |        |              |              |                      |     |
142  *  | +---------------------------------------------------------------+ |
143  *  | |subscheind[0]<=subscheind[1]<=subscheind[2]<=...<=subscheind[7]| |
144  *  | +---------------------------------------------------------------+ |
145  *  |        ^              ^                BLK_IDX_SCHEDULE_PARAMS    |
146  *  |        |              |                                           |
147  *  +--------+              +-------------------------------------------+
148  *
149  *  In the above picture there are two subschedules (cycles):
150  *
151  *  - cycle 0: iterates the schedule table from 0 to 2 (and back)
152  *  - cycle 1: iterates the schedule table from 3 to 5 (and back)
153  *
154  *  All other possible execution threads must be marked as unused by making
155  *  their "subschedule end index" (subscheind) equal to the last valid
156  *  subschedule's end index (in this case 5).
157  */
158 int sja1105_init_scheduling(struct sja1105_private *priv)
159 {
160 	struct sja1105_schedule_entry_points_entry *schedule_entry_points;
161 	struct sja1105_schedule_entry_points_params_entry
162 					*schedule_entry_points_params;
163 	struct sja1105_schedule_params_entry *schedule_params;
164 	struct sja1105_tas_data *tas_data = &priv->tas_data;
165 	struct sja1105_gating_config *gating_cfg = &tas_data->gating_cfg;
166 	struct sja1105_schedule_entry *schedule;
167 	struct dsa_switch *ds = priv->ds;
168 	struct sja1105_table *table;
169 	int schedule_start_idx;
170 	s64 entry_point_delta;
171 	int schedule_end_idx;
172 	int num_entries = 0;
173 	int num_cycles = 0;
174 	int cycle = 0;
175 	int i, k = 0;
176 	int port, rc;
177 
178 	rc = sja1105_tas_set_runtime_params(priv);
179 	if (rc < 0)
180 		return rc;
181 
182 	/* Discard previous Schedule Table */
183 	table = &priv->static_config.tables[BLK_IDX_SCHEDULE];
184 	if (table->entry_count) {
185 		kfree(table->entries);
186 		table->entry_count = 0;
187 	}
188 
189 	/* Discard previous Schedule Entry Points Parameters Table */
190 	table = &priv->static_config.tables[BLK_IDX_SCHEDULE_ENTRY_POINTS_PARAMS];
191 	if (table->entry_count) {
192 		kfree(table->entries);
193 		table->entry_count = 0;
194 	}
195 
196 	/* Discard previous Schedule Parameters Table */
197 	table = &priv->static_config.tables[BLK_IDX_SCHEDULE_PARAMS];
198 	if (table->entry_count) {
199 		kfree(table->entries);
200 		table->entry_count = 0;
201 	}
202 
203 	/* Discard previous Schedule Entry Points Table */
204 	table = &priv->static_config.tables[BLK_IDX_SCHEDULE_ENTRY_POINTS];
205 	if (table->entry_count) {
206 		kfree(table->entries);
207 		table->entry_count = 0;
208 	}
209 
210 	/* Figure out the dimensioning of the problem */
211 	for (port = 0; port < ds->num_ports; port++) {
212 		if (tas_data->offload[port]) {
213 			num_entries += tas_data->offload[port]->num_entries;
214 			num_cycles++;
215 		}
216 	}
217 
218 	if (!list_empty(&gating_cfg->entries)) {
219 		num_entries += gating_cfg->num_entries;
220 		num_cycles++;
221 	}
222 
223 	/* Nothing to do */
224 	if (!num_cycles)
225 		return 0;
226 
227 	/* Pre-allocate space in the static config tables */
228 
229 	/* Schedule Table */
230 	table = &priv->static_config.tables[BLK_IDX_SCHEDULE];
231 	table->entries = kcalloc(num_entries, table->ops->unpacked_entry_size,
232 				 GFP_KERNEL);
233 	if (!table->entries)
234 		return -ENOMEM;
235 	table->entry_count = num_entries;
236 	schedule = table->entries;
237 
238 	/* Schedule Points Parameters Table */
239 	table = &priv->static_config.tables[BLK_IDX_SCHEDULE_ENTRY_POINTS_PARAMS];
240 	table->entries = kcalloc(SJA1105_MAX_SCHEDULE_ENTRY_POINTS_PARAMS_COUNT,
241 				 table->ops->unpacked_entry_size, GFP_KERNEL);
242 	if (!table->entries)
243 		/* Previously allocated memory will be freed automatically in
244 		 * sja1105_static_config_free. This is true for all early
245 		 * returns below.
246 		 */
247 		return -ENOMEM;
248 	table->entry_count = SJA1105_MAX_SCHEDULE_ENTRY_POINTS_PARAMS_COUNT;
249 	schedule_entry_points_params = table->entries;
250 
251 	/* Schedule Parameters Table */
252 	table = &priv->static_config.tables[BLK_IDX_SCHEDULE_PARAMS];
253 	table->entries = kcalloc(SJA1105_MAX_SCHEDULE_PARAMS_COUNT,
254 				 table->ops->unpacked_entry_size, GFP_KERNEL);
255 	if (!table->entries)
256 		return -ENOMEM;
257 	table->entry_count = SJA1105_MAX_SCHEDULE_PARAMS_COUNT;
258 	schedule_params = table->entries;
259 
260 	/* Schedule Entry Points Table */
261 	table = &priv->static_config.tables[BLK_IDX_SCHEDULE_ENTRY_POINTS];
262 	table->entries = kcalloc(num_cycles, table->ops->unpacked_entry_size,
263 				 GFP_KERNEL);
264 	if (!table->entries)
265 		return -ENOMEM;
266 	table->entry_count = num_cycles;
267 	schedule_entry_points = table->entries;
268 
269 	/* Finally start populating the static config tables */
270 	schedule_entry_points_params->clksrc = SJA1105_TAS_CLKSRC_PTP;
271 	schedule_entry_points_params->actsubsch = num_cycles - 1;
272 
273 	for (port = 0; port < ds->num_ports; port++) {
274 		const struct tc_taprio_qopt_offload *offload;
275 		/* Relative base time */
276 		s64 rbt;
277 
278 		offload = tas_data->offload[port];
279 		if (!offload)
280 			continue;
281 
282 		schedule_start_idx = k;
283 		schedule_end_idx = k + offload->num_entries - 1;
284 		/* This is the base time expressed as a number of TAS ticks
285 		 * relative to PTPSCHTM, which we'll (perhaps improperly) call
286 		 * the operational base time.
287 		 */
288 		rbt = future_base_time(offload->base_time,
289 				       offload->cycle_time,
290 				       tas_data->earliest_base_time);
291 		rbt -= tas_data->earliest_base_time;
292 		/* UM10944.pdf 4.2.2. Schedule Entry Points table says that
293 		 * delta cannot be zero, which is shitty. Advance all relative
294 		 * base times by 1 TAS delta, so that even the earliest base
295 		 * time becomes 1 in relative terms. Then start the operational
296 		 * base time (PTPSCHTM) one TAS delta earlier than planned.
297 		 */
298 		entry_point_delta = ns_to_sja1105_delta(rbt) + 1;
299 
300 		schedule_entry_points[cycle].subschindx = cycle;
301 		schedule_entry_points[cycle].delta = entry_point_delta;
302 		schedule_entry_points[cycle].address = schedule_start_idx;
303 
304 		/* The subschedule end indices need to be
305 		 * monotonically increasing.
306 		 */
307 		for (i = cycle; i < 8; i++)
308 			schedule_params->subscheind[i] = schedule_end_idx;
309 
310 		for (i = 0; i < offload->num_entries; i++, k++) {
311 			s64 delta_ns = offload->entries[i].interval;
312 
313 			schedule[k].delta = ns_to_sja1105_delta(delta_ns);
314 			schedule[k].destports = BIT(port);
315 			schedule[k].resmedia_en = true;
316 			schedule[k].resmedia = SJA1105_GATE_MASK &
317 					~offload->entries[i].gate_mask;
318 		}
319 		cycle++;
320 	}
321 
322 	if (!list_empty(&gating_cfg->entries)) {
323 		struct sja1105_gate_entry *e;
324 
325 		/* Relative base time */
326 		s64 rbt;
327 
328 		schedule_start_idx = k;
329 		schedule_end_idx = k + gating_cfg->num_entries - 1;
330 		rbt = future_base_time(gating_cfg->base_time,
331 				       gating_cfg->cycle_time,
332 				       tas_data->earliest_base_time);
333 		rbt -= tas_data->earliest_base_time;
334 		entry_point_delta = ns_to_sja1105_delta(rbt) + 1;
335 
336 		schedule_entry_points[cycle].subschindx = cycle;
337 		schedule_entry_points[cycle].delta = entry_point_delta;
338 		schedule_entry_points[cycle].address = schedule_start_idx;
339 
340 		for (i = cycle; i < 8; i++)
341 			schedule_params->subscheind[i] = schedule_end_idx;
342 
343 		list_for_each_entry(e, &gating_cfg->entries, list) {
344 			schedule[k].delta = ns_to_sja1105_delta(e->interval);
345 			schedule[k].destports = e->rule->vl.destports;
346 			schedule[k].setvalid = true;
347 			schedule[k].txen = true;
348 			schedule[k].vlindex = e->rule->vl.sharindx;
349 			schedule[k].winstindex = e->rule->vl.sharindx;
350 			if (e->gate_state) /* Gate open */
351 				schedule[k].winst = true;
352 			else /* Gate closed */
353 				schedule[k].winend = true;
354 			k++;
355 		}
356 	}
357 
358 	return 0;
359 }
360 
361 /* Be there 2 port subschedules, each executing an arbitrary number of gate
362  * open/close events cyclically.
363  * None of those gate events must ever occur at the exact same time, otherwise
364  * the switch is known to act in exotically strange ways.
365  * However the hardware doesn't bother performing these integrity checks.
366  * So here we are with the task of validating whether the new @admin offload
367  * has any conflict with the already established TAS configuration in
368  * tas_data->offload.  We already know the other ports are in harmony with one
369  * another, otherwise we wouldn't have saved them.
370  * Each gate event executes periodically, with a period of @cycle_time and a
371  * phase given by its cycle's @base_time plus its offset within the cycle
372  * (which in turn is given by the length of the events prior to it).
373  * There are two aspects to possible collisions:
374  * - Collisions within one cycle's (actually the longest cycle's) time frame.
375  *   For that, we need to compare the cartesian product of each possible
376  *   occurrence of each event within one cycle time.
377  * - Collisions in the future. Events may not collide within one cycle time,
378  *   but if two port schedules don't have the same periodicity (aka the cycle
379  *   times aren't multiples of one another), they surely will some time in the
380  *   future (actually they will collide an infinite amount of times).
381  */
382 static bool
383 sja1105_tas_check_conflicts(struct sja1105_private *priv, int port,
384 			    const struct tc_taprio_qopt_offload *admin)
385 {
386 	struct sja1105_tas_data *tas_data = &priv->tas_data;
387 	const struct tc_taprio_qopt_offload *offload;
388 	s64 max_cycle_time, min_cycle_time;
389 	s64 delta1, delta2;
390 	s64 rbt1, rbt2;
391 	s64 stop_time;
392 	s64 t1, t2;
393 	int i, j;
394 	s32 rem;
395 
396 	offload = tas_data->offload[port];
397 	if (!offload)
398 		return false;
399 
400 	/* Check if the two cycle times are multiples of one another.
401 	 * If they aren't, then they will surely collide.
402 	 */
403 	max_cycle_time = max(offload->cycle_time, admin->cycle_time);
404 	min_cycle_time = min(offload->cycle_time, admin->cycle_time);
405 	div_s64_rem(max_cycle_time, min_cycle_time, &rem);
406 	if (rem)
407 		return true;
408 
409 	/* Calculate the "reduced" base time of each of the two cycles
410 	 * (transposed back as close to 0 as possible) by dividing to
411 	 * the cycle time.
412 	 */
413 	div_s64_rem(offload->base_time, offload->cycle_time, &rem);
414 	rbt1 = rem;
415 
416 	div_s64_rem(admin->base_time, admin->cycle_time, &rem);
417 	rbt2 = rem;
418 
419 	stop_time = max_cycle_time + max(rbt1, rbt2);
420 
421 	/* delta1 is the relative base time of each GCL entry within
422 	 * the established ports' TAS config.
423 	 */
424 	for (i = 0, delta1 = 0;
425 	     i < offload->num_entries;
426 	     delta1 += offload->entries[i].interval, i++) {
427 		/* delta2 is the relative base time of each GCL entry
428 		 * within the newly added TAS config.
429 		 */
430 		for (j = 0, delta2 = 0;
431 		     j < admin->num_entries;
432 		     delta2 += admin->entries[j].interval, j++) {
433 			/* t1 follows all possible occurrences of the
434 			 * established ports' GCL entry i within the
435 			 * first cycle time.
436 			 */
437 			for (t1 = rbt1 + delta1;
438 			     t1 <= stop_time;
439 			     t1 += offload->cycle_time) {
440 				/* t2 follows all possible occurrences
441 				 * of the newly added GCL entry j
442 				 * within the first cycle time.
443 				 */
444 				for (t2 = rbt2 + delta2;
445 				     t2 <= stop_time;
446 				     t2 += admin->cycle_time) {
447 					if (t1 == t2) {
448 						dev_warn(priv->ds->dev,
449 							 "GCL entry %d collides with entry %d of port %d\n",
450 							 j, i, port);
451 						return true;
452 					}
453 				}
454 			}
455 		}
456 	}
457 
458 	return false;
459 }
460 
461 /* Check the tc-taprio configuration on @port for conflicts with the tc-gate
462  * global subschedule. If @port is -1, check it against all ports.
463  * To reuse the sja1105_tas_check_conflicts logic without refactoring it,
464  * convert the gating configuration to a dummy tc-taprio offload structure.
465  */
466 bool sja1105_gating_check_conflicts(struct sja1105_private *priv, int port,
467 				    struct netlink_ext_ack *extack)
468 {
469 	struct sja1105_gating_config *gating_cfg = &priv->tas_data.gating_cfg;
470 	size_t num_entries = gating_cfg->num_entries;
471 	struct tc_taprio_qopt_offload *dummy;
472 	struct dsa_switch *ds = priv->ds;
473 	struct sja1105_gate_entry *e;
474 	bool conflict;
475 	int i = 0;
476 
477 	if (list_empty(&gating_cfg->entries))
478 		return false;
479 
480 	dummy = kzalloc(struct_size(dummy, entries, num_entries), GFP_KERNEL);
481 	if (!dummy) {
482 		NL_SET_ERR_MSG_MOD(extack, "Failed to allocate memory");
483 		return true;
484 	}
485 
486 	dummy->num_entries = num_entries;
487 	dummy->base_time = gating_cfg->base_time;
488 	dummy->cycle_time = gating_cfg->cycle_time;
489 
490 	list_for_each_entry(e, &gating_cfg->entries, list)
491 		dummy->entries[i++].interval = e->interval;
492 
493 	if (port != -1) {
494 		conflict = sja1105_tas_check_conflicts(priv, port, dummy);
495 	} else {
496 		for (port = 0; port < ds->num_ports; port++) {
497 			conflict = sja1105_tas_check_conflicts(priv, port,
498 							       dummy);
499 			if (conflict)
500 				break;
501 		}
502 	}
503 
504 	kfree(dummy);
505 
506 	return conflict;
507 }
508 
509 int sja1105_setup_tc_taprio(struct dsa_switch *ds, int port,
510 			    struct tc_taprio_qopt_offload *admin)
511 {
512 	struct sja1105_private *priv = ds->priv;
513 	struct sja1105_tas_data *tas_data = &priv->tas_data;
514 	int other_port, rc, i;
515 
516 	/* Can't change an already configured port (must delete qdisc first).
517 	 * Can't delete the qdisc from an unconfigured port.
518 	 */
519 	if ((!!tas_data->offload[port] && admin->cmd == TAPRIO_CMD_REPLACE) ||
520 	    (!tas_data->offload[port] && admin->cmd == TAPRIO_CMD_DESTROY))
521 		return -EINVAL;
522 
523 	if (admin->cmd == TAPRIO_CMD_DESTROY) {
524 		taprio_offload_free(tas_data->offload[port]);
525 		tas_data->offload[port] = NULL;
526 
527 		rc = sja1105_init_scheduling(priv);
528 		if (rc < 0)
529 			return rc;
530 
531 		return sja1105_static_config_reload(priv, SJA1105_SCHEDULING);
532 	} else if (admin->cmd != TAPRIO_CMD_REPLACE) {
533 		return -EOPNOTSUPP;
534 	}
535 
536 	/* The cycle time extension is the amount of time the last cycle from
537 	 * the old OPER needs to be extended in order to phase-align with the
538 	 * base time of the ADMIN when that becomes the new OPER.
539 	 * But of course our switch needs to be reset to switch-over between
540 	 * the ADMIN and the OPER configs - so much for a seamless transition.
541 	 * So don't add insult over injury and just say we don't support cycle
542 	 * time extension.
543 	 */
544 	if (admin->cycle_time_extension)
545 		return -ENOTSUPP;
546 
547 	for (i = 0; i < admin->num_entries; i++) {
548 		s64 delta_ns = admin->entries[i].interval;
549 		s64 delta_cycles = ns_to_sja1105_delta(delta_ns);
550 		bool too_long, too_short;
551 
552 		too_long = (delta_cycles >= SJA1105_TAS_MAX_DELTA);
553 		too_short = (delta_cycles == 0);
554 		if (too_long || too_short) {
555 			dev_err(priv->ds->dev,
556 				"Interval %llu too %s for GCL entry %d\n",
557 				delta_ns, too_long ? "long" : "short", i);
558 			return -ERANGE;
559 		}
560 	}
561 
562 	for (other_port = 0; other_port < ds->num_ports; other_port++) {
563 		if (other_port == port)
564 			continue;
565 
566 		if (sja1105_tas_check_conflicts(priv, other_port, admin))
567 			return -ERANGE;
568 	}
569 
570 	if (sja1105_gating_check_conflicts(priv, port, NULL)) {
571 		dev_err(ds->dev, "Conflict with tc-gate schedule\n");
572 		return -ERANGE;
573 	}
574 
575 	tas_data->offload[port] = taprio_offload_get(admin);
576 
577 	rc = sja1105_init_scheduling(priv);
578 	if (rc < 0)
579 		return rc;
580 
581 	return sja1105_static_config_reload(priv, SJA1105_SCHEDULING);
582 }
583 
584 static int sja1105_tas_check_running(struct sja1105_private *priv)
585 {
586 	struct sja1105_tas_data *tas_data = &priv->tas_data;
587 	struct dsa_switch *ds = priv->ds;
588 	struct sja1105_ptp_cmd cmd = {0};
589 	int rc;
590 
591 	rc = sja1105_ptp_commit(ds, &cmd, SPI_READ);
592 	if (rc < 0)
593 		return rc;
594 
595 	if (cmd.ptpstrtsch == 1)
596 		/* Schedule successfully started */
597 		tas_data->state = SJA1105_TAS_STATE_RUNNING;
598 	else if (cmd.ptpstopsch == 1)
599 		/* Schedule is stopped */
600 		tas_data->state = SJA1105_TAS_STATE_DISABLED;
601 	else
602 		/* Schedule is probably not configured with PTP clock source */
603 		rc = -EINVAL;
604 
605 	return rc;
606 }
607 
608 /* Write to PTPCLKCORP */
609 static int sja1105_tas_adjust_drift(struct sja1105_private *priv,
610 				    u64 correction)
611 {
612 	const struct sja1105_regs *regs = priv->info->regs;
613 	u32 ptpclkcorp = ns_to_sja1105_ticks(correction);
614 
615 	return sja1105_xfer_u32(priv, SPI_WRITE, regs->ptpclkcorp,
616 				&ptpclkcorp, NULL);
617 }
618 
619 /* Write to PTPSCHTM */
620 static int sja1105_tas_set_base_time(struct sja1105_private *priv,
621 				     u64 base_time)
622 {
623 	const struct sja1105_regs *regs = priv->info->regs;
624 	u64 ptpschtm = ns_to_sja1105_ticks(base_time);
625 
626 	return sja1105_xfer_u64(priv, SPI_WRITE, regs->ptpschtm,
627 				&ptpschtm, NULL);
628 }
629 
630 static int sja1105_tas_start(struct sja1105_private *priv)
631 {
632 	struct sja1105_tas_data *tas_data = &priv->tas_data;
633 	struct sja1105_ptp_cmd *cmd = &priv->ptp_data.cmd;
634 	struct dsa_switch *ds = priv->ds;
635 	int rc;
636 
637 	dev_dbg(ds->dev, "Starting the TAS\n");
638 
639 	if (tas_data->state == SJA1105_TAS_STATE_ENABLED_NOT_RUNNING ||
640 	    tas_data->state == SJA1105_TAS_STATE_RUNNING) {
641 		dev_err(ds->dev, "TAS already started\n");
642 		return -EINVAL;
643 	}
644 
645 	cmd->ptpstrtsch = 1;
646 	cmd->ptpstopsch = 0;
647 
648 	rc = sja1105_ptp_commit(ds, cmd, SPI_WRITE);
649 	if (rc < 0)
650 		return rc;
651 
652 	tas_data->state = SJA1105_TAS_STATE_ENABLED_NOT_RUNNING;
653 
654 	return 0;
655 }
656 
657 static int sja1105_tas_stop(struct sja1105_private *priv)
658 {
659 	struct sja1105_tas_data *tas_data = &priv->tas_data;
660 	struct sja1105_ptp_cmd *cmd = &priv->ptp_data.cmd;
661 	struct dsa_switch *ds = priv->ds;
662 	int rc;
663 
664 	dev_dbg(ds->dev, "Stopping the TAS\n");
665 
666 	if (tas_data->state == SJA1105_TAS_STATE_DISABLED) {
667 		dev_err(ds->dev, "TAS already disabled\n");
668 		return -EINVAL;
669 	}
670 
671 	cmd->ptpstopsch = 1;
672 	cmd->ptpstrtsch = 0;
673 
674 	rc = sja1105_ptp_commit(ds, cmd, SPI_WRITE);
675 	if (rc < 0)
676 		return rc;
677 
678 	tas_data->state = SJA1105_TAS_STATE_DISABLED;
679 
680 	return 0;
681 }
682 
683 /* The schedule engine and the PTP clock are driven by the same oscillator, and
684  * they run in parallel. But whilst the PTP clock can keep an absolute
685  * time-of-day, the schedule engine is only running in 'ticks' (25 ticks make
686  * up a delta, which is 200ns), and wrapping around at the end of each cycle.
687  * The schedule engine is started when the PTP clock reaches the PTPSCHTM time
688  * (in PTP domain).
689  * Because the PTP clock can be rate-corrected (accelerated or slowed down) by
690  * a software servo, and the schedule engine clock runs in parallel to the PTP
691  * clock, there is logic internal to the switch that periodically keeps the
692  * schedule engine from drifting away. The frequency with which this internal
693  * syntonization happens is the PTP clock correction period (PTPCLKCORP). It is
694  * a value also in the PTP clock domain, and is also rate-corrected.
695  * To be precise, during a correction period, there is logic to determine by
696  * how many scheduler clock ticks has the PTP clock drifted. At the end of each
697  * correction period/beginning of new one, the length of a delta is shrunk or
698  * expanded with an integer number of ticks, compared with the typical 25.
699  * So a delta lasts for 200ns (or 25 ticks) only on average.
700  * Sometimes it is longer, sometimes it is shorter. The internal syntonization
701  * logic can adjust for at most 5 ticks each 20 ticks.
702  *
703  * The first implication is that you should choose your schedule correction
704  * period to be an integer multiple of the schedule length. Preferably one.
705  * In case there are schedules of multiple ports active, then the correction
706  * period needs to be a multiple of them all. Given the restriction that the
707  * cycle times have to be multiples of one another anyway, this means the
708  * correction period can simply be the largest cycle time, hence the current
709  * choice. This way, the updates are always synchronous to the transmission
710  * cycle, and therefore predictable.
711  *
712  * The second implication is that at the beginning of a correction period, the
713  * first few deltas will be modulated in time, until the schedule engine is
714  * properly phase-aligned with the PTP clock. For this reason, you should place
715  * your best-effort traffic at the beginning of a cycle, and your
716  * time-triggered traffic afterwards.
717  *
718  * The third implication is that once the schedule engine is started, it can
719  * only adjust for so much drift within a correction period. In the servo you
720  * can only change the PTPCLKRATE, but not step the clock (PTPCLKADD). If you
721  * want to do the latter, you need to stop and restart the schedule engine,
722  * which is what the state machine handles.
723  */
724 static void sja1105_tas_state_machine(struct work_struct *work)
725 {
726 	struct sja1105_tas_data *tas_data = work_to_sja1105_tas(work);
727 	struct sja1105_private *priv = tas_to_sja1105(tas_data);
728 	struct sja1105_ptp_data *ptp_data = &priv->ptp_data;
729 	struct timespec64 base_time_ts, now_ts;
730 	struct dsa_switch *ds = priv->ds;
731 	struct timespec64 diff;
732 	s64 base_time, now;
733 	int rc = 0;
734 
735 	mutex_lock(&ptp_data->lock);
736 
737 	switch (tas_data->state) {
738 	case SJA1105_TAS_STATE_DISABLED:
739 		/* Can't do anything at all if clock is still being stepped */
740 		if (tas_data->last_op != SJA1105_PTP_ADJUSTFREQ)
741 			break;
742 
743 		rc = sja1105_tas_adjust_drift(priv, tas_data->max_cycle_time);
744 		if (rc < 0)
745 			break;
746 
747 		rc = __sja1105_ptp_gettimex(ds, &now, NULL);
748 		if (rc < 0)
749 			break;
750 
751 		/* Plan to start the earliest schedule first. The others
752 		 * will be started in hardware, by way of their respective
753 		 * entry points delta.
754 		 * Try our best to avoid fringe cases (race condition between
755 		 * ptpschtm and ptpstrtsch) by pushing the oper_base_time at
756 		 * least one second in the future from now. This is not ideal,
757 		 * but this only needs to buy us time until the
758 		 * sja1105_tas_start command below gets executed.
759 		 */
760 		base_time = future_base_time(tas_data->earliest_base_time,
761 					     tas_data->max_cycle_time,
762 					     now + 1ull * NSEC_PER_SEC);
763 		base_time -= sja1105_delta_to_ns(1);
764 
765 		rc = sja1105_tas_set_base_time(priv, base_time);
766 		if (rc < 0)
767 			break;
768 
769 		tas_data->oper_base_time = base_time;
770 
771 		rc = sja1105_tas_start(priv);
772 		if (rc < 0)
773 			break;
774 
775 		base_time_ts = ns_to_timespec64(base_time);
776 		now_ts = ns_to_timespec64(now);
777 
778 		dev_dbg(ds->dev, "OPER base time %lld.%09ld (now %lld.%09ld)\n",
779 			base_time_ts.tv_sec, base_time_ts.tv_nsec,
780 			now_ts.tv_sec, now_ts.tv_nsec);
781 
782 		break;
783 
784 	case SJA1105_TAS_STATE_ENABLED_NOT_RUNNING:
785 		if (tas_data->last_op != SJA1105_PTP_ADJUSTFREQ) {
786 			/* Clock was stepped.. bad news for TAS */
787 			sja1105_tas_stop(priv);
788 			break;
789 		}
790 
791 		/* Check if TAS has actually started, by comparing the
792 		 * scheduled start time with the SJA1105 PTP clock
793 		 */
794 		rc = __sja1105_ptp_gettimex(ds, &now, NULL);
795 		if (rc < 0)
796 			break;
797 
798 		if (now < tas_data->oper_base_time) {
799 			/* TAS has not started yet */
800 			diff = ns_to_timespec64(tas_data->oper_base_time - now);
801 			dev_dbg(ds->dev, "time to start: [%lld.%09ld]",
802 				diff.tv_sec, diff.tv_nsec);
803 			break;
804 		}
805 
806 		/* Time elapsed, what happened? */
807 		rc = sja1105_tas_check_running(priv);
808 		if (rc < 0)
809 			break;
810 
811 		if (tas_data->state != SJA1105_TAS_STATE_RUNNING)
812 			/* TAS has started */
813 			dev_err(ds->dev,
814 				"TAS not started despite time elapsed\n");
815 
816 		break;
817 
818 	case SJA1105_TAS_STATE_RUNNING:
819 		/* Clock was stepped.. bad news for TAS */
820 		if (tas_data->last_op != SJA1105_PTP_ADJUSTFREQ) {
821 			sja1105_tas_stop(priv);
822 			break;
823 		}
824 
825 		rc = sja1105_tas_check_running(priv);
826 		if (rc < 0)
827 			break;
828 
829 		if (tas_data->state != SJA1105_TAS_STATE_RUNNING)
830 			dev_err(ds->dev, "TAS surprisingly stopped\n");
831 
832 		break;
833 
834 	default:
835 		if (net_ratelimit())
836 			dev_err(ds->dev, "TAS in an invalid state (incorrect use of API)!\n");
837 	}
838 
839 	if (rc && net_ratelimit())
840 		dev_err(ds->dev, "An operation returned %d\n", rc);
841 
842 	mutex_unlock(&ptp_data->lock);
843 }
844 
845 void sja1105_tas_clockstep(struct dsa_switch *ds)
846 {
847 	struct sja1105_private *priv = ds->priv;
848 	struct sja1105_tas_data *tas_data = &priv->tas_data;
849 
850 	if (!tas_data->enabled)
851 		return;
852 
853 	tas_data->last_op = SJA1105_PTP_CLOCKSTEP;
854 	schedule_work(&tas_data->tas_work);
855 }
856 
857 void sja1105_tas_adjfreq(struct dsa_switch *ds)
858 {
859 	struct sja1105_private *priv = ds->priv;
860 	struct sja1105_tas_data *tas_data = &priv->tas_data;
861 
862 	if (!tas_data->enabled)
863 		return;
864 
865 	/* No reason to schedule the workqueue, nothing changed */
866 	if (tas_data->state == SJA1105_TAS_STATE_RUNNING)
867 		return;
868 
869 	tas_data->last_op = SJA1105_PTP_ADJUSTFREQ;
870 	schedule_work(&tas_data->tas_work);
871 }
872 
873 void sja1105_tas_setup(struct dsa_switch *ds)
874 {
875 	struct sja1105_private *priv = ds->priv;
876 	struct sja1105_tas_data *tas_data = &priv->tas_data;
877 
878 	INIT_WORK(&tas_data->tas_work, sja1105_tas_state_machine);
879 	tas_data->state = SJA1105_TAS_STATE_DISABLED;
880 	tas_data->last_op = SJA1105_PTP_NONE;
881 
882 	INIT_LIST_HEAD(&tas_data->gating_cfg.entries);
883 }
884 
885 void sja1105_tas_teardown(struct dsa_switch *ds)
886 {
887 	struct sja1105_private *priv = ds->priv;
888 	struct tc_taprio_qopt_offload *offload;
889 	int port;
890 
891 	cancel_work_sync(&priv->tas_data.tas_work);
892 
893 	for (port = 0; port < ds->num_ports; port++) {
894 		offload = priv->tas_data.offload[port];
895 		if (!offload)
896 			continue;
897 
898 		taprio_offload_free(offload);
899 	}
900 }
901