xref: /openbmc/qemu/include/hw/xen/interface/io/netif.h (revision 8ac98aed)
1 /* SPDX-License-Identifier: MIT */
2 /******************************************************************************
3  * netif.h
4  *
5  * Unified network-device I/O interface for Xen guest OSes.
6  *
7  * Copyright (c) 2003-2004, Keir Fraser
8  */
9 
10 #ifndef __XEN_PUBLIC_IO_NETIF_H__
11 #define __XEN_PUBLIC_IO_NETIF_H__
12 
13 #include "ring.h"
14 #include "../grant_table.h"
15 
16 /*
17  * Older implementation of Xen network frontend / backend has an
18  * implicit dependency on the MAX_SKB_FRAGS as the maximum number of
19  * ring slots a skb can use. Netfront / netback may not work as
20  * expected when frontend and backend have different MAX_SKB_FRAGS.
21  *
22  * A better approach is to add mechanism for netfront / netback to
23  * negotiate this value. However we cannot fix all possible
24  * frontends, so we need to define a value which states the minimum
25  * slots backend must support.
26  *
27  * The minimum value derives from older Linux kernel's MAX_SKB_FRAGS
28  * (18), which is proved to work with most frontends. Any new backend
29  * which doesn't negotiate with frontend should expect frontend to
30  * send a valid packet using slots up to this value.
31  */
#define XEN_NETIF_NR_SLOTS_MIN 18
33 
34 /*
35  * Notifications after enqueuing any type of message should be conditional on
36  * the appropriate req_event or rsp_event field in the shared ring.
37  * If the client sends notification for rx requests then it should specify
38  * feature 'feature-rx-notify' via xenbus. Otherwise the backend will assume
39  * that it cannot safely queue packets (as it may not be kicked to send them).
40  */
41 
42 /*
43  * "feature-split-event-channels" is introduced to separate guest TX
44  * and RX notification. Backend either doesn't support this feature or
45  * advertises it via xenstore as 0 (disabled) or 1 (enabled).
46  *
47  * To make use of this feature, frontend should allocate two event
48  * channels for TX and RX, advertise them to backend as
49  * "event-channel-tx" and "event-channel-rx" respectively. If frontend
50  * doesn't want to use this feature, it just writes "event-channel"
51  * node as before.
52  */
53 
54 /*
55  * Multiple transmit and receive queues:
56  * If supported, the backend will write the key "multi-queue-max-queues" to
57  * the directory for that vif, and set its value to the maximum supported
58  * number of queues.
59  * Frontends that are aware of this feature and wish to use it can write the
60  * key "multi-queue-num-queues", set to the number they wish to use, which
61  * must be greater than zero, and no more than the value reported by the backend
62  * in "multi-queue-max-queues".
63  *
64  * Queues replicate the shared rings and event channels.
65  * "feature-split-event-channels" may optionally be used when using
66  * multiple queues, but is not mandatory.
67  *
68  * Each queue consists of one shared ring pair, i.e. there must be the same
69  * number of tx and rx rings.
70  *
71  * For frontends requesting just one queue, the usual event-channel and
72  * ring-ref keys are written as before, simplifying the backend processing
73  * to avoid distinguishing between a frontend that doesn't understand the
74  * multi-queue feature, and one that does, but requested only one queue.
75  *
76  * Frontends requesting two or more queues must not write the toplevel
77  * event-channel (or event-channel-{tx,rx}) and {tx,rx}-ring-ref keys,
78  * instead writing those keys under sub-keys having the name "queue-N" where
79  * N is the integer ID of the queue for which those keys belong. Queues
80  * are indexed from zero. For example, a frontend with two queues and split
81  * event channels must write the following set of queue-related keys:
82  *
83  * /local/domain/1/device/vif/0/multi-queue-num-queues = "2"
84  * /local/domain/1/device/vif/0/queue-0 = ""
85  * /local/domain/1/device/vif/0/queue-0/tx-ring-ref = "<ring-ref-tx0>"
86  * /local/domain/1/device/vif/0/queue-0/rx-ring-ref = "<ring-ref-rx0>"
87  * /local/domain/1/device/vif/0/queue-0/event-channel-tx = "<evtchn-tx0>"
88  * /local/domain/1/device/vif/0/queue-0/event-channel-rx = "<evtchn-rx0>"
89  * /local/domain/1/device/vif/0/queue-1 = ""
90  * /local/domain/1/device/vif/0/queue-1/tx-ring-ref = "<ring-ref-tx1>"
91  * /local/domain/1/device/vif/0/queue-1/rx-ring-ref = "<ring-ref-rx1>"
92  * /local/domain/1/device/vif/0/queue-1/event-channel-tx = "<evtchn-tx1>"
93  * /local/domain/1/device/vif/0/queue-1/event-channel-rx = "<evtchn-rx1>"
94  *
95  * If there is any inconsistency in the XenStore data, the backend may
96  * choose not to connect any queues, instead treating the request as an
97  * error. This includes scenarios where more (or fewer) queues were
98  * requested than the frontend provided details for.
99  *
100  * Mapping of packets to queues is considered to be a function of the
101  * transmitting system (backend or frontend) and is not negotiated
102  * between the two. Guests are free to transmit packets on any queue
103  * they choose, provided it has been set up correctly. Guests must be
104  * prepared to receive packets on any queue they have requested be set up.
105  */
106 
107 /*
108  * "feature-no-csum-offload" should be used to turn IPv4 TCP/UDP checksum
109  * offload off or on. If it is missing then the feature is assumed to be on.
110  * "feature-ipv6-csum-offload" should be used to turn IPv6 TCP/UDP checksum
111  * offload on or off. If it is missing then the feature is assumed to be off.
112  */
113 
114 /*
115  * "feature-gso-tcpv4" and "feature-gso-tcpv6" advertise the capability to
116  * handle large TCP packets (in IPv4 or IPv6 form respectively). Neither
117  * frontends nor backends are assumed to be capable unless the flags are
118  * present.
119  */
120 
121 /*
122  * "feature-multicast-control" and "feature-dynamic-multicast-control"
123  * advertise the capability to filter ethernet multicast packets in the
124  * backend. If the frontend wishes to take advantage of this feature then
125  * it may set "request-multicast-control". If the backend only advertises
126  * "feature-multicast-control" then "request-multicast-control" must be set
127  * before the frontend moves into the connected state. The backend will
128  * sample the value on this state transition and any subsequent change in
129  * value will have no effect. However, if the backend also advertises
130  * "feature-dynamic-multicast-control" then "request-multicast-control"
131  * may be set by the frontend at any time. In this case, the backend will
132  * watch the value and re-sample on watch events.
133  *
134  * If the sampled value of "request-multicast-control" is set then the
135  * backend transmit side should no longer flood multicast packets to the
136  * frontend, it should instead drop any multicast packet that does not
137  * match in a filter list.
138  * The list is amended by the frontend by sending dummy transmit requests
139  * containing XEN_NETIF_EXTRA_TYPE_MCAST_{ADD,DEL} extra-info fragments as
140  * specified below.
141  * Note that the filter list may be amended even if the sampled value of
142  * "request-multicast-control" is not set, however the filter should only
143  * be applied if it is set.
144  */
145 
146 /*
147  * The setting of "trusted" node to "0" in the frontend path signals that the
148  * frontend should not trust the backend, and should deploy whatever measures
149  * are available to protect against a malicious backend on the other end.
150  */
151 
152 /*
153  * Control ring
154  * ============
155  *
156  * Some features, such as hashing (detailed below), require a
157  * significant amount of out-of-band data to be passed from frontend to
158  * backend. Use of xenstore is not suitable for large quantities of data
159  * because of quota limitations and so a dedicated 'control ring' is used.
160  * The ability of the backend to use a control ring is advertised by
161  * setting:
162  *
163  * /local/domain/X/backend/vif/<domid>/<vif>/feature-ctrl-ring = "1"
164  *
165  * The frontend provides a control ring to the backend by setting:
166  *
167  * /local/domain/<domid>/device/vif/<vif>/ctrl-ring-ref = <gref>
168  * /local/domain/<domid>/device/vif/<vif>/event-channel-ctrl = <port>
169  *
170  * where <gref> is the grant reference of the shared page used to
171  * implement the control ring and <port> is an event channel to be used
172  * as a mailbox interrupt. These keys must be set before the frontend
173  * moves into the connected state.
174  *
175  * The control ring uses a fixed request/response message size and is
176  * balanced (i.e. one request to one response), so operationally it is much
177  * the same as a transmit or receive ring.
178  * Note that there is no requirement that responses are issued in the same
179  * order as requests.
180  */
181 
182 /*
183  * Link state
184  * ==========
185  *
186  * The backend can advertise its current link (carrier) state to the
187  * frontend using the /local/domain/X/backend/vif/<domid>/<vif>/carrier
188  * node. If this node is not present, then the frontend should assume that
189  * the link is up (for compatibility with backends that do not implement
190  * this feature). If this node is present, then a value of "0" should be
191  * interpreted by the frontend as the link being down (no carrier) and a
192  * value of "1" should be interpreted as the link being up (carrier
193  * present).
194  */
195 
196 /*
197  * MTU
198  * ===
199  *
200  * The toolstack may set a value of MTU for the frontend by setting the
201  * /local/domain/<domid>/device/vif/<vif>/mtu node with the MTU value in
202  * octets. If this node is absent the frontend should assume an MTU value
203  * of 1500 octets. A frontend is also at liberty to ignore this value so
204  * it is only suitable for informing the frontend that a packet payload
205  * >1500 octets is permitted.
206  */
207 
208 /*
209  * Hash types
210  * ==========
211  *
212  * For the purposes of the definitions below, 'Packet[]' is an array of
213  * octets containing an IP packet without options, 'Array[X..Y]' means a
214  * sub-array of 'Array' containing bytes X thru Y inclusive, and '+' is
215  * used to indicate concatenation of arrays.
216  */
217 
/*
 * A hash calculated over an IP version 4 header as follows:
 *
 * Buffer[0..7] = Packet[12..15] (source address) +
 *                Packet[16..19] (destination address)
 *
 * Result = Hash(Buffer, 8)
 */
#define _XEN_NETIF_CTRL_HASH_TYPE_IPV4 0
#define XEN_NETIF_CTRL_HASH_TYPE_IPV4 \
    (1 << _XEN_NETIF_CTRL_HASH_TYPE_IPV4)
229 
/*
 * A hash calculated over an IP version 4 header and TCP header as
 * follows:
 *
 * Buffer[0..11] = Packet[12..15] (source address) +
 *                 Packet[16..19] (destination address) +
 *                 Packet[20..21] (source port) +
 *                 Packet[22..23] (destination port)
 *
 * Result = Hash(Buffer, 12)
 */
#define _XEN_NETIF_CTRL_HASH_TYPE_IPV4_TCP 1
#define XEN_NETIF_CTRL_HASH_TYPE_IPV4_TCP \
    (1 << _XEN_NETIF_CTRL_HASH_TYPE_IPV4_TCP)
244 
/*
 * A hash calculated over an IP version 6 header as follows:
 *
 * Buffer[0..31] = Packet[8..23]  (source address) +
 *                 Packet[24..39] (destination address)
 *
 * Result = Hash(Buffer, 32)
 */
#define _XEN_NETIF_CTRL_HASH_TYPE_IPV6 2
#define XEN_NETIF_CTRL_HASH_TYPE_IPV6 \
    (1 << _XEN_NETIF_CTRL_HASH_TYPE_IPV6)
256 
/*
 * A hash calculated over an IP version 6 header and TCP header as
 * follows:
 *
 * Buffer[0..35] = Packet[8..23]  (source address) +
 *                 Packet[24..39] (destination address) +
 *                 Packet[40..41] (source port) +
 *                 Packet[42..43] (destination port)
 *
 * Result = Hash(Buffer, 36)
 */
#define _XEN_NETIF_CTRL_HASH_TYPE_IPV6_TCP 3
#define XEN_NETIF_CTRL_HASH_TYPE_IPV6_TCP \
    (1 << _XEN_NETIF_CTRL_HASH_TYPE_IPV6_TCP)
271 
272 /*
273  * Hash algorithms
274  * ===============
275  */
276 
/*
 * No hash algorithm: selecting this value via
 * XEN_NETIF_CTRL_TYPE_SET_HASH_ALGORITHM disables hashing.
 */
#define XEN_NETIF_CTRL_HASH_ALGORITHM_NONE 0

/*
 * Toeplitz hash:
 */

#define XEN_NETIF_CTRL_HASH_ALGORITHM_TOEPLITZ 1
284 
285 /*
286  * This algorithm uses a 'key' as well as the data buffer itself.
287  * (Buffer[] and Key[] are treated as shift-registers where the MSB of
288  * Buffer/Key[0] is considered 'left-most' and the LSB of Buffer/Key[N-1]
289  * is the 'right-most').
290  *
291  * Value = 0
292  * For number of bits in Buffer[]
293  *    If (left-most bit of Buffer[] is 1)
294  *        Value ^= left-most 32 bits of Key[]
295  *    Key[] << 1
296  *    Buffer[] << 1
297  *
298  * The code below is provided for convenience where an operating system
299  * does not already provide an implementation.
300  */
301 #ifdef XEN_NETIF_DEFINE_TOEPLITZ
/*
 * Compute the 32-bit Toeplitz hash of a buffer.
 *
 * key:    hash key octets. Only indices below keylen are ever read, so
 *         this may be NULL when keylen is 0.
 * keylen: number of valid octets in key; octets beyond it are treated
 *         as zero.
 * buf:    octets to hash, processed most-significant bit first.
 * buflen: number of octets in buf.
 *
 * Returns the hash value. The result is 0 when buflen is 0 or when the
 * key is empty / all-zero.
 */
static uint32_t xen_netif_toeplitz_hash(const uint8_t *key,
                                        unsigned int keylen,
                                        const uint8_t *buf,
                                        unsigned int buflen)
{
    unsigned int keyi, bufi;
    uint64_t prefix = 0;
    uint64_t hash = 0;

    /* Pre-load prefix with the first 8 bytes of the key */
    for (keyi = 0; keyi < 8; keyi++) {
        prefix <<= 8;
        prefix |= (keyi < keylen) ? key[keyi] : 0;
    }

    for (bufi = 0; bufi < buflen; bufi++) {
        uint8_t byte = buf[bufi];
        unsigned int bit;

        for (bit = 0; bit < 8; bit++) {
            if (byte & 0x80) {
                hash ^= prefix;
            }
            prefix <<= 1;
            byte <<= 1;
        }

        /*
         * 'prefix' has now been left-shifted by 8, so OR in the next
         * key byte, if any is left. keyi only advances while key bytes
         * remain, so it cannot wrap around for very large buflen and
         * start re-reading the key.
         */
        if (keyi < keylen) {
            prefix |= key[keyi++];
        }
    }

    /* The valid part of the hash is in the upper 32 bits. */
    return (uint32_t)(hash >> 32);
}
339 #endif /* XEN_NETIF_DEFINE_TOEPLITZ */
340 
341 /*
342  * Control requests (struct xen_netif_ctrl_request)
343  * ================================================
344  *
345  * All requests have the following format:
346  *
347  *    0     1     2     3     4     5     6     7  octet
348  * +-----+-----+-----+-----+-----+-----+-----+-----+
349  * |    id     |   type    |         data[0]       |
350  * +-----+-----+-----+-----+-----+-----+-----+-----+
351  * |         data[1]       |         data[2]       |
352  * +-----+-----+-----+-----+-----------------------+
353  *
354  * id: the request identifier, echoed in response.
355  * type: the type of request (see below)
356  * data[]: any data associated with the request (determined by type)
357  */
358 
/*
 * Control ring request. Laid out as two 16-bit fields followed by three
 * 32-bit data words (16 octets in total), matching the diagram above.
 */
struct xen_netif_ctrl_request {
    uint16_t id;   /* request identifier, echoed in the response */
    uint16_t type; /* one of XEN_NETIF_CTRL_TYPE_* below */

#define XEN_NETIF_CTRL_TYPE_INVALID               0
#define XEN_NETIF_CTRL_TYPE_GET_HASH_FLAGS        1
#define XEN_NETIF_CTRL_TYPE_SET_HASH_FLAGS        2
#define XEN_NETIF_CTRL_TYPE_SET_HASH_KEY          3
#define XEN_NETIF_CTRL_TYPE_GET_HASH_MAPPING_SIZE 4
#define XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING_SIZE 5
#define XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING      6
#define XEN_NETIF_CTRL_TYPE_SET_HASH_ALGORITHM    7
#define XEN_NETIF_CTRL_TYPE_GET_GREF_MAPPING_SIZE 8
#define XEN_NETIF_CTRL_TYPE_ADD_GREF_MAPPING      9
#define XEN_NETIF_CTRL_TYPE_DEL_GREF_MAPPING     10

    uint32_t data[3]; /* request arguments; meaning is determined by type */
};
377 
378 /*
379  * Control responses (struct xen_netif_ctrl_response)
380  * ==================================================
381  *
382  * All responses have the following format:
383  *
384  *    0     1     2     3     4     5     6     7  octet
385  * +-----+-----+-----+-----+-----+-----+-----+-----+
386  * |    id     |   type    |         status        |
387  * +-----+-----+-----+-----+-----+-----+-----+-----+
388  * |         data          |
389  * +-----+-----+-----+-----+
390  *
391  * id: the corresponding request identifier
392  * type: the type of the corresponding request
393  * status: the status of request processing
394  * data: any data associated with the response (determined by type and
395  *       status)
396  */
397 
/*
 * Control ring response. Laid out as two 16-bit fields followed by two
 * 32-bit words (12 octets in total), matching the diagram above.
 */
struct xen_netif_ctrl_response {
    uint16_t id;     /* identifier of the corresponding request */
    uint16_t type;   /* type of the corresponding request */
    uint32_t status; /* one of XEN_NETIF_CTRL_STATUS_* below */

#define XEN_NETIF_CTRL_STATUS_SUCCESS           0
#define XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED     1
#define XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER 2
#define XEN_NETIF_CTRL_STATUS_BUFFER_OVERFLOW   3

    uint32_t data;   /* response data; meaning depends on type and status */
};
410 
411 /*
412  * Static Grants (struct xen_netif_gref)
413  * =====================================
414  *
415  * A frontend may provide a fixed set of grant references to be mapped on
416  * the backend. The message of type XEN_NETIF_CTRL_TYPE_ADD_GREF_MAPPING
417  * prior to its usage in the command ring allows for creation of these mappings.
418  * The backend will maintain a fixed amount of these mappings.
419  *
420  * XEN_NETIF_CTRL_TYPE_GET_GREF_MAPPING_SIZE lets a frontend query how many
421  * of these mappings can be kept.
422  *
423  * Each entry in the XEN_NETIF_CTRL_TYPE_{ADD,DEL}_GREF_MAPPING input table has
424  * the following format:
425  *
426  *    0     1     2     3     4     5     6     7  octet
427  * +-----+-----+-----+-----+-----+-----+-----+-----+
428  * | grant ref             |  flags    |  status   |
429  * +-----+-----+-----+-----+-----+-----+-----+-----+
430  *
431  * grant ref: grant reference (IN)
432  * flags: flags describing the control operation (IN)
433  * status: XEN_NETIF_CTRL_STATUS_* (OUT)
434  *
435  * 'status' is an output parameter which does not need to be set to zero
436  * prior to its usage in the corresponding control messages.
437  */
438 
439 struct xen_netif_gref {
440        grant_ref_t ref;
441        uint16_t flags;
442 
443 #define _XEN_NETIF_CTRLF_GREF_readonly    0
444 #define XEN_NETIF_CTRLF_GREF_readonly    (1U<<_XEN_NETIF_CTRLF_GREF_readonly)
445 
446        uint16_t status;
447 };
448 
449 /*
450  * Control messages
451  * ================
452  *
453  * XEN_NETIF_CTRL_TYPE_SET_HASH_ALGORITHM
454  * --------------------------------------
455  *
456  * This is sent by the frontend to set the desired hash algorithm.
457  *
458  * Request:
459  *
460  *  type    = XEN_NETIF_CTRL_TYPE_SET_HASH_ALGORITHM
461  *  data[0] = a XEN_NETIF_CTRL_HASH_ALGORITHM_* value
462  *  data[1] = 0
463  *  data[2] = 0
464  *
465  * Response:
466  *
467  *  status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED     - Operation not
468  *                                                     supported
469  *           XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER - The algorithm is not
470  *                                                     supported
471  *           XEN_NETIF_CTRL_STATUS_SUCCESS           - Operation successful
472  *
473  * NOTE: Setting data[0] to XEN_NETIF_CTRL_HASH_ALGORITHM_NONE disables
474  *       hashing and the backend is free to choose how it steers packets
475  *       to queues (which is the default behaviour).
476  *
477  * XEN_NETIF_CTRL_TYPE_GET_HASH_FLAGS
478  * ----------------------------------
479  *
480  * This is sent by the frontend to query the types of hash supported by
481  * the backend.
482  *
483  * Request:
484  *
485  *  type    = XEN_NETIF_CTRL_TYPE_GET_HASH_FLAGS
486  *  data[0] = 0
487  *  data[1] = 0
488  *  data[2] = 0
489  *
490  * Response:
491  *
492  *  status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED - Operation not supported
493  *           XEN_NETIF_CTRL_STATUS_SUCCESS       - Operation successful
494  *  data   = supported hash types (if operation was successful)
495  *
496  * NOTE: A valid hash algorithm must be selected before this operation can
497  *       succeed.
498  *
499  * XEN_NETIF_CTRL_TYPE_SET_HASH_FLAGS
500  * ----------------------------------
501  *
502  * This is sent by the frontend to set the types of hash that the backend
503  * should calculate. (See above for hash type definitions).
504  * Note that the 'maximal' type of hash should always be chosen. For
505  * example, if the frontend sets both IPV4 and IPV4_TCP hash types then
506  * the latter hash type should be calculated for any TCP packet and the
507  * former only calculated for non-TCP packets.
508  *
509  * Request:
510  *
511  *  type    = XEN_NETIF_CTRL_TYPE_SET_HASH_FLAGS
512  *  data[0] = bitwise OR of XEN_NETIF_CTRL_HASH_TYPE_* values
513  *  data[1] = 0
514  *  data[2] = 0
515  *
516  * Response:
517  *
518  *  status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED     - Operation not
519  *                                                     supported
520  *           XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER - One or more flag
521  *                                                     value is invalid or
522  *                                                     unsupported
523  *           XEN_NETIF_CTRL_STATUS_SUCCESS           - Operation successful
524  *  data   = 0
525  *
526  * NOTE: A valid hash algorithm must be selected before this operation can
527  *       succeed.
528  *       Also, setting data[0] to zero disables hashing and the backend
529  *       is free to choose how it steers packets to queues.
530  *
531  * XEN_NETIF_CTRL_TYPE_SET_HASH_KEY
532  * --------------------------------
533  *
534  * This is sent by the frontend to set the key of the hash if the algorithm
535  * requires it. (See hash algorithms above).
536  *
537  * Request:
538  *
539  *  type    = XEN_NETIF_CTRL_TYPE_SET_HASH_KEY
540  *  data[0] = grant reference of page containing the key (assumed to
541  *            start at beginning of grant)
542  *  data[1] = size of key in octets
543  *  data[2] = 0
544  *
545  * Response:
546  *
547  *  status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED     - Operation not
548  *                                                     supported
549  *           XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER - Key size is invalid
550  *           XEN_NETIF_CTRL_STATUS_BUFFER_OVERFLOW   - Key size is larger
551  *                                                     than the backend
552  *                                                     supports
553  *           XEN_NETIF_CTRL_STATUS_SUCCESS           - Operation successful
554  *  data   = 0
555  *
556  * NOTE: Any key octets not specified are assumed to be zero (the key
557  *       is assumed to be empty by default) and specifying a new key
558  *       invalidates any previous key, hence specifying a key size of
559  *       zero will clear the key (which ensures that the calculated hash
560  *       will always be zero).
561  *       The maximum size of key is algorithm and backend specific, but
562  *       is also limited by the single grant reference.
563  *       The grant reference may be read-only and must remain valid until
564  *       the response has been processed.
565  *
566  * XEN_NETIF_CTRL_TYPE_GET_HASH_MAPPING_SIZE
567  * -----------------------------------------
568  *
569  * This is sent by the frontend to query the maximum size of mapping
570  * table supported by the backend. The size is specified in terms of
571  * table entries.
572  *
573  * Request:
574  *
575  *  type    = XEN_NETIF_CTRL_TYPE_GET_HASH_MAPPING_SIZE
576  *  data[0] = 0
577  *  data[1] = 0
578  *  data[2] = 0
579  *
580  * Response:
581  *
582  *  status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED - Operation not supported
583  *           XEN_NETIF_CTRL_STATUS_SUCCESS       - Operation successful
584  *  data   = maximum number of entries allowed in the mapping table
585  *           (if operation was successful) or zero if a mapping table is
586  *           not supported (i.e. hash mapping is done only by modular
587  *           arithmetic).
588  *
589  * XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING_SIZE
590  * -----------------------------------------
591  *
592  * This is sent by the frontend to set the actual size of the mapping
593  * table to be used by the backend. The size is specified in terms of
594  * table entries.
595  * Any previous table is invalidated by this message and any new table
596  * is assumed to be zero filled.
597  *
598  * Request:
599  *
600  *  type    = XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING_SIZE
601  *  data[0] = number of entries in mapping table
602  *  data[1] = 0
603  *  data[2] = 0
604  *
605  * Response:
606  *
607  *  status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED     - Operation not
608  *                                                     supported
609  *           XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER - Table size is invalid
610  *           XEN_NETIF_CTRL_STATUS_SUCCESS           - Operation successful
611  *  data   = 0
612  *
613  * NOTE: Setting data[0] to 0 means that hash mapping should be done
614  *       using modular arithmetic.
615  *
616  * XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING
617  * ------------------------------------
618  *
619  * This is sent by the frontend to set the content of the table mapping
620  * hash value to queue number. The backend should calculate the hash from
621  * the packet header, use it as an index into the table (modulo the size
622  * of the table) and then steer the packet to the queue number found at
623  * that index.
624  *
625  * Request:
626  *
627  *  type    = XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING
628  *  data[0] = grant reference of page containing the mapping (sub-)table
629  *            (assumed to start at beginning of grant)
630  *  data[1] = size of (sub-)table in entries
631  *  data[2] = offset, in entries, of sub-table within overall table
632  *
633  * Response:
634  *
635  *  status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED     - Operation not
636  *                                                     supported
637  *           XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER - Table size or content
638  *                                                     is invalid
639  *           XEN_NETIF_CTRL_STATUS_BUFFER_OVERFLOW   - Table size is larger
640  *                                                     than the backend
641  *                                                     supports
642  *           XEN_NETIF_CTRL_STATUS_SUCCESS           - Operation successful
643  *  data   = 0
644  *
645  * NOTE: The overall table has the following format:
646  *
647  *          0     1     2     3     4     5     6     7  octet
648  *       +-----+-----+-----+-----+-----+-----+-----+-----+
649  *       |       mapping[0]      |       mapping[1]      |
650  *       +-----+-----+-----+-----+-----+-----+-----+-----+
651  *       |                       .                       |
652  *       |                       .                       |
653  *       |                       .                       |
654  *       +-----+-----+-----+-----+-----+-----+-----+-----+
655  *       |      mapping[N-2]     |      mapping[N-1]     |
656  *       +-----+-----+-----+-----+-----+-----+-----+-----+
657  *
658  *       where N is specified by a XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING_SIZE
659  *       message and each mapping must specify a queue between 0 and
660  *       "multi-queue-num-queues" (see above).
661  *       The backend may support a mapping table larger than can be
662  *       mapped by a single grant reference. Thus sub-tables within a
663  *       larger table can be individually set by sending multiple messages
664  *       with differing offset values. Specifying a new sub-table does not
665  *       invalidate any table data outside that range.
666  *       The grant reference may be read-only and must remain valid until
667  *       the response has been processed.
668  *
669  * XEN_NETIF_CTRL_TYPE_GET_GREF_MAPPING_SIZE
670  * -----------------------------------------
671  *
672  * This is sent by the frontend to fetch the number of grefs that can be kept
673  * mapped in the backend.
674  *
675  * Request:
676  *
677  *  type    = XEN_NETIF_CTRL_TYPE_GET_GREF_MAPPING_SIZE
678  *  data[0] = queue index (assumed 0 for single queue)
679  *  data[1] = 0
680  *  data[2] = 0
681  *
682  * Response:
683  *
684  *  status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED     - Operation not
685  *                                                     supported
686  *           XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER - The queue index is
687  *                                                     out of range
688  *           XEN_NETIF_CTRL_STATUS_SUCCESS           - Operation successful
689  *  data   = maximum number of entries allowed in the gref mapping table
690  *           (if operation was successful) or zero if it is not supported.
691  *
692  * XEN_NETIF_CTRL_TYPE_ADD_GREF_MAPPING
693  * ------------------------------------
694  *
695  * This is sent by the frontend for backend to map a list of grant
696  * references.
697  *
698  * Request:
699  *
700  *  type    = XEN_NETIF_CTRL_TYPE_ADD_GREF_MAPPING
701  *  data[0] = queue index
702  *  data[1] = grant reference of page containing the mapping list
703  *            (r/w and assumed to start at beginning of page)
704  *  data[2] = size of list in entries
705  *
706  * Response:
707  *
708  *  status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED     - Operation not
709  *                                                     supported
710  *           XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER - Operation failed
711  *           XEN_NETIF_CTRL_STATUS_SUCCESS           - Operation successful
712  *
713  * NOTE: Each entry in the input table has the format outlined
714  *       in struct xen_netif_gref.
715  *       Contrary to XEN_NETIF_CTRL_TYPE_DEL_GREF_MAPPING, the struct
716  *       xen_netif_gref 'status' field is not used and therefore the response
717  *       'status' determines the success of this operation. In case of
718  *       failure none of the grant mappings get added in the backend.
719  *
720  * XEN_NETIF_CTRL_TYPE_DEL_GREF_MAPPING
721  * ------------------------------------
722  *
723  * This is sent by the frontend for backend to unmap a list of grant
724  * references.
725  *
726  * Request:
727  *
728  *  type    = XEN_NETIF_CTRL_TYPE_DEL_GREF_MAPPING
729  *  data[0] = queue index
730  *  data[1] = grant reference of page containing the mapping list
731  *            (r/w and assumed to start at beginning of page)
732  *  data[2] = size of list in entries
733  *
734  * Response:
735  *
736  *  status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED     - Operation not
737  *                                                     supported
738  *           XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER - Operation failed
739  *           XEN_NETIF_CTRL_STATUS_SUCCESS           - Operation successful
740  *  data   = number of entries that were unmapped
741  *
742  * NOTE: Each entry in the input table has the format outlined in struct
743  *       xen_netif_gref.
744  *       The struct xen_netif_gref 'status' field determines if the entry
745  *       was successfully removed.
746  *       The entries used are only the ones representing grant references that
747  *       were previously the subject of a XEN_NETIF_CTRL_TYPE_ADD_GREF_MAPPING
748  *       operation. Any other entries will have their status set to
749  *       XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER upon completion.
750  */
751 
752 DEFINE_RING_TYPES(xen_netif_ctrl,
753                   struct xen_netif_ctrl_request,
754                   struct xen_netif_ctrl_response);
755 
756 /*
757  * Guest transmit
758  * ==============
759  *
760  * This is the 'wire' format for transmit (frontend -> backend) packets:
761  *
762  *  Fragment 1: netif_tx_request_t  - flags = NETTXF_*
763  *                                    size = total packet size
764  * [Extra 1: netif_extra_info_t]    - (only if fragment 1 flags include
765  *                                     NETTXF_extra_info)
766  *  ...
767  * [Extra N: netif_extra_info_t]    - (only if extra N-1 flags include
768  *                                     XEN_NETIF_EXTRA_FLAG_MORE)
769  *  ...
770  *  Fragment N: netif_tx_request_t  - (only if fragment N-1 flags include
771  *                                     NETTXF_more_data - flags on preceding
772  *                                     extras are not relevant here)
773  *                                    flags = 0
774  *                                    size = fragment size
775  *
776  * NOTE:
777  *
778  * This format is slightly different from that used for receive
779  * (backend -> frontend) packets. Specifically, in a multi-fragment
780  * packet the actual size of fragment 1 can only be determined by
781  * subtracting the sizes of fragments 2..N from the total packet size.
782  *
783  * Ring slot size is 12 octets, however not all request/response
784  * structs use the full size.
785  *
786  * tx request data (netif_tx_request_t)
787  * ------------------------------------
788  *
789  *    0     1     2     3     4     5     6     7  octet
790  * +-----+-----+-----+-----+-----+-----+-----+-----+
791  * | grant ref             | offset    | flags     |
792  * +-----+-----+-----+-----+-----+-----+-----+-----+
793  * | id        | size      |
794  * +-----+-----+-----+-----+
795  *
796  * grant ref: Reference to buffer page.
797  * offset: Offset within buffer page.
798  * flags: NETTXF_*.
799  * id: request identifier, echoed in response.
800  * size: packet size in bytes.
801  *
802  * tx response (netif_tx_response_t)
803  * ---------------------------------
804  *
805  *    0     1     2     3     4     5     6     7  octet
806  * +-----+-----+-----+-----+-----+-----+-----+-----+
807  * | id        | status    | unused                |
808  * +-----+-----+-----+-----+-----+-----+-----+-----+
809  * | unused                |
810  * +-----+-----+-----+-----+
811  *
812  * id: reflects id in transmit request
813  * status: NETIF_RSP_*
814  *
815  * Guest receive
816  * =============
817  *
818  * This is the 'wire' format for receive (backend -> frontend) packets:
819  *
820  *  Fragment 1: netif_rx_response_t - flags = NETRXF_*
821  *                                    size = fragment size
822  * [Extra 1: netif_extra_info_t]    - (only if fragment 1 flags include
823  *                                     NETRXF_extra_info)
824  *  ...
825  * [Extra N: netif_extra_info_t]    - (only if extra N-1 flags include
826  *                                     XEN_NETIF_EXTRA_FLAG_MORE)
827  *  ...
828  *  Fragment N: netif_rx_response_t - (only if fragment N-1 flags include
829  *                                     NETRXF_more_data - flags on preceding
830  *                                     extras are not relevant here)
831  *                                    flags = 0
832  *                                    size = fragment size
833  *
834  * NOTE:
835  *
836  * This format is slightly different from that used for transmit
837  * (frontend -> backend) packets. Specifically, in a multi-fragment
838  * packet the size of the packet can only be determined by summing the
839  * sizes of fragments 1..N.
840  *
841  * Ring slot size is 8 octets.
842  *
843  * rx request (netif_rx_request_t)
844  * -------------------------------
845  *
846  *    0     1     2     3     4     5     6     7  octet
847  * +-----+-----+-----+-----+-----+-----+-----+-----+
848  * | id        | pad       | gref                  |
849  * +-----+-----+-----+-----+-----+-----+-----+-----+
850  *
851  * id: request identifier, echoed in response.
852  * gref: reference to incoming granted frame.
853  *
854  * rx response (netif_rx_response_t)
855  * ---------------------------------
856  *
857  *    0     1     2     3     4     5     6     7  octet
858  * +-----+-----+-----+-----+-----+-----+-----+-----+
859  * | id        | offset    | flags     | status    |
860  * +-----+-----+-----+-----+-----+-----+-----+-----+
861  *
862  * id: reflects id in receive request
863  * offset: offset in page of start of received packet
864  * flags: NETRXF_*
865  * status: -ve: NETIF_RSP_*; +ve: Rx'ed pkt size.
866  *
867  * NOTE: Historically, to support GSO on the frontend receive side, Linux
868  *       netfront does not make use of the rx response id (because, as
869  *       described below, extra info structures overlay the id field).
870  *       Instead it assumes that responses always appear in the same ring
871  *       slot as their corresponding request. Thus, to maintain
872  *       compatibility, backends must make sure this is the case.
873  *
874  * Extra Info
875  * ==========
876  *
877  * Can be present if initial request or response has NET{T,R}XF_extra_info,
878  * or previous extra request has XEN_NETIF_EXTRA_FLAG_MORE.
879  *
880  * The struct therefore needs to fit into either a tx or rx slot and
881  * is therefore limited to 8 octets.
882  *
883  * NOTE: Because extra info data overlays the usual request/response
884  *       structures, there is no id information in the opposite direction.
885  *       So, if an extra info overlays an rx response the frontend can
886  *       assume that it is in the same ring slot as the request that was
887  *       consumed to make the slot available, and the backend must ensure
888  *       this assumption is true.
889  *
890  * extra info (netif_extra_info_t)
891  * -------------------------------
892  *
893  * General format:
894  *
895  *    0     1     2     3     4     5     6     7  octet
896  * +-----+-----+-----+-----+-----+-----+-----+-----+
897  * |type |flags| type specific data                |
898  * +-----+-----+-----+-----+-----+-----+-----+-----+
899  * | padding for tx        |
900  * +-----+-----+-----+-----+
901  *
902  * type: XEN_NETIF_EXTRA_TYPE_*
903  * flags: XEN_NETIF_EXTRA_FLAG_*
904  * padding for tx: present only in the tx case due to 8 octet limit
905  *                 from rx case. Not shown in type specific entries
906  *                 below.
907  *
908  * XEN_NETIF_EXTRA_TYPE_GSO:
909  *
910  *    0     1     2     3     4     5     6     7  octet
911  * +-----+-----+-----+-----+-----+-----+-----+-----+
912  * |type |flags| size      |type | pad | features  |
913  * +-----+-----+-----+-----+-----+-----+-----+-----+
914  *
915  * type: Must be XEN_NETIF_EXTRA_TYPE_GSO
916  * flags: XEN_NETIF_EXTRA_FLAG_*
917  * size: Maximum payload size of each segment. For example,
918  *       for TCP this is just the path MSS.
919  * type: XEN_NETIF_GSO_TYPE_*: This determines the protocol of
920  *       the packet and any extra features required to segment the
921  *       packet properly.
922  * features: XEN_NETIF_GSO_FEAT_*: This specifies any extra GSO
923  *           features required to process this packet, such as ECN
924  *           support for TCPv4.
925  *
926  * XEN_NETIF_EXTRA_TYPE_MCAST_{ADD,DEL}:
927  *
928  *    0     1     2     3     4     5     6     7  octet
929  * +-----+-----+-----+-----+-----+-----+-----+-----+
930  * |type |flags| addr                              |
931  * +-----+-----+-----+-----+-----+-----+-----+-----+
932  *
933  * type: Must be XEN_NETIF_EXTRA_TYPE_MCAST_{ADD,DEL}
934  * flags: XEN_NETIF_EXTRA_FLAG_*
935  * addr: address to add/remove
936  *
937  * XEN_NETIF_EXTRA_TYPE_HASH:
938  *
939  * A backend that supports Toeplitz hashing is assumed to accept
940  * this type of extra info in transmit packets.
941  * A frontend that enables hashing is assumed to accept
942  * this type of extra info in receive packets.
943  *
944  *    0     1     2     3     4     5     6     7  octet
945  * +-----+-----+-----+-----+-----+-----+-----+-----+
946  * |type |flags|htype| alg |LSB ---- value ---- MSB|
947  * +-----+-----+-----+-----+-----+-----+-----+-----+
948  *
949  * type: Must be XEN_NETIF_EXTRA_TYPE_HASH
950  * flags: XEN_NETIF_EXTRA_FLAG_*
951  * htype: Hash type (one of _XEN_NETIF_CTRL_HASH_TYPE_* - see above)
952  * alg: The algorithm used to calculate the hash (one of
953  *      XEN_NETIF_CTRL_HASH_TYPE_ALGORITHM_* - see above)
954  * value: Hash value
955  */
956 
957 /* Protocol checksum field is blank in the packet (hardware offload)? */
958 #define _NETTXF_csum_blank     (0)                          /* bit index */
959 #define  NETTXF_csum_blank     (1U<<_NETTXF_csum_blank)     /* bit mask for netif_tx_request.flags */
960
961 /* Packet data has been validated against protocol checksum. */
962 #define _NETTXF_data_validated (1)                          /* bit index */
963 #define  NETTXF_data_validated (1U<<_NETTXF_data_validated) /* bit mask for netif_tx_request.flags */
964
965 /* Packet continues in the next request descriptor. */
966 #define _NETTXF_more_data      (2)                          /* bit index */
967 #define  NETTXF_more_data      (1U<<_NETTXF_more_data)      /* bit mask for netif_tx_request.flags */
968
969 /* Packet to be followed by extra descriptor(s). */
970 #define _NETTXF_extra_info     (3)                          /* bit index */
971 #define  NETTXF_extra_info     (1U<<_NETTXF_extra_info)     /* bit mask for netif_tx_request.flags */
972 
973 #define XEN_NETIF_MAX_TX_SIZE 0xFFFF /* largest value a uint16_t can hold; presumably bounds 'size' below - confirm */
974 struct netif_tx_request {    /* guest transmit request (frontend -> backend) */
975     grant_ref_t gref;        /* Reference to buffer page. */
976     uint16_t offset;         /* Offset within buffer page. */
977     uint16_t flags;          /* NETTXF_*. */
978     uint16_t id;             /* Request identifier, echoed in response. */
979     uint16_t size;           /* Fragment 1: total packet size; fragments 2..N: fragment size. */
980 };
981 typedef struct netif_tx_request netif_tx_request_t;
982 
983 /* Types of netif_extra_info descriptors. */
984 #define XEN_NETIF_EXTRA_TYPE_NONE      (0)  /* Never used - invalid */
985 #define XEN_NETIF_EXTRA_TYPE_GSO       (1)  /* u.gso */
986 #define XEN_NETIF_EXTRA_TYPE_MCAST_ADD (2)  /* u.mcast */
987 #define XEN_NETIF_EXTRA_TYPE_MCAST_DEL (3)  /* u.mcast */
988 #define XEN_NETIF_EXTRA_TYPE_HASH      (4)  /* u.hash */
989 #define XEN_NETIF_EXTRA_TYPE_MAX       (5)  /* one past the highest type value defined above */
990
991 /* netif_extra_info_t flags. */
992 #define _XEN_NETIF_EXTRA_FLAG_MORE (0)                              /* bit index */
993 #define XEN_NETIF_EXTRA_FLAG_MORE  (1U<<_XEN_NETIF_EXTRA_FLAG_MORE) /* bit mask: another extra info follows */
994
995 /* GSO types */
996 #define XEN_NETIF_GSO_TYPE_NONE         (0)  /* values for u.gso.type */
997 #define XEN_NETIF_GSO_TYPE_TCPV4        (1)
998 #define XEN_NETIF_GSO_TYPE_TCPV6        (2)
999 
1000 /*
1001  * This structure needs to fit within both netif_tx_request_t and
1002  * netif_rx_response_t for compatibility.
1003  */
1004 struct netif_extra_info {
1005     uint8_t type;                  /* XEN_NETIF_EXTRA_TYPE_* */
1006     uint8_t flags;                 /* XEN_NETIF_EXTRA_FLAG_* */
1007     union {                        /* type specific data; variant is selected by 'type' */
1008         struct {                   /* XEN_NETIF_EXTRA_TYPE_GSO */
1009             uint16_t size;         /* Maximum payload size of each segment. */
1010             uint8_t type;          /* XEN_NETIF_GSO_TYPE_* */
1011             uint8_t pad;
1012             uint16_t features;     /* Extra GSO features required to process this packet. */
1013         } gso;
1014         struct {                   /* XEN_NETIF_EXTRA_TYPE_MCAST_{ADD,DEL} */
1015             uint8_t addr[6];       /* Address to add/remove. */
1016         } mcast;
1017         struct {                   /* XEN_NETIF_EXTRA_TYPE_HASH */
1018             uint8_t type;          /* Hash type ('htype' in the layout diagram). */
1019             uint8_t algorithm;     /* Algorithm used to calculate the hash. */
1020             uint8_t value[4];      /* Hash value, least significant byte first. */
1021         } hash;
1022         uint16_t pad[3];           /* 6 octets: same size as each variant above */
1023     } u;
1024 };
1025 typedef struct netif_extra_info netif_extra_info_t;
1026 
1027 struct netif_tx_response {   /* guest transmit response (backend -> frontend) */
1028     uint16_t id;             /* Reflects id in transmit request. */
1029     int16_t  status;         /* NETIF_RSP_* */
1030 };
1031 typedef struct netif_tx_response netif_tx_response_t;
1032 
1033 struct netif_rx_request {    /* guest receive request: frontend offers a buffer */
1034     uint16_t    id;        /* Echoed in response message.        */
1035     uint16_t    pad;       /* Padding; no meaning is documented for it. */
1036     grant_ref_t gref;      /* Reference to incoming granted frame. */
1037 };
1038 typedef struct netif_rx_request netif_rx_request_t;
1039 
1040 /* Packet data has been validated against protocol checksum. */
1041 #define _NETRXF_data_validated (0)                          /* bit index */
1042 #define  NETRXF_data_validated (1U<<_NETRXF_data_validated) /* bit mask for netif_rx_response.flags */
1043
1044 /* Protocol checksum field is blank in the packet (hardware offload)? */
1045 #define _NETRXF_csum_blank     (1)                          /* bit index */
1046 #define  NETRXF_csum_blank     (1U<<_NETRXF_csum_blank)     /* bit mask for netif_rx_response.flags */
1047
1048 /* Packet continues in the next descriptor (an rx response slot). */
1049 #define _NETRXF_more_data      (2)                          /* bit index */
1050 #define  NETRXF_more_data      (1U<<_NETRXF_more_data)      /* bit mask for netif_rx_response.flags */
1051
1052 /* Packet to be followed by extra descriptor(s). */
1053 #define _NETRXF_extra_info     (3)                          /* bit index */
1054 #define  NETRXF_extra_info     (1U<<_NETRXF_extra_info)     /* bit mask for netif_rx_response.flags */
1055
1056 /* Packet has GSO prefix. Deprecated but included for compatibility */
1057 #define _NETRXF_gso_prefix     (4)                          /* bit index */
1058 #define  NETRXF_gso_prefix     (1U<<_NETRXF_gso_prefix)     /* bit mask for netif_rx_response.flags */
1059 
1060 struct netif_rx_response {   /* guest receive response (backend -> frontend) */
1061     uint16_t id;             /* Reflects id in receive request. */
1062     uint16_t offset;         /* Offset in page of start of received packet. */
1063     uint16_t flags;          /* NETRXF_* */
1064     int16_t  status;         /* -ve: NETIF_RSP_*; +ve: Rx'ed pkt size. */
1065 };
1066 typedef struct netif_rx_response netif_rx_response_t;
1067 
1068 /*
1069  * Generate netif ring structures and types.
1070  */
1071
1072 DEFINE_RING_TYPES(netif_tx, struct netif_tx_request, struct netif_tx_response); /* transmit ring: tx request / tx response entries */
1073 DEFINE_RING_TYPES(netif_rx, struct netif_rx_request, struct netif_rx_response); /* receive ring: rx request / rx response entries */
1074 
1075 #define NETIF_RSP_DROPPED         -2 /* NETIF_RSP_* values go in the int16_t 'status' of tx/rx responses */
1076 #define NETIF_RSP_ERROR           -1
1077 #define NETIF_RSP_OKAY             0
1078 /* No response: used for auxiliary requests (e.g., netif_extra_info_t). */
1079 #define NETIF_RSP_NULL             1
1080 
1081 #endif
1082 
1083 /*
1084  * Local variables:
1085  * mode: C
1086  * c-file-style: "BSD"
1087  * c-basic-offset: 4
1088  * tab-width: 4
1089  * indent-tabs-mode: nil
1090  * End:
1091  */
1092