1 /* SPDX-License-Identifier: MIT */ 2 /****************************************************************************** 3 * xen_netif.h 4 * 5 * Unified network-device I/O interface for Xen guest OSes. 6 * 7 * Copyright (c) 2003-2004, Keir Fraser 8 */ 9 10 #ifndef __XEN_PUBLIC_IO_XEN_NETIF_H__ 11 #define __XEN_PUBLIC_IO_XEN_NETIF_H__ 12 13 #include "ring.h" 14 #include "../grant_table.h" 15 16 /* 17 * Older implementation of Xen network frontend / backend has an 18 * implicit dependency on the MAX_SKB_FRAGS as the maximum number of 19 * ring slots a skb can use. Netfront / netback may not work as 20 * expected when frontend and backend have different MAX_SKB_FRAGS. 21 * 22 * A better approach is to add mechanism for netfront / netback to 23 * negotiate this value. However we cannot fix all possible 24 * frontends, so we need to define a value which states the minimum 25 * slots backend must support. 26 * 27 * The minimum value derives from older Linux kernel's MAX_SKB_FRAGS 28 * (18), which is proved to work with most frontends. Any new backend 29 * which doesn't negotiate with frontend should expect frontend to 30 * send a valid packet using slots up to this value. 31 */ 32 #define XEN_NETIF_NR_SLOTS_MIN 18 33 34 /* 35 * Notifications after enqueuing any type of message should be conditional on 36 * the appropriate req_event or rsp_event field in the shared ring. 37 * If the client sends notification for rx requests then it should specify 38 * feature 'feature-rx-notify' via xenbus. Otherwise the backend will assume 39 * that it cannot safely queue packets (as it may not be kicked to send them). 40 */ 41 42 /* 43 * "feature-split-event-channels" is introduced to separate guest TX 44 * and RX notification. Backend either doesn't support this feature or 45 * advertises it via xenstore as 0 (disabled) or 1 (enabled). 46 * 47 * To make use of this feature, frontend should allocate two event 48 * channels for TX and RX, advertise them to backend as 49 * "event-channel-tx" and "event-channel-rx" respectively. If frontend 50 * doesn't want to use this feature, it just writes "event-channel" 51 * node as before. 52 */ 53 54 /* 55 * Multiple transmit and receive queues: 56 * If supported, the backend will write the key "multi-queue-max-queues" to 57 * the directory for that vif, and set its value to the maximum supported 58 * number of queues. 59 * Frontends that are aware of this feature and wish to use it can write the 60 * key "multi-queue-num-queues", set to the number they wish to use, which 61 * must be greater than zero, and no more than the value reported by the backend 62 * in "multi-queue-max-queues". 63 * 64 * Queues replicate the shared rings and event channels. 65 * "feature-split-event-channels" may optionally be used when using 66 * multiple queues, but is not mandatory. 67 * 68 * Each queue consists of one shared ring pair, i.e. there must be the same 69 * number of tx and rx rings. 70 * 71 * For frontends requesting just one queue, the usual event-channel and 72 * ring-ref keys are written as before, simplifying the backend processing 73 * to avoid distinguishing between a frontend that doesn't understand the 74 * multi-queue feature, and one that does, but requested only one queue. 75 * 76 * Frontends requesting two or more queues must not write the toplevel 77 * event-channel (or event-channel-{tx,rx}) and {tx,rx}-ring-ref keys, 78 * instead writing those keys under sub-keys having the name "queue-N" where 79 * N is the integer ID of the queue for which those keys belong. Queues 80 * are indexed from zero. For example, a frontend with two queues and split 81 * event channels must write the following set of queue-related keys: 82 * 83 * /local/domain/1/device/vif/0/multi-queue-num-queues = "2" 84 * /local/domain/1/device/vif/0/queue-0 = "" 85 * /local/domain/1/device/vif/0/queue-0/tx-ring-ref = "<ring-ref-tx0>" 86 * /local/domain/1/device/vif/0/queue-0/rx-ring-ref = "<ring-ref-rx0>" 87 * /local/domain/1/device/vif/0/queue-0/event-channel-tx = "<evtchn-tx0>" 88 * /local/domain/1/device/vif/0/queue-0/event-channel-rx = "<evtchn-rx0>" 89 * /local/domain/1/device/vif/0/queue-1 = "" 90 * /local/domain/1/device/vif/0/queue-1/tx-ring-ref = "<ring-ref-tx1>" 91 * /local/domain/1/device/vif/0/queue-1/rx-ring-ref = "<ring-ref-rx1" 92 * /local/domain/1/device/vif/0/queue-1/event-channel-tx = "<evtchn-tx1>" 93 * /local/domain/1/device/vif/0/queue-1/event-channel-rx = "<evtchn-rx1>" 94 * 95 * If there is any inconsistency in the XenStore data, the backend may 96 * choose not to connect any queues, instead treating the request as an 97 * error. This includes scenarios where more (or fewer) queues were 98 * requested than the frontend provided details for. 99 * 100 * Mapping of packets to queues is considered to be a function of the 101 * transmitting system (backend or frontend) and is not negotiated 102 * between the two. Guests are free to transmit packets on any queue 103 * they choose, provided it has been set up correctly. Guests must be 104 * prepared to receive packets on any queue they have requested be set up. 105 */ 106 107 /* 108 * "feature-no-csum-offload" should be used to turn IPv4 TCP/UDP checksum 109 * offload off or on. If it is missing then the feature is assumed to be on. 110 * "feature-ipv6-csum-offload" should be used to turn IPv6 TCP/UDP checksum 111 * offload on or off. If it is missing then the feature is assumed to be off. 112 */ 113 114 /* 115 * "feature-gso-tcpv4" and "feature-gso-tcpv6" advertise the capability to 116 * handle large TCP packets (in IPv4 or IPv6 form respectively). Neither 117 * frontends nor backends are assumed to be capable unless the flags are 118 * present. 119 */ 120 121 /* 122 * "feature-multicast-control" and "feature-dynamic-multicast-control" 123 * advertise the capability to filter ethernet multicast packets in the 124 * backend. If the frontend wishes to take advantage of this feature then 125 * it may set "request-multicast-control". If the backend only advertises 126 * "feature-multicast-control" then "request-multicast-control" must be set 127 * before the frontend moves into the connected state. The backend will 128 * sample the value on this state transition and any subsequent change in 129 * value will have no effect. However, if the backend also advertises 130 * "feature-dynamic-multicast-control" then "request-multicast-control" 131 * may be set by the frontend at any time. In this case, the backend will 132 * watch the value and re-sample on watch events. 133 * 134 * If the sampled value of "request-multicast-control" is set then the 135 * backend transmit side should no longer flood multicast packets to the 136 * frontend, it should instead drop any multicast packet that does not 137 * match in a filter list. 138 * The list is amended by the frontend by sending dummy transmit requests 139 * containing XEN_NETIF_EXTRA_TYPE_MCAST_{ADD,DEL} extra-info fragments as 140 * specified below. 141 * Note that the filter list may be amended even if the sampled value of 142 * "request-multicast-control" is not set, however the filter should only 143 * be applied if it is set. 144 */ 145 146 /* 147 * "xdp-headroom" is used to request that extra space is added 148 * for XDP processing. The value is measured in bytes and passed by 149 * the frontend to be consistent between both ends. 150 * If the value is greater than zero that means that 151 * an RX response is going to be passed to an XDP program for processing. 152 * XEN_NETIF_MAX_XDP_HEADROOM defines the maximum headroom offset in bytes 153 * 154 * "feature-xdp-headroom" is set to "1" by the netback side like other features 155 * so a guest can check if an XDP program can be processed. 156 */ 157 #define XEN_NETIF_MAX_XDP_HEADROOM 0x7FFF 158 159 /* 160 * Control ring 161 * ============ 162 * 163 * Some features, such as hashing (detailed below), require a 164 * significant amount of out-of-band data to be passed from frontend to 165 * backend. Use of xenstore is not suitable for large quantities of data 166 * because of quota limitations and so a dedicated 'control ring' is used. 167 * The ability of the backend to use a control ring is advertised by 168 * setting: 169 * 170 * /local/domain/X/backend/<domid>/<vif>/feature-ctrl-ring = "1" 171 * 172 * The frontend provides a control ring to the backend by setting: 173 * 174 * /local/domain/<domid>/device/vif/<vif>/ctrl-ring-ref = <gref> 175 * /local/domain/<domid>/device/vif/<vif>/event-channel-ctrl = <port> 176 * 177 * where <gref> is the grant reference of the shared page used to 178 * implement the control ring and <port> is an event channel to be used 179 * as a mailbox interrupt. These keys must be set before the frontend 180 * moves into the connected state. 181 * 182 * The control ring uses a fixed request/response message size and is 183 * balanced (i.e. one request to one response), so operationally it is much 184 * the same as a transmit or receive ring. 185 * Note that there is no requirement that responses are issued in the same 186 * order as requests. 187 */ 188 189 /* 190 * Hash types 191 * ========== 192 * 193 * For the purposes of the definitions below, 'Packet[]' is an array of 194 * octets containing an IP packet without options, 'Array[X..Y]' means a 195 * sub-array of 'Array' containing bytes X thru Y inclusive, and '+' is 196 * used to indicate concatenation of arrays. 197 */ 198 199 /* 200 * A hash calculated over an IP version 4 header as follows: 201 * 202 * Buffer[0..8] = Packet[12..15] (source address) + 203 * Packet[16..19] (destination address) 204 * 205 * Result = Hash(Buffer, 8) 206 */ 207 #define _XEN_NETIF_CTRL_HASH_TYPE_IPV4 0 208 #define XEN_NETIF_CTRL_HASH_TYPE_IPV4 \ 209 (1 << _XEN_NETIF_CTRL_HASH_TYPE_IPV4) 210 211 /* 212 * A hash calculated over an IP version 4 header and TCP header as 213 * follows: 214 * 215 * Buffer[0..12] = Packet[12..15] (source address) + 216 * Packet[16..19] (destination address) + 217 * Packet[20..21] (source port) + 218 * Packet[22..23] (destination port) 219 * 220 * Result = Hash(Buffer, 12) 221 */ 222 #define _XEN_NETIF_CTRL_HASH_TYPE_IPV4_TCP 1 223 #define XEN_NETIF_CTRL_HASH_TYPE_IPV4_TCP \ 224 (1 << _XEN_NETIF_CTRL_HASH_TYPE_IPV4_TCP) 225 226 /* 227 * A hash calculated over an IP version 6 header as follows: 228 * 229 * Buffer[0..32] = Packet[8..23] (source address ) + 230 * Packet[24..39] (destination address) 231 * 232 * Result = Hash(Buffer, 32) 233 */ 234 #define _XEN_NETIF_CTRL_HASH_TYPE_IPV6 2 235 #define XEN_NETIF_CTRL_HASH_TYPE_IPV6 \ 236 (1 << _XEN_NETIF_CTRL_HASH_TYPE_IPV6) 237 238 /* 239 * A hash calculated over an IP version 6 header and TCP header as 240 * follows: 241 * 242 * Buffer[0..36] = Packet[8..23] (source address) + 243 * Packet[24..39] (destination address) + 244 * Packet[40..41] (source port) + 245 * Packet[42..43] (destination port) 246 * 247 * Result = Hash(Buffer, 36) 248 */ 249 #define _XEN_NETIF_CTRL_HASH_TYPE_IPV6_TCP 3 250 #define XEN_NETIF_CTRL_HASH_TYPE_IPV6_TCP \ 251 (1 << _XEN_NETIF_CTRL_HASH_TYPE_IPV6_TCP) 252 253 /* 254 * Hash algorithms 255 * =============== 256 */ 257 258 #define XEN_NETIF_CTRL_HASH_ALGORITHM_NONE 0 259 260 /* 261 * Toeplitz hash: 262 */ 263 264 #define XEN_NETIF_CTRL_HASH_ALGORITHM_TOEPLITZ 1 265 266 /* 267 * This algorithm uses a 'key' as well as the data buffer itself. 268 * (Buffer[] and Key[] are treated as shift-registers where the MSB of 269 * Buffer/Key[0] is considered 'left-most' and the LSB of Buffer/Key[N-1] 270 * is the 'right-most'). 271 * 272 * Value = 0 273 * For number of bits in Buffer[] 274 * If (left-most bit of Buffer[] is 1) 275 * Value ^= left-most 32 bits of Key[] 276 * Key[] << 1 277 * Buffer[] << 1 278 * 279 * The code below is provided for convenience where an operating system 280 * does not already provide an implementation. 281 */ 282 #ifdef XEN_NETIF_DEFINE_TOEPLITZ 283 static uint32_t xen_netif_toeplitz_hash(const uint8_t *key, 284 unsigned int keylen, 285 const uint8_t *buf, unsigned int buflen) 286 { 287 unsigned int keyi, bufi; 288 uint64_t prefix = 0; 289 uint64_t hash = 0; 290 291 /* Pre-load prefix with the first 8 bytes of the key */ 292 for (keyi = 0; keyi < 8; keyi++) { 293 prefix <<= 8; 294 prefix |= (keyi < keylen) ? key[keyi] : 0; 295 } 296 297 for (bufi = 0; bufi < buflen; bufi++) { 298 uint8_t byte = buf[bufi]; 299 unsigned int bit; 300 301 for (bit = 0; bit < 8; bit++) { 302 if (byte & 0x80) 303 hash ^= prefix; 304 prefix <<= 1; 305 byte <<= 1; 306 } 307 308 /* 309 * 'prefix' has now been left-shifted by 8, so 310 * OR in the next byte. 311 */ 312 prefix |= (keyi < keylen) ? key[keyi] : 0; 313 keyi++; 314 } 315 316 /* The valid part of the hash is in the upper 32 bits. */ 317 return hash >> 32; 318 } 319 #endif /* XEN_NETIF_DEFINE_TOEPLITZ */ 320 321 /* 322 * Control requests (struct xen_netif_ctrl_request) 323 * ================================================ 324 * 325 * All requests have the following format: 326 * 327 * 0 1 2 3 4 5 6 7 octet 328 * +-----+-----+-----+-----+-----+-----+-----+-----+ 329 * | id | type | data[0] | 330 * +-----+-----+-----+-----+-----+-----+-----+-----+ 331 * | data[1] | data[2] | 332 * +-----+-----+-----+-----+-----------------------+ 333 * 334 * id: the request identifier, echoed in response. 335 * type: the type of request (see below) 336 * data[]: any data associated with the request (determined by type) 337 */ 338 339 struct xen_netif_ctrl_request { 340 uint16_t id; 341 uint16_t type; 342 343 #define XEN_NETIF_CTRL_TYPE_INVALID 0 344 #define XEN_NETIF_CTRL_TYPE_GET_HASH_FLAGS 1 345 #define XEN_NETIF_CTRL_TYPE_SET_HASH_FLAGS 2 346 #define XEN_NETIF_CTRL_TYPE_SET_HASH_KEY 3 347 #define XEN_NETIF_CTRL_TYPE_GET_HASH_MAPPING_SIZE 4 348 #define XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING_SIZE 5 349 #define XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING 6 350 #define XEN_NETIF_CTRL_TYPE_SET_HASH_ALGORITHM 7 351 352 uint32_t data[3]; 353 }; 354 355 /* 356 * Control responses (struct xen_netif_ctrl_response) 357 * ================================================== 358 * 359 * All responses have the following format: 360 * 361 * 0 1 2 3 4 5 6 7 octet 362 * +-----+-----+-----+-----+-----+-----+-----+-----+ 363 * | id | type | status | 364 * +-----+-----+-----+-----+-----+-----+-----+-----+ 365 * | data | 366 * +-----+-----+-----+-----+ 367 * 368 * id: the corresponding request identifier 369 * type: the type of the corresponding request 370 * status: the status of request processing 371 * data: any data associated with the response (determined by type and 372 * status) 373 */ 374 375 struct xen_netif_ctrl_response { 376 uint16_t id; 377 uint16_t type; 378 uint32_t status; 379 380 #define XEN_NETIF_CTRL_STATUS_SUCCESS 0 381 #define XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED 1 382 #define XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER 2 383 #define XEN_NETIF_CTRL_STATUS_BUFFER_OVERFLOW 3 384 385 uint32_t data; 386 }; 387 388 /* 389 * Control messages 390 * ================ 391 * 392 * XEN_NETIF_CTRL_TYPE_SET_HASH_ALGORITHM 393 * -------------------------------------- 394 * 395 * This is sent by the frontend to set the desired hash algorithm. 396 * 397 * Request: 398 * 399 * type = XEN_NETIF_CTRL_TYPE_SET_HASH_ALGORITHM 400 * data[0] = a XEN_NETIF_CTRL_HASH_ALGORITHM_* value 401 * data[1] = 0 402 * data[2] = 0 403 * 404 * Response: 405 * 406 * status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED - Operation not 407 * supported 408 * XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER - The algorithm is not 409 * supported 410 * XEN_NETIF_CTRL_STATUS_SUCCESS - Operation successful 411 * 412 * NOTE: Setting data[0] to XEN_NETIF_CTRL_HASH_ALGORITHM_NONE disables 413 * hashing and the backend is free to choose how it steers packets 414 * to queues (which is the default behaviour). 415 * 416 * XEN_NETIF_CTRL_TYPE_GET_HASH_FLAGS 417 * ---------------------------------- 418 * 419 * This is sent by the frontend to query the types of hash supported by 420 * the backend. 421 * 422 * Request: 423 * 424 * type = XEN_NETIF_CTRL_TYPE_GET_HASH_FLAGS 425 * data[0] = 0 426 * data[1] = 0 427 * data[2] = 0 428 * 429 * Response: 430 * 431 * status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED - Operation not supported 432 * XEN_NETIF_CTRL_STATUS_SUCCESS - Operation successful 433 * data = supported hash types (if operation was successful) 434 * 435 * NOTE: A valid hash algorithm must be selected before this operation can 436 * succeed. 437 * 438 * XEN_NETIF_CTRL_TYPE_SET_HASH_FLAGS 439 * ---------------------------------- 440 * 441 * This is sent by the frontend to set the types of hash that the backend 442 * should calculate. (See above for hash type definitions). 443 * Note that the 'maximal' type of hash should always be chosen. For 444 * example, if the frontend sets both IPV4 and IPV4_TCP hash types then 445 * the latter hash type should be calculated for any TCP packet and the 446 * former only calculated for non-TCP packets. 447 * 448 * Request: 449 * 450 * type = XEN_NETIF_CTRL_TYPE_SET_HASH_FLAGS 451 * data[0] = bitwise OR of XEN_NETIF_CTRL_HASH_TYPE_* values 452 * data[1] = 0 453 * data[2] = 0 454 * 455 * Response: 456 * 457 * status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED - Operation not 458 * supported 459 * XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER - One or more flag 460 * value is invalid or 461 * unsupported 462 * XEN_NETIF_CTRL_STATUS_SUCCESS - Operation successful 463 * data = 0 464 * 465 * NOTE: A valid hash algorithm must be selected before this operation can 466 * succeed. 467 * Also, setting data[0] to zero disables hashing and the backend 468 * is free to choose how it steers packets to queues. 469 * 470 * XEN_NETIF_CTRL_TYPE_SET_HASH_KEY 471 * -------------------------------- 472 * 473 * This is sent by the frontend to set the key of the hash if the algorithm 474 * requires it. (See hash algorithms above). 475 * 476 * Request: 477 * 478 * type = XEN_NETIF_CTRL_TYPE_SET_HASH_KEY 479 * data[0] = grant reference of page containing the key (assumed to 480 * start at beginning of grant) 481 * data[1] = size of key in octets 482 * data[2] = 0 483 * 484 * Response: 485 * 486 * status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED - Operation not 487 * supported 488 * XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER - Key size is invalid 489 * XEN_NETIF_CTRL_STATUS_BUFFER_OVERFLOW - Key size is larger 490 * than the backend 491 * supports 492 * XEN_NETIF_CTRL_STATUS_SUCCESS - Operation successful 493 * data = 0 494 * 495 * NOTE: Any key octets not specified are assumed to be zero (the key 496 * is assumed to be empty by default) and specifying a new key 497 * invalidates any previous key, hence specifying a key size of 498 * zero will clear the key (which ensures that the calculated hash 499 * will always be zero). 500 * The maximum size of key is algorithm and backend specific, but 501 * is also limited by the single grant reference. 502 * The grant reference may be read-only and must remain valid until 503 * the response has been processed. 504 * 505 * XEN_NETIF_CTRL_TYPE_GET_HASH_MAPPING_SIZE 506 * ----------------------------------------- 507 * 508 * This is sent by the frontend to query the maximum size of mapping 509 * table supported by the backend. The size is specified in terms of 510 * table entries. 511 * 512 * Request: 513 * 514 * type = XEN_NETIF_CTRL_TYPE_GET_HASH_MAPPING_SIZE 515 * data[0] = 0 516 * data[1] = 0 517 * data[2] = 0 518 * 519 * Response: 520 * 521 * status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED - Operation not supported 522 * XEN_NETIF_CTRL_STATUS_SUCCESS - Operation successful 523 * data = maximum number of entries allowed in the mapping table 524 * (if operation was successful) or zero if a mapping table is 525 * not supported (i.e. hash mapping is done only by modular 526 * arithmetic). 527 * 528 * XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING_SIZE 529 * ------------------------------------- 530 * 531 * This is sent by the frontend to set the actual size of the mapping 532 * table to be used by the backend. The size is specified in terms of 533 * table entries. 534 * Any previous table is invalidated by this message and any new table 535 * is assumed to be zero filled. 536 * 537 * Request: 538 * 539 * type = XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING_SIZE 540 * data[0] = number of entries in mapping table 541 * data[1] = 0 542 * data[2] = 0 543 * 544 * Response: 545 * 546 * status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED - Operation not 547 * supported 548 * XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER - Table size is invalid 549 * XEN_NETIF_CTRL_STATUS_SUCCESS - Operation successful 550 * data = 0 551 * 552 * NOTE: Setting data[0] to 0 means that hash mapping should be done 553 * using modular arithmetic. 554 * 555 * XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING 556 * ------------------------------------ 557 * 558 * This is sent by the frontend to set the content of the table mapping 559 * hash value to queue number. The backend should calculate the hash from 560 * the packet header, use it as an index into the table (modulo the size 561 * of the table) and then steer the packet to the queue number found at 562 * that index. 563 * 564 * Request: 565 * 566 * type = XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING 567 * data[0] = grant reference of page containing the mapping (sub-)table 568 * (assumed to start at beginning of grant) 569 * data[1] = size of (sub-)table in entries 570 * data[2] = offset, in entries, of sub-table within overall table 571 * 572 * Response: 573 * 574 * status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED - Operation not 575 * supported 576 * XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER - Table size or content 577 * is invalid 578 * XEN_NETIF_CTRL_STATUS_BUFFER_OVERFLOW - Table size is larger 579 * than the backend 580 * supports 581 * XEN_NETIF_CTRL_STATUS_SUCCESS - Operation successful 582 * data = 0 583 * 584 * NOTE: The overall table has the following format: 585 * 586 * 0 1 2 3 4 5 6 7 octet 587 * +-----+-----+-----+-----+-----+-----+-----+-----+ 588 * | mapping[0] | mapping[1] | 589 * +-----+-----+-----+-----+-----+-----+-----+-----+ 590 * | . | 591 * | . | 592 * | . | 593 * +-----+-----+-----+-----+-----+-----+-----+-----+ 594 * | mapping[N-2] | mapping[N-1] | 595 * +-----+-----+-----+-----+-----+-----+-----+-----+ 596 * 597 * where N is specified by a XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING_SIZE 598 * message and each mapping must specifies a queue between 0 and 599 * "multi-queue-num-queues" (see above). 600 * The backend may support a mapping table larger than can be 601 * mapped by a single grant reference. Thus sub-tables within a 602 * larger table can be individually set by sending multiple messages 603 * with differing offset values. Specifying a new sub-table does not 604 * invalidate any table data outside that range. 605 * The grant reference may be read-only and must remain valid until 606 * the response has been processed. 607 */ 608 609 DEFINE_RING_TYPES(xen_netif_ctrl, 610 struct xen_netif_ctrl_request, 611 struct xen_netif_ctrl_response); 612 613 /* 614 * Guest transmit 615 * ============== 616 * 617 * This is the 'wire' format for transmit (frontend -> backend) packets: 618 * 619 * Fragment 1: xen_netif_tx_request_t - flags = XEN_NETTXF_* 620 * size = total packet size 621 * [Extra 1: xen_netif_extra_info_t] - (only if fragment 1 flags include 622 * XEN_NETTXF_extra_info) 623 * ... 624 * [Extra N: xen_netif_extra_info_t] - (only if extra N-1 flags include 625 * XEN_NETIF_EXTRA_MORE) 626 * ... 627 * Fragment N: xen_netif_tx_request_t - (only if fragment N-1 flags include 628 * XEN_NETTXF_more_data - flags on preceding 629 * extras are not relevant here) 630 * flags = 0 631 * size = fragment size 632 * 633 * NOTE: 634 * 635 * This format slightly is different from that used for receive 636 * (backend -> frontend) packets. Specifically, in a multi-fragment 637 * packet the actual size of fragment 1 can only be determined by 638 * subtracting the sizes of fragments 2..N from the total packet size. 639 * 640 * Ring slot size is 12 octets, however not all request/response 641 * structs use the full size. 642 * 643 * tx request data (xen_netif_tx_request_t) 644 * ------------------------------------ 645 * 646 * 0 1 2 3 4 5 6 7 octet 647 * +-----+-----+-----+-----+-----+-----+-----+-----+ 648 * | grant ref | offset | flags | 649 * +-----+-----+-----+-----+-----+-----+-----+-----+ 650 * | id | size | 651 * +-----+-----+-----+-----+ 652 * 653 * grant ref: Reference to buffer page. 654 * offset: Offset within buffer page. 655 * flags: XEN_NETTXF_*. 656 * id: request identifier, echoed in response. 657 * size: packet size in bytes. 658 * 659 * tx response (xen_netif_tx_response_t) 660 * --------------------------------- 661 * 662 * 0 1 2 3 4 5 6 7 octet 663 * +-----+-----+-----+-----+-----+-----+-----+-----+ 664 * | id | status | unused | 665 * +-----+-----+-----+-----+-----+-----+-----+-----+ 666 * | unused | 667 * +-----+-----+-----+-----+ 668 * 669 * id: reflects id in transmit request 670 * status: XEN_NETIF_RSP_* 671 * 672 * Guest receive 673 * ============= 674 * 675 * This is the 'wire' format for receive (backend -> frontend) packets: 676 * 677 * Fragment 1: xen_netif_rx_request_t - flags = XEN_NETRXF_* 678 * size = fragment size 679 * [Extra 1: xen_netif_extra_info_t] - (only if fragment 1 flags include 680 * XEN_NETRXF_extra_info) 681 * ... 682 * [Extra N: xen_netif_extra_info_t] - (only if extra N-1 flags include 683 * XEN_NETIF_EXTRA_MORE) 684 * ... 685 * Fragment N: xen_netif_rx_request_t - (only if fragment N-1 flags include 686 * XEN_NETRXF_more_data - flags on preceding 687 * extras are not relevant here) 688 * flags = 0 689 * size = fragment size 690 * 691 * NOTE: 692 * 693 * This format slightly is different from that used for transmit 694 * (frontend -> backend) packets. Specifically, in a multi-fragment 695 * packet the size of the packet can only be determined by summing the 696 * sizes of fragments 1..N. 697 * 698 * Ring slot size is 8 octets. 699 * 700 * rx request (xen_netif_rx_request_t) 701 * ------------------------------- 702 * 703 * 0 1 2 3 4 5 6 7 octet 704 * +-----+-----+-----+-----+-----+-----+-----+-----+ 705 * | id | pad | gref | 706 * +-----+-----+-----+-----+-----+-----+-----+-----+ 707 * 708 * id: request identifier, echoed in response. 709 * gref: reference to incoming granted frame. 710 * 711 * rx response (xen_netif_rx_response_t) 712 * --------------------------------- 713 * 714 * 0 1 2 3 4 5 6 7 octet 715 * +-----+-----+-----+-----+-----+-----+-----+-----+ 716 * | id | offset | flags | status | 717 * +-----+-----+-----+-----+-----+-----+-----+-----+ 718 * 719 * id: reflects id in receive request 720 * offset: offset in page of start of received packet 721 * flags: XEN_NETRXF_* 722 * status: -ve: XEN_NETIF_RSP_*; +ve: Rx'ed pkt size. 723 * 724 * NOTE: Historically, to support GSO on the frontend receive side, Linux 725 * netfront does not make use of the rx response id (because, as 726 * described below, extra info structures overlay the id field). 727 * Instead it assumes that responses always appear in the same ring 728 * slot as their corresponding request. Thus, to maintain 729 * compatibility, backends must make sure this is the case. 730 * 731 * Extra Info 732 * ========== 733 * 734 * Can be present if initial request or response has NET{T,R}XF_extra_info, 735 * or previous extra request has XEN_NETIF_EXTRA_MORE. 736 * 737 * The struct therefore needs to fit into either a tx or rx slot and 738 * is therefore limited to 8 octets. 739 * 740 * NOTE: Because extra info data overlays the usual request/response 741 * structures, there is no id information in the opposite direction. 742 * So, if an extra info overlays an rx response the frontend can 743 * assume that it is in the same ring slot as the request that was 744 * consumed to make the slot available, and the backend must ensure 745 * this assumption is true. 746 * 747 * extra info (xen_netif_extra_info_t) 748 * ------------------------------- 749 * 750 * General format: 751 * 752 * 0 1 2 3 4 5 6 7 octet 753 * +-----+-----+-----+-----+-----+-----+-----+-----+ 754 * |type |flags| type specific data | 755 * +-----+-----+-----+-----+-----+-----+-----+-----+ 756 * | padding for tx | 757 * +-----+-----+-----+-----+ 758 * 759 * type: XEN_NETIF_EXTRA_TYPE_* 760 * flags: XEN_NETIF_EXTRA_FLAG_* 761 * padding for tx: present only in the tx case due to 8 octet limit 762 * from rx case. Not shown in type specific entries 763 * below. 764 * 765 * XEN_NETIF_EXTRA_TYPE_GSO: 766 * 767 * 0 1 2 3 4 5 6 7 octet 768 * +-----+-----+-----+-----+-----+-----+-----+-----+ 769 * |type |flags| size |type | pad | features | 770 * +-----+-----+-----+-----+-----+-----+-----+-----+ 771 * 772 * type: Must be XEN_NETIF_EXTRA_TYPE_GSO 773 * flags: XEN_NETIF_EXTRA_FLAG_* 774 * size: Maximum payload size of each segment. For example, 775 * for TCP this is just the path MSS. 776 * type: XEN_NETIF_GSO_TYPE_*: This determines the protocol of 777 * the packet and any extra features required to segment the 778 * packet properly. 779 * features: EN_XEN_NETIF_GSO_FEAT_*: This specifies any extra GSO 780 * features required to process this packet, such as ECN 781 * support for TCPv4. 782 * 783 * XEN_NETIF_EXTRA_TYPE_MCAST_{ADD,DEL}: 784 * 785 * 0 1 2 3 4 5 6 7 octet 786 * +-----+-----+-----+-----+-----+-----+-----+-----+ 787 * |type |flags| addr | 788 * +-----+-----+-----+-----+-----+-----+-----+-----+ 789 * 790 * type: Must be XEN_NETIF_EXTRA_TYPE_MCAST_{ADD,DEL} 791 * flags: XEN_NETIF_EXTRA_FLAG_* 792 * addr: address to add/remove 793 * 794 * XEN_NETIF_EXTRA_TYPE_HASH: 795 * 796 * A backend that supports teoplitz hashing is assumed to accept 797 * this type of extra info in transmit packets. 798 * A frontend that enables hashing is assumed to accept 799 * this type of extra info in receive packets. 800 * 801 * 0 1 2 3 4 5 6 7 octet 802 * +-----+-----+-----+-----+-----+-----+-----+-----+ 803 * |type |flags|htype| alg |LSB ---- value ---- MSB| 804 * +-----+-----+-----+-----+-----+-----+-----+-----+ 805 * 806 * type: Must be XEN_NETIF_EXTRA_TYPE_HASH 807 * flags: XEN_NETIF_EXTRA_FLAG_* 808 * htype: Hash type (one of _XEN_NETIF_CTRL_HASH_TYPE_* - see above) 809 * alg: The algorithm used to calculate the hash (one of 810 * XEN_NETIF_CTRL_HASH_TYPE_ALGORITHM_* - see above) 811 * value: Hash value 812 */ 813 814 /* Protocol checksum field is blank in the packet (hardware offload)? */ 815 #define _XEN_NETTXF_csum_blank (0) 816 #define XEN_NETTXF_csum_blank (1U<<_XEN_NETTXF_csum_blank) 817 818 /* Packet data has been validated against protocol checksum. */ 819 #define _XEN_NETTXF_data_validated (1) 820 #define XEN_NETTXF_data_validated (1U<<_XEN_NETTXF_data_validated) 821 822 /* Packet continues in the next request descriptor. */ 823 #define _XEN_NETTXF_more_data (2) 824 #define XEN_NETTXF_more_data (1U<<_XEN_NETTXF_more_data) 825 826 /* Packet to be followed by extra descriptor(s). */ 827 #define _XEN_NETTXF_extra_info (3) 828 #define XEN_NETTXF_extra_info (1U<<_XEN_NETTXF_extra_info) 829 830 #define XEN_NETIF_MAX_TX_SIZE 0xFFFF 831 struct xen_netif_tx_request { 832 grant_ref_t gref; 833 uint16_t offset; 834 uint16_t flags; 835 uint16_t id; 836 uint16_t size; 837 }; 838 839 /* Types of xen_netif_extra_info descriptors. */ 840 #define XEN_NETIF_EXTRA_TYPE_NONE (0) /* Never used - invalid */ 841 #define XEN_NETIF_EXTRA_TYPE_GSO (1) /* u.gso */ 842 #define XEN_NETIF_EXTRA_TYPE_MCAST_ADD (2) /* u.mcast */ 843 #define XEN_NETIF_EXTRA_TYPE_MCAST_DEL (3) /* u.mcast */ 844 #define XEN_NETIF_EXTRA_TYPE_HASH (4) /* u.hash */ 845 #define XEN_NETIF_EXTRA_TYPE_XDP (5) /* u.xdp */ 846 #define XEN_NETIF_EXTRA_TYPE_MAX (6) 847 848 /* xen_netif_extra_info_t flags. */ 849 #define _XEN_NETIF_EXTRA_FLAG_MORE (0) 850 #define XEN_NETIF_EXTRA_FLAG_MORE (1U<<_XEN_NETIF_EXTRA_FLAG_MORE) 851 852 /* GSO types */ 853 #define XEN_NETIF_GSO_TYPE_NONE (0) 854 #define XEN_NETIF_GSO_TYPE_TCPV4 (1) 855 #define XEN_NETIF_GSO_TYPE_TCPV6 (2) 856 857 /* 858 * This structure needs to fit within both xen_netif_tx_request_t and 859 * xen_netif_rx_response_t for compatibility. 860 */ 861 struct xen_netif_extra_info { 862 uint8_t type; 863 uint8_t flags; 864 union { 865 struct { 866 uint16_t size; 867 uint8_t type; 868 uint8_t pad; 869 uint16_t features; 870 } gso; 871 struct { 872 uint8_t addr[6]; 873 } mcast; 874 struct { 875 uint8_t type; 876 uint8_t algorithm; 877 uint8_t value[4]; 878 } hash; 879 struct { 880 uint16_t headroom; 881 uint16_t pad[2]; 882 } xdp; 883 uint16_t pad[3]; 884 } u; 885 }; 886 887 struct xen_netif_tx_response { 888 uint16_t id; 889 int16_t status; 890 }; 891 892 struct xen_netif_rx_request { 893 uint16_t id; /* Echoed in response message. */ 894 uint16_t pad; 895 grant_ref_t gref; 896 }; 897 898 /* Packet data has been validated against protocol checksum. */ 899 #define _XEN_NETRXF_data_validated (0) 900 #define XEN_NETRXF_data_validated (1U<<_XEN_NETRXF_data_validated) 901 902 /* Protocol checksum field is blank in the packet (hardware offload)? */ 903 #define _XEN_NETRXF_csum_blank (1) 904 #define XEN_NETRXF_csum_blank (1U<<_XEN_NETRXF_csum_blank) 905 906 /* Packet continues in the next request descriptor. */ 907 #define _XEN_NETRXF_more_data (2) 908 #define XEN_NETRXF_more_data (1U<<_XEN_NETRXF_more_data) 909 910 /* Packet to be followed by extra descriptor(s). */ 911 #define _XEN_NETRXF_extra_info (3) 912 #define XEN_NETRXF_extra_info (1U<<_XEN_NETRXF_extra_info) 913 914 /* Packet has GSO prefix. Deprecated but included for compatibility */ 915 #define _XEN_NETRXF_gso_prefix (4) 916 #define XEN_NETRXF_gso_prefix (1U<<_XEN_NETRXF_gso_prefix) 917 918 struct xen_netif_rx_response { 919 uint16_t id; 920 uint16_t offset; 921 uint16_t flags; 922 int16_t status; 923 }; 924 925 /* 926 * Generate xen_netif ring structures and types. 927 */ 928 929 DEFINE_RING_TYPES(xen_netif_tx, struct xen_netif_tx_request, 930 struct xen_netif_tx_response); 931 DEFINE_RING_TYPES(xen_netif_rx, struct xen_netif_rx_request, 932 struct xen_netif_rx_response); 933 934 #define XEN_NETIF_RSP_DROPPED -2 935 #define XEN_NETIF_RSP_ERROR -1 936 #define XEN_NETIF_RSP_OKAY 0 937 /* No response: used for auxiliary requests (e.g., xen_netif_extra_info_t). */ 938 #define XEN_NETIF_RSP_NULL 1 939 940 #endif 941