/*
 * Copyright (c) 2016 Citrix Systems Inc.
 * Copyright (c) 2002-2005, K A Fraser
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation; or, when distributed
 * separately from the Linux kernel or incorporated into other
 * software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
#include "common.h"

#include <linux/kthread.h>

#include <xen/xen.h>
#include <xen/events.h>

static bool xenvif_rx_ring_slots_available(struct xenvif_queue *queue)
{
	RING_IDX prod, cons;
	struct sk_buff *skb;
	int needed;

	skb = skb_peek(&queue->rx_queue);
	if (!skb)
		return false;

	needed = DIV_ROUND_UP(skb->len, XEN_PAGE_SIZE);
	if (skb_is_gso(skb))
		needed++;
	if (skb->sw_hash)
		needed++;

	do {
		prod = queue->rx.sring->req_prod;
		cons = queue->rx.req_cons;

		if (prod - cons >= needed)
			return true;

		queue->rx.sring->req_event = prod + 1;

		/* Make sure event is visible before we check prod
		 * again.
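		 * The write of req_event must be ordered before the
		 * re-read of req_prod: if the frontend pushed new
		 * requests and sampled req_event before our update, we
		 * must see the new req_prod here rather than wait for
		 * an event that will never be sent.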
63 */ 64 mb(); 65 } while (queue->rx.sring->req_prod != prod); 66 67 return false; 68 } 69 70 void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb) 71 { 72 unsigned long flags; 73 74 spin_lock_irqsave(&queue->rx_queue.lock, flags); 75 76 __skb_queue_tail(&queue->rx_queue, skb); 77 78 queue->rx_queue_len += skb->len; 79 if (queue->rx_queue_len > queue->rx_queue_max) { 80 struct net_device *dev = queue->vif->dev; 81 82 netif_tx_stop_queue(netdev_get_tx_queue(dev, queue->id)); 83 } 84 85 spin_unlock_irqrestore(&queue->rx_queue.lock, flags); 86 } 87 88 static struct sk_buff *xenvif_rx_dequeue(struct xenvif_queue *queue) 89 { 90 struct sk_buff *skb; 91 92 spin_lock_irq(&queue->rx_queue.lock); 93 94 skb = __skb_dequeue(&queue->rx_queue); 95 if (skb) { 96 queue->rx_queue_len -= skb->len; 97 if (queue->rx_queue_len < queue->rx_queue_max) { 98 struct netdev_queue *txq; 99 100 txq = netdev_get_tx_queue(queue->vif->dev, queue->id); 101 netif_tx_wake_queue(txq); 102 } 103 } 104 105 spin_unlock_irq(&queue->rx_queue.lock); 106 107 return skb; 108 } 109 110 static void xenvif_rx_queue_purge(struct xenvif_queue *queue) 111 { 112 struct sk_buff *skb; 113 114 while ((skb = xenvif_rx_dequeue(queue)) != NULL) 115 kfree_skb(skb); 116 } 117 118 static void xenvif_rx_queue_drop_expired(struct xenvif_queue *queue) 119 { 120 struct sk_buff *skb; 121 122 for (;;) { 123 skb = skb_peek(&queue->rx_queue); 124 if (!skb) 125 break; 126 if (time_before(jiffies, XENVIF_RX_CB(skb)->expires)) 127 break; 128 xenvif_rx_dequeue(queue); 129 kfree_skb(skb); 130 } 131 } 132 133 static void xenvif_rx_copy_flush(struct xenvif_queue *queue) 134 { 135 unsigned int i; 136 int notify; 137 138 gnttab_batch_copy(queue->rx_copy.op, queue->rx_copy.num); 139 140 for (i = 0; i < queue->rx_copy.num; i++) { 141 struct gnttab_copy *op; 142 143 op = &queue->rx_copy.op[i]; 144 145 /* If the copy failed, overwrite the status field in 146 * the corresponding response. 147 */ 148 if (unlikely(op->status != GNTST_okay)) { 149 struct xen_netif_rx_response *rsp; 150 151 rsp = RING_GET_RESPONSE(&queue->rx, 152 queue->rx_copy.idx[i]); 153 rsp->status = op->status; 154 } 155 } 156 157 queue->rx_copy.num = 0; 158 159 /* Push responses for all completed packets. 
	 */
	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&queue->rx, notify);
	if (notify)
		notify_remote_via_irq(queue->rx_irq);

	__skb_queue_purge(queue->rx_copy.completed);
}

static void xenvif_rx_copy_add(struct xenvif_queue *queue,
			       struct xen_netif_rx_request *req,
			       unsigned int offset, void *data, size_t len)
{
	struct gnttab_copy *op;
	struct page *page;
	struct xen_page_foreign *foreign;

	if (queue->rx_copy.num == COPY_BATCH_SIZE)
		xenvif_rx_copy_flush(queue);

	op = &queue->rx_copy.op[queue->rx_copy.num];

	page = virt_to_page(data);

	op->flags = GNTCOPY_dest_gref;

	foreign = xen_page_foreign(page);
	if (foreign) {
		op->source.domid = foreign->domid;
		op->source.u.ref = foreign->gref;
		op->flags |= GNTCOPY_source_gref;
	} else {
		op->source.u.gmfn = virt_to_gfn(data);
		op->source.domid = DOMID_SELF;
	}

	op->source.offset = xen_offset_in_page(data);
	op->dest.u.ref = req->gref;
	op->dest.domid = queue->vif->domid;
	op->dest.offset = offset;
	op->len = len;

	queue->rx_copy.idx[queue->rx_copy.num] = queue->rx.req_cons;
	queue->rx_copy.num++;
}

static unsigned int xenvif_gso_type(struct sk_buff *skb)
{
	if (skb_is_gso(skb)) {
		if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4)
			return XEN_NETIF_GSO_TYPE_TCPV4;
		else
			return XEN_NETIF_GSO_TYPE_TCPV6;
	}
	return XEN_NETIF_GSO_TYPE_NONE;
}

struct xenvif_pkt_state {
	struct sk_buff *skb;
	size_t remaining_len;
	struct sk_buff *frag_iter;
	int frag; /* frag == -1 => frag_iter->head */
	unsigned int frag_offset;
	struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
	unsigned int extra_count;
	unsigned int slot;
};

static void xenvif_rx_next_skb(struct xenvif_queue *queue,
			       struct xenvif_pkt_state *pkt)
{
	struct sk_buff *skb;
	unsigned int gso_type;

	skb = xenvif_rx_dequeue(queue);

	queue->stats.tx_bytes += skb->len;
	queue->stats.tx_packets++;

	/* Reset packet state. */
	memset(pkt, 0, sizeof(struct xenvif_pkt_state));

	pkt->skb = skb;
	pkt->frag_iter = skb;
	pkt->remaining_len = skb->len;
	pkt->frag = -1;

	gso_type = xenvif_gso_type(skb);
	if ((1 << gso_type) & queue->vif->gso_mask) {
		struct xen_netif_extra_info *extra;

		extra = &pkt->extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];

		extra->u.gso.type = gso_type;
		extra->u.gso.size = skb_shinfo(skb)->gso_size;
		extra->u.gso.pad = 0;
		extra->u.gso.features = 0;
		extra->type = XEN_NETIF_EXTRA_TYPE_GSO;
		extra->flags = 0;

		pkt->extra_count++;
	}

	if (queue->vif->xdp_headroom) {
		struct xen_netif_extra_info *extra;

		extra = &pkt->extras[XEN_NETIF_EXTRA_TYPE_XDP - 1];

		memset(extra, 0, sizeof(struct xen_netif_extra_info));
		extra->u.xdp.headroom = queue->vif->xdp_headroom;
		extra->type = XEN_NETIF_EXTRA_TYPE_XDP;
		extra->flags = 0;

		pkt->extra_count++;
	}

	if (skb->sw_hash) {
		struct xen_netif_extra_info *extra;

		extra = &pkt->extras[XEN_NETIF_EXTRA_TYPE_HASH - 1];

		extra->u.hash.algorithm =
			XEN_NETIF_CTRL_HASH_ALGORITHM_TOEPLITZ;

		if (skb->l4_hash)
			extra->u.hash.type =
				skb->protocol == htons(ETH_P_IP) ?
				_XEN_NETIF_CTRL_HASH_TYPE_IPV4_TCP :
				_XEN_NETIF_CTRL_HASH_TYPE_IPV6_TCP;
		else
			extra->u.hash.type =
				skb->protocol == htons(ETH_P_IP) ?
				_XEN_NETIF_CTRL_HASH_TYPE_IPV4 :
				_XEN_NETIF_CTRL_HASH_TYPE_IPV6;

		*(uint32_t *)extra->u.hash.value = skb_get_hash_raw(skb);

		extra->type = XEN_NETIF_EXTRA_TYPE_HASH;
		extra->flags = 0;

		pkt->extra_count++;
	}
}

static void xenvif_rx_complete(struct xenvif_queue *queue,
			       struct xenvif_pkt_state *pkt)
{
	/* All responses are ready to be pushed. */
	queue->rx.rsp_prod_pvt = queue->rx.req_cons;

	__skb_queue_tail(queue->rx_copy.completed, pkt->skb);
}

static void xenvif_rx_next_frag(struct xenvif_pkt_state *pkt)
{
	struct sk_buff *frag_iter = pkt->frag_iter;
	unsigned int nr_frags = skb_shinfo(frag_iter)->nr_frags;

	pkt->frag++;
	pkt->frag_offset = 0;

	if (pkt->frag >= nr_frags) {
		if (frag_iter == pkt->skb)
			pkt->frag_iter = skb_shinfo(frag_iter)->frag_list;
		else
			pkt->frag_iter = frag_iter->next;

		pkt->frag = -1;
	}
}

static void xenvif_rx_next_chunk(struct xenvif_queue *queue,
				 struct xenvif_pkt_state *pkt,
				 unsigned int offset, void **data,
				 size_t *len)
{
	struct sk_buff *frag_iter = pkt->frag_iter;
	void *frag_data;
	size_t frag_len, chunk_len;

	BUG_ON(!frag_iter);

	if (pkt->frag == -1) {
		frag_data = frag_iter->data;
		frag_len = skb_headlen(frag_iter);
	} else {
		skb_frag_t *frag = &skb_shinfo(frag_iter)->frags[pkt->frag];

		frag_data = skb_frag_address(frag);
		frag_len = skb_frag_size(frag);
	}

	frag_data += pkt->frag_offset;
	frag_len -= pkt->frag_offset;

	chunk_len = min_t(size_t, frag_len, XEN_PAGE_SIZE - offset);
	chunk_len = min_t(size_t, chunk_len, XEN_PAGE_SIZE -
					     xen_offset_in_page(frag_data));

	pkt->frag_offset += chunk_len;

	/* Advance to next frag?
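	 * A chunk_len equal to frag_len means the current fragment has
	 * been fully consumed, so move the iterator on to the next one.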
	 */
	if (frag_len == chunk_len)
		xenvif_rx_next_frag(pkt);

	*data = frag_data;
	*len = chunk_len;
}

static void xenvif_rx_data_slot(struct xenvif_queue *queue,
				struct xenvif_pkt_state *pkt,
				struct xen_netif_rx_request *req,
				struct xen_netif_rx_response *rsp)
{
	unsigned int offset = queue->vif->xdp_headroom;
	unsigned int flags;

	do {
		size_t len;
		void *data;

		xenvif_rx_next_chunk(queue, pkt, offset, &data, &len);
		xenvif_rx_copy_add(queue, req, offset, data, len);

		offset += len;
		pkt->remaining_len -= len;

	} while (offset < XEN_PAGE_SIZE && pkt->remaining_len > 0);

	if (pkt->remaining_len > 0)
		flags = XEN_NETRXF_more_data;
	else
		flags = 0;

	if (pkt->slot == 0) {
		struct sk_buff *skb = pkt->skb;

		if (skb->ip_summed == CHECKSUM_PARTIAL)
			flags |= XEN_NETRXF_csum_blank |
				 XEN_NETRXF_data_validated;
		else if (skb->ip_summed == CHECKSUM_UNNECESSARY)
			flags |= XEN_NETRXF_data_validated;

		if (pkt->extra_count != 0)
			flags |= XEN_NETRXF_extra_info;
	}

	rsp->offset = 0;
	rsp->flags = flags;
	rsp->id = req->id;
	rsp->status = (s16)offset;
}

static void xenvif_rx_extra_slot(struct xenvif_queue *queue,
				 struct xenvif_pkt_state *pkt,
				 struct xen_netif_rx_request *req,
				 struct xen_netif_rx_response *rsp)
{
	struct xen_netif_extra_info *extra = (void *)rsp;
	unsigned int i;

	pkt->extra_count--;

	for (i = 0; i < ARRAY_SIZE(pkt->extras); i++) {
		if (pkt->extras[i].type) {
			*extra = pkt->extras[i];

			if (pkt->extra_count != 0)
				extra->flags |= XEN_NETIF_EXTRA_FLAG_MORE;

			pkt->extras[i].type = 0;
			return;
		}
	}
	BUG();
}

static void xenvif_rx_skb(struct xenvif_queue *queue)
{
	struct xenvif_pkt_state pkt;

	xenvif_rx_next_skb(queue, &pkt);

	queue->last_rx_time = jiffies;

	do {
		struct xen_netif_rx_request *req;
		struct xen_netif_rx_response *rsp;

		req = RING_GET_REQUEST(&queue->rx, queue->rx.req_cons);
		rsp = RING_GET_RESPONSE(&queue->rx, queue->rx.req_cons);

		/* Extras must go after the first data slot */
		if (pkt.slot != 0 && pkt.extra_count != 0)
			xenvif_rx_extra_slot(queue, &pkt, req, rsp);
		else
			xenvif_rx_data_slot(queue, &pkt, req, rsp);

		queue->rx.req_cons++;
		pkt.slot++;
	} while (pkt.remaining_len > 0 || pkt.extra_count != 0);

	xenvif_rx_complete(queue, &pkt);
}

#define RX_BATCH_SIZE 64

void xenvif_rx_action(struct xenvif_queue *queue)
{
	struct sk_buff_head completed_skbs;
	unsigned int work_done = 0;

	__skb_queue_head_init(&completed_skbs);
	queue->rx_copy.completed = &completed_skbs;

	while (xenvif_rx_ring_slots_available(queue) &&
	       work_done < RX_BATCH_SIZE) {
		xenvif_rx_skb(queue);
		work_done++;
	}

	/* Flush any pending copies and complete all skbs.
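	 * Responses are only pushed to the frontend once the batched
	 * grant copy operations have actually been performed.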
	 */
	xenvif_rx_copy_flush(queue);
}

static bool xenvif_rx_queue_stalled(struct xenvif_queue *queue)
{
	RING_IDX prod, cons;

	prod = queue->rx.sring->req_prod;
	cons = queue->rx.req_cons;

	return !queue->stalled &&
		prod - cons < 1 &&
		time_after(jiffies,
			   queue->last_rx_time + queue->vif->stall_timeout);
}

static bool xenvif_rx_queue_ready(struct xenvif_queue *queue)
{
	RING_IDX prod, cons;

	prod = queue->rx.sring->req_prod;
	cons = queue->rx.req_cons;

	return queue->stalled && prod - cons >= 1;
}

static bool xenvif_have_rx_work(struct xenvif_queue *queue)
{
	return xenvif_rx_ring_slots_available(queue) ||
		(queue->vif->stall_timeout &&
		 (xenvif_rx_queue_stalled(queue) ||
		  xenvif_rx_queue_ready(queue))) ||
		kthread_should_stop() ||
		queue->vif->disabled;
}

static long xenvif_rx_queue_timeout(struct xenvif_queue *queue)
{
	struct sk_buff *skb;
	long timeout;

	skb = skb_peek(&queue->rx_queue);
	if (!skb)
		return MAX_SCHEDULE_TIMEOUT;

	timeout = XENVIF_RX_CB(skb)->expires - jiffies;
	return timeout < 0 ? 0 : timeout;
}

/* Wait until the guest Rx thread has work.
 *
 * The timeout needs to be adjusted based on the current head of the
 * queue (and not just the head at the beginning). In particular, if
 * the queue is initially empty an infinite timeout is used and this
 * needs to be reduced when a skb is queued.
 *
 * This cannot be done with wait_event_timeout() because it only
 * calculates the timeout once.
 */
static void xenvif_wait_for_rx_work(struct xenvif_queue *queue)
{
	DEFINE_WAIT(wait);

	if (xenvif_have_rx_work(queue))
		return;

	for (;;) {
		long ret;

		prepare_to_wait(&queue->wq, &wait, TASK_INTERRUPTIBLE);
		if (xenvif_have_rx_work(queue))
			break;
		ret = schedule_timeout(xenvif_rx_queue_timeout(queue));
		if (!ret)
			break;
	}
	finish_wait(&queue->wq, &wait);
}

static void xenvif_queue_carrier_off(struct xenvif_queue *queue)
{
	struct xenvif *vif = queue->vif;

	queue->stalled = true;

	/* At least one queue has stalled? Disable the carrier. */
	spin_lock(&vif->lock);
	if (vif->stalled_queues++ == 0) {
		netdev_info(vif->dev, "Guest Rx stalled");
		netif_carrier_off(vif->dev);
	}
	spin_unlock(&vif->lock);
}

static void xenvif_queue_carrier_on(struct xenvif_queue *queue)
{
	struct xenvif *vif = queue->vif;

	queue->last_rx_time = jiffies; /* Reset Rx stall detection. */
	queue->stalled = false;

	/* All queues are ready? Enable the carrier. */
	spin_lock(&vif->lock);
	if (--vif->stalled_queues == 0) {
		netdev_info(vif->dev, "Guest Rx ready");
		netif_carrier_on(vif->dev);
	}
	spin_unlock(&vif->lock);
}

int xenvif_kthread_guest_rx(void *data)
{
	struct xenvif_queue *queue = data;
	struct xenvif *vif = queue->vif;

	if (!vif->stall_timeout)
		xenvif_queue_carrier_on(queue);

	for (;;) {
		xenvif_wait_for_rx_work(queue);

		if (kthread_should_stop())
			break;

		/* This frontend is found to be rogue, disable it in
		 * kthread context. Currently this is only set when
		 * netback finds out frontend sends malformed packet,
		 * but we cannot disable the interface in softirq
		 * context so we defer it here, if this thread is
		 * associated with queue 0.
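		 * (vif->disabled is set by the fatal Tx error handling
		 * in netback.c.)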
610 */ 611 if (unlikely(vif->disabled && queue->id == 0)) { 612 xenvif_carrier_off(vif); 613 break; 614 } 615 616 if (!skb_queue_empty(&queue->rx_queue)) 617 xenvif_rx_action(queue); 618 619 /* If the guest hasn't provided any Rx slots for a 620 * while it's probably not responsive, drop the 621 * carrier so packets are dropped earlier. 622 */ 623 if (vif->stall_timeout) { 624 if (xenvif_rx_queue_stalled(queue)) 625 xenvif_queue_carrier_off(queue); 626 else if (xenvif_rx_queue_ready(queue)) 627 xenvif_queue_carrier_on(queue); 628 } 629 630 /* Queued packets may have foreign pages from other 631 * domains. These cannot be queued indefinitely as 632 * this would starve guests of grant refs and transmit 633 * slots. 634 */ 635 xenvif_rx_queue_drop_expired(queue); 636 637 cond_resched(); 638 } 639 640 /* Bin any remaining skbs */ 641 xenvif_rx_queue_purge(queue); 642 643 return 0; 644 } 645