/*
 * Copyright (c) 2016 Citrix Systems Inc.
 * Copyright (c) 2002-2005, K A Fraser
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation; or, when distributed
 * separately from the Linux kernel or incorporated into other
 * software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
#include "common.h"

#include <linux/kthread.h>

#include <xen/xen.h>
#include <xen/events.h>

static bool xenvif_rx_ring_slots_available(struct xenvif_queue *queue)
{
        RING_IDX prod, cons;
        struct sk_buff *skb;
        int needed;
        unsigned long flags;

        spin_lock_irqsave(&queue->rx_queue.lock, flags);

        skb = skb_peek(&queue->rx_queue);
        if (!skb) {
                spin_unlock_irqrestore(&queue->rx_queue.lock, flags);
                return false;
        }

        needed = DIV_ROUND_UP(skb->len, XEN_PAGE_SIZE);
        if (skb_is_gso(skb))
                needed++;
        if (skb->sw_hash)
                needed++;

        spin_unlock_irqrestore(&queue->rx_queue.lock, flags);

        do {
                prod = queue->rx.sring->req_prod;
                cons = queue->rx.req_cons;

                if (prod - cons >= needed)
                        return true;

                queue->rx.sring->req_event = prod + 1;

                /* Make sure event is visible before we check prod
                 * again.
                 */
                mb();
        } while (queue->rx.sring->req_prod != prod);

        return false;
}

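/* Packets destined for the guest are queued on rx_queue by the
 * transmit path (xenvif_rx_queue_tail()).  When the backlog exceeds
 * rx_queue_max the corresponding netdev tx queue is stopped, and it
 * is woken again as the backlog drains (xenvif_rx_dequeue()).
 */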
void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb)
{
        unsigned long flags;

        spin_lock_irqsave(&queue->rx_queue.lock, flags);

        __skb_queue_tail(&queue->rx_queue, skb);

        queue->rx_queue_len += skb->len;
        if (queue->rx_queue_len > queue->rx_queue_max) {
                struct net_device *dev = queue->vif->dev;

                netif_tx_stop_queue(netdev_get_tx_queue(dev, queue->id));
        }

        spin_unlock_irqrestore(&queue->rx_queue.lock, flags);
}

static struct sk_buff *xenvif_rx_dequeue(struct xenvif_queue *queue)
{
        struct sk_buff *skb;

        spin_lock_irq(&queue->rx_queue.lock);

        skb = __skb_dequeue(&queue->rx_queue);
        if (skb) {
                queue->rx_queue_len -= skb->len;
                if (queue->rx_queue_len < queue->rx_queue_max) {
                        struct netdev_queue *txq;

                        txq = netdev_get_tx_queue(queue->vif->dev, queue->id);
                        netif_tx_wake_queue(txq);
                }
        }

        spin_unlock_irq(&queue->rx_queue.lock);

        return skb;
}

static void xenvif_rx_queue_purge(struct xenvif_queue *queue)
{
        struct sk_buff *skb;

        while ((skb = xenvif_rx_dequeue(queue)) != NULL)
                kfree_skb(skb);
}

static void xenvif_rx_queue_drop_expired(struct xenvif_queue *queue)
{
        struct sk_buff *skb;

        for (;;) {
                skb = skb_peek(&queue->rx_queue);
                if (!skb)
                        break;
                if (time_before(jiffies, XENVIF_RX_CB(skb)->expires))
                        break;
                xenvif_rx_dequeue(queue);
                kfree_skb(skb);
        }
}

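/* Grant copy operations for a batch of packets are accumulated in
 * queue->rx_copy and issued with a single gnttab_batch_copy() call.
 * Flushing also pushes the ring responses and, if needed, notifies
 * the frontend via the Rx event channel.
 */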
static void xenvif_rx_copy_flush(struct xenvif_queue *queue)
{
        unsigned int i;
        int notify;

        gnttab_batch_copy(queue->rx_copy.op, queue->rx_copy.num);

        for (i = 0; i < queue->rx_copy.num; i++) {
                struct gnttab_copy *op;

                op = &queue->rx_copy.op[i];

                /* If the copy failed, overwrite the status field in
                 * the corresponding response.
                 */
                if (unlikely(op->status != GNTST_okay)) {
                        struct xen_netif_rx_response *rsp;

                        rsp = RING_GET_RESPONSE(&queue->rx,
                                                queue->rx_copy.idx[i]);
                        rsp->status = op->status;
                }
        }

        queue->rx_copy.num = 0;

        /* Push responses for all completed packets. */
        RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&queue->rx, notify);
        if (notify)
                notify_remote_via_irq(queue->rx_irq);

        __skb_queue_purge(queue->rx_copy.completed);
}

static void xenvif_rx_copy_add(struct xenvif_queue *queue,
                               struct xen_netif_rx_request *req,
                               unsigned int offset, void *data, size_t len)
{
        struct gnttab_copy *op;
        struct page *page;
        struct xen_page_foreign *foreign;

        if (queue->rx_copy.num == COPY_BATCH_SIZE)
                xenvif_rx_copy_flush(queue);

        op = &queue->rx_copy.op[queue->rx_copy.num];

        page = virt_to_page(data);

        op->flags = GNTCOPY_dest_gref;

        foreign = xen_page_foreign(page);
        if (foreign) {
                op->source.domid = foreign->domid;
                op->source.u.ref = foreign->gref;
                op->flags |= GNTCOPY_source_gref;
        } else {
                op->source.u.gmfn = virt_to_gfn(data);
                op->source.domid = DOMID_SELF;
        }

        op->source.offset = xen_offset_in_page(data);
        op->dest.u.ref = req->gref;
        op->dest.domid = queue->vif->domid;
        op->dest.offset = offset;
        op->len = len;

        queue->rx_copy.idx[queue->rx_copy.num] = queue->rx.req_cons;
        queue->rx_copy.num++;
}

static unsigned int xenvif_gso_type(struct sk_buff *skb)
{
        if (skb_is_gso(skb)) {
                if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4)
                        return XEN_NETIF_GSO_TYPE_TCPV4;
                else
                        return XEN_NETIF_GSO_TYPE_TCPV6;
        }
        return XEN_NETIF_GSO_TYPE_NONE;
}

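/* State for writing a single packet into the guest Rx ring: the skb
 * being sent, the fragment currently being copied (frag == -1 means
 * the linear area of frag_iter), the offset within that fragment,
 * any extra-info segments (GSO, XDP headroom, hash) still to be
 * emitted, and the number of ring slots consumed so far.
 */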
struct xenvif_pkt_state {
        struct sk_buff *skb;
        size_t remaining_len;
        struct sk_buff *frag_iter;
        int frag; /* frag == -1 => frag_iter->head */
        unsigned int frag_offset;
        struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
        unsigned int extra_count;
        unsigned int slot;
};

static void xenvif_rx_next_skb(struct xenvif_queue *queue,
                               struct xenvif_pkt_state *pkt)
{
        struct sk_buff *skb;
        unsigned int gso_type;

        skb = xenvif_rx_dequeue(queue);

        queue->stats.tx_bytes += skb->len;
        queue->stats.tx_packets++;

        /* Reset packet state. */
        memset(pkt, 0, sizeof(struct xenvif_pkt_state));

        pkt->skb = skb;
        pkt->frag_iter = skb;
        pkt->remaining_len = skb->len;
        pkt->frag = -1;

        gso_type = xenvif_gso_type(skb);
        if ((1 << gso_type) & queue->vif->gso_mask) {
                struct xen_netif_extra_info *extra;

                extra = &pkt->extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];

                extra->u.gso.type = gso_type;
                extra->u.gso.size = skb_shinfo(skb)->gso_size;
                extra->u.gso.pad = 0;
                extra->u.gso.features = 0;
                extra->type = XEN_NETIF_EXTRA_TYPE_GSO;
                extra->flags = 0;

                pkt->extra_count++;
        }

        if (queue->vif->xdp_headroom) {
                struct xen_netif_extra_info *extra;

                extra = &pkt->extras[XEN_NETIF_EXTRA_TYPE_XDP - 1];

                memset(extra, 0, sizeof(struct xen_netif_extra_info));
                extra->u.xdp.headroom = queue->vif->xdp_headroom;
                extra->type = XEN_NETIF_EXTRA_TYPE_XDP;
                extra->flags = 0;

                pkt->extra_count++;
        }

        if (skb->sw_hash) {
                struct xen_netif_extra_info *extra;

                extra = &pkt->extras[XEN_NETIF_EXTRA_TYPE_HASH - 1];

                extra->u.hash.algorithm =
                        XEN_NETIF_CTRL_HASH_ALGORITHM_TOEPLITZ;

                if (skb->l4_hash)
                        extra->u.hash.type =
                                skb->protocol == htons(ETH_P_IP) ?
                                _XEN_NETIF_CTRL_HASH_TYPE_IPV4_TCP :
                                _XEN_NETIF_CTRL_HASH_TYPE_IPV6_TCP;
                else
                        extra->u.hash.type =
                                skb->protocol == htons(ETH_P_IP) ?
                                _XEN_NETIF_CTRL_HASH_TYPE_IPV4 :
                                _XEN_NETIF_CTRL_HASH_TYPE_IPV6;

                *(uint32_t *)extra->u.hash.value = skb_get_hash_raw(skb);

                extra->type = XEN_NETIF_EXTRA_TYPE_HASH;
                extra->flags = 0;

                pkt->extra_count++;
        }
}

static void xenvif_rx_complete(struct xenvif_queue *queue,
                               struct xenvif_pkt_state *pkt)
{
        /* All responses are ready to be pushed. */
        queue->rx.rsp_prod_pvt = queue->rx.req_cons;

        __skb_queue_tail(queue->rx_copy.completed, pkt->skb);
}

static void xenvif_rx_next_frag(struct xenvif_pkt_state *pkt)
{
        struct sk_buff *frag_iter = pkt->frag_iter;
        unsigned int nr_frags = skb_shinfo(frag_iter)->nr_frags;

        pkt->frag++;
        pkt->frag_offset = 0;

        if (pkt->frag >= nr_frags) {
                if (frag_iter == pkt->skb)
                        pkt->frag_iter = skb_shinfo(frag_iter)->frag_list;
                else
                        pkt->frag_iter = frag_iter->next;

                pkt->frag = -1;
        }
}

static void xenvif_rx_next_chunk(struct xenvif_queue *queue,
                                 struct xenvif_pkt_state *pkt,
                                 unsigned int offset, void **data,
                                 size_t *len)
{
        struct sk_buff *frag_iter = pkt->frag_iter;
        void *frag_data;
        size_t frag_len, chunk_len;

        BUG_ON(!frag_iter);

        if (pkt->frag == -1) {
                frag_data = frag_iter->data;
                frag_len = skb_headlen(frag_iter);
        } else {
                skb_frag_t *frag = &skb_shinfo(frag_iter)->frags[pkt->frag];

                frag_data = skb_frag_address(frag);
                frag_len = skb_frag_size(frag);
        }

        frag_data += pkt->frag_offset;
        frag_len -= pkt->frag_offset;

        chunk_len = min_t(size_t, frag_len, XEN_PAGE_SIZE - offset);
        chunk_len = min_t(size_t, chunk_len, XEN_PAGE_SIZE -
                                             xen_offset_in_page(frag_data));

        pkt->frag_offset += chunk_len;

        /* Advance to next frag? */
        if (frag_len == chunk_len)
                xenvif_rx_next_frag(pkt);

        *data = frag_data;
        *len = chunk_len;
}

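/* Fill one data slot of the guest Rx ring, copying as many chunks of
 * the current packet as fit in the granted page.  The first slot of a
 * packet additionally carries the checksum and extra-info flags.
 */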
static void xenvif_rx_data_slot(struct xenvif_queue *queue,
                                struct xenvif_pkt_state *pkt,
                                struct xen_netif_rx_request *req,
                                struct xen_netif_rx_response *rsp)
{
        unsigned int offset = queue->vif->xdp_headroom;
        unsigned int flags;

        do {
                size_t len;
                void *data;

                xenvif_rx_next_chunk(queue, pkt, offset, &data, &len);
                xenvif_rx_copy_add(queue, req, offset, data, len);

                offset += len;
                pkt->remaining_len -= len;

        } while (offset < XEN_PAGE_SIZE && pkt->remaining_len > 0);

        if (pkt->remaining_len > 0)
                flags = XEN_NETRXF_more_data;
        else
                flags = 0;

        if (pkt->slot == 0) {
                struct sk_buff *skb = pkt->skb;

                if (skb->ip_summed == CHECKSUM_PARTIAL)
                        flags |= XEN_NETRXF_csum_blank |
                                 XEN_NETRXF_data_validated;
                else if (skb->ip_summed == CHECKSUM_UNNECESSARY)
                        flags |= XEN_NETRXF_data_validated;

                if (pkt->extra_count != 0)
                        flags |= XEN_NETRXF_extra_info;
        }

        rsp->offset = 0;
        rsp->flags = flags;
        rsp->id = req->id;
        rsp->status = (s16)offset;
}

static void xenvif_rx_extra_slot(struct xenvif_queue *queue,
                                 struct xenvif_pkt_state *pkt,
                                 struct xen_netif_rx_request *req,
                                 struct xen_netif_rx_response *rsp)
{
        struct xen_netif_extra_info *extra = (void *)rsp;
        unsigned int i;

        pkt->extra_count--;

        for (i = 0; i < ARRAY_SIZE(pkt->extras); i++) {
                if (pkt->extras[i].type) {
                        *extra = pkt->extras[i];

                        if (pkt->extra_count != 0)
                                extra->flags |= XEN_NETIF_EXTRA_FLAG_MORE;

                        pkt->extras[i].type = 0;
                        return;
                }
        }
        BUG();
}

static void xenvif_rx_skb(struct xenvif_queue *queue)
{
        struct xenvif_pkt_state pkt;

        xenvif_rx_next_skb(queue, &pkt);

        queue->last_rx_time = jiffies;

        do {
                struct xen_netif_rx_request *req;
                struct xen_netif_rx_response *rsp;

                req = RING_GET_REQUEST(&queue->rx, queue->rx.req_cons);
                rsp = RING_GET_RESPONSE(&queue->rx, queue->rx.req_cons);

                /* Extras must go after the first data slot */
                if (pkt.slot != 0 && pkt.extra_count != 0)
                        xenvif_rx_extra_slot(queue, &pkt, req, rsp);
                else
                        xenvif_rx_data_slot(queue, &pkt, req, rsp);

                queue->rx.req_cons++;
                pkt.slot++;
        } while (pkt.remaining_len > 0 || pkt.extra_count != 0);

        xenvif_rx_complete(queue, &pkt);
}

#define RX_BATCH_SIZE 64

void xenvif_rx_action(struct xenvif_queue *queue)
{
        struct sk_buff_head completed_skbs;
        unsigned int work_done = 0;

        __skb_queue_head_init(&completed_skbs);
        queue->rx_copy.completed = &completed_skbs;

        while (xenvif_rx_ring_slots_available(queue) &&
               work_done < RX_BATCH_SIZE) {
                xenvif_rx_skb(queue);
                work_done++;
        }

        /* Flush any pending copies and complete all skbs. */
        xenvif_rx_copy_flush(queue);
}

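/* Stall detection: a queue is treated as stalled when the frontend
 * has provided no Rx requests and nothing has been transmitted for
 * stall_timeout jiffies.  While any queue is stalled the carrier is
 * turned off (see xenvif_queue_carrier_off()/xenvif_queue_carrier_on()
 * below) so packets are dropped earlier.
 */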
static bool xenvif_rx_queue_stalled(struct xenvif_queue *queue)
{
        RING_IDX prod, cons;

        prod = queue->rx.sring->req_prod;
        cons = queue->rx.req_cons;

        return !queue->stalled &&
                prod - cons < 1 &&
                time_after(jiffies,
                           queue->last_rx_time + queue->vif->stall_timeout);
}

static bool xenvif_rx_queue_ready(struct xenvif_queue *queue)
{
        RING_IDX prod, cons;

        prod = queue->rx.sring->req_prod;
        cons = queue->rx.req_cons;

        return queue->stalled && prod - cons >= 1;
}

bool xenvif_have_rx_work(struct xenvif_queue *queue, bool test_kthread)
{
        return xenvif_rx_ring_slots_available(queue) ||
                (queue->vif->stall_timeout &&
                 (xenvif_rx_queue_stalled(queue) ||
                  xenvif_rx_queue_ready(queue))) ||
                (test_kthread && kthread_should_stop()) ||
                queue->vif->disabled;
}

static long xenvif_rx_queue_timeout(struct xenvif_queue *queue)
{
        struct sk_buff *skb;
        long timeout;

        skb = skb_peek(&queue->rx_queue);
        if (!skb)
                return MAX_SCHEDULE_TIMEOUT;

        timeout = XENVIF_RX_CB(skb)->expires - jiffies;
        return timeout < 0 ? 0 : timeout;
}

/* Wait until the guest Rx thread has work.
 *
 * The timeout needs to be adjusted based on the current head of the
 * queue (and not just the head at the beginning).  In particular, if
 * the queue is initially empty an infinite timeout is used and this
 * needs to be reduced when a skb is queued.
 *
 * This cannot be done with wait_event_timeout() because it only
 * calculates the timeout once.
 */
static void xenvif_wait_for_rx_work(struct xenvif_queue *queue)
{
        DEFINE_WAIT(wait);

        if (xenvif_have_rx_work(queue, true))
                return;

        for (;;) {
                long ret;

                prepare_to_wait(&queue->wq, &wait, TASK_INTERRUPTIBLE);
                if (xenvif_have_rx_work(queue, true))
                        break;
                if (atomic_fetch_andnot(NETBK_RX_EOI | NETBK_COMMON_EOI,
                                        &queue->eoi_pending) &
                    (NETBK_RX_EOI | NETBK_COMMON_EOI))
                        xen_irq_lateeoi(queue->rx_irq, 0);

                ret = schedule_timeout(xenvif_rx_queue_timeout(queue));
                if (!ret)
                        break;
        }
        finish_wait(&queue->wq, &wait);
}

static void xenvif_queue_carrier_off(struct xenvif_queue *queue)
{
        struct xenvif *vif = queue->vif;

        queue->stalled = true;

        /* At least one queue has stalled? Disable the carrier. */
        spin_lock(&vif->lock);
        if (vif->stalled_queues++ == 0) {
                netdev_info(vif->dev, "Guest Rx stalled");
                netif_carrier_off(vif->dev);
        }
        spin_unlock(&vif->lock);
}

static void xenvif_queue_carrier_on(struct xenvif_queue *queue)
{
        struct xenvif *vif = queue->vif;

        queue->last_rx_time = jiffies; /* Reset Rx stall detection. */
        queue->stalled = false;

        /* All queues are ready? Enable the carrier. */
        spin_lock(&vif->lock);
        if (--vif->stalled_queues == 0) {
                netdev_info(vif->dev, "Guest Rx ready");
                netif_carrier_on(vif->dev);
        }
        spin_unlock(&vif->lock);
}

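/* Per-queue kernel thread servicing the guest Rx ring: it sleeps
 * until there is work, transmits queued skbs, updates the carrier on
 * stall/recovery and drops packets that have expired on the queue.
 */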
int xenvif_kthread_guest_rx(void *data)
{
        struct xenvif_queue *queue = data;
        struct xenvif *vif = queue->vif;

        if (!vif->stall_timeout)
                xenvif_queue_carrier_on(queue);

        for (;;) {
                xenvif_wait_for_rx_work(queue);

                if (kthread_should_stop())
                        break;

                /* This frontend is found to be rogue, disable it in
                 * kthread context. Currently this is only set when
                 * netback finds out frontend sends malformed packet,
                 * but we cannot disable the interface in softirq
                 * context so we defer it here, if this thread is
                 * associated with queue 0.
                 */
                if (unlikely(vif->disabled && queue->id == 0)) {
                        xenvif_carrier_off(vif);
                        break;
                }

                if (!skb_queue_empty(&queue->rx_queue))
                        xenvif_rx_action(queue);

                /* If the guest hasn't provided any Rx slots for a
                 * while it's probably not responsive, drop the
                 * carrier so packets are dropped earlier.
                 */
                if (vif->stall_timeout) {
                        if (xenvif_rx_queue_stalled(queue))
                                xenvif_queue_carrier_off(queue);
                        else if (xenvif_rx_queue_ready(queue))
                                xenvif_queue_carrier_on(queue);
                }

                /* Queued packets may have foreign pages from other
                 * domains.  These cannot be queued indefinitely as
                 * this would starve guests of grant refs and transmit
                 * slots.
                 */
                xenvif_rx_queue_drop_expired(queue);

                cond_resched();
        }

        /* Bin any remaining skbs */
        xenvif_rx_queue_purge(queue);

        return 0;
}