/*
 * Copyright (c) 2016 Citrix Systems Inc.
 * Copyright (c) 2002-2005, K A Fraser
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation; or, when distributed
 * separately from the Linux kernel or incorporated into other
 * software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
#include "common.h"

#include <linux/kthread.h>

#include <xen/xen.h>
#include <xen/events.h>

static bool xenvif_rx_ring_slots_available(struct xenvif_queue *queue)
{
	RING_IDX prod, cons;
	struct sk_buff *skb;
	int needed;

	skb = skb_peek(&queue->rx_queue);
	if (!skb)
		return false;

	needed = DIV_ROUND_UP(skb->len, XEN_PAGE_SIZE);
	if (skb_is_gso(skb))
		needed++;
	if (skb->sw_hash)
		needed++;

	do {
		prod = queue->rx.sring->req_prod;
		cons = queue->rx.req_cons;

		if (prod - cons >= needed)
			return true;

		queue->rx.sring->req_event = prod + 1;

		/* Make sure event is visible before we check prod
		 * again.
		 */
		mb();
	} while (queue->rx.sring->req_prod != prod);

	return false;
}
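
/* Queue an skb for delivery to the guest. If this pushes the backlog
 * above rx_queue_max, stop the matching netdev tx queue;
 * xenvif_rx_dequeue() wakes it again once the backlog drains below
 * the limit.
 */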
void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb)
{
	unsigned long flags;

	spin_lock_irqsave(&queue->rx_queue.lock, flags);

	__skb_queue_tail(&queue->rx_queue, skb);

	queue->rx_queue_len += skb->len;
	if (queue->rx_queue_len > queue->rx_queue_max) {
		struct net_device *dev = queue->vif->dev;

		netif_tx_stop_queue(netdev_get_tx_queue(dev, queue->id));
	}

	spin_unlock_irqrestore(&queue->rx_queue.lock, flags);
}

static struct sk_buff *xenvif_rx_dequeue(struct xenvif_queue *queue)
{
	struct sk_buff *skb;

	spin_lock_irq(&queue->rx_queue.lock);

	skb = __skb_dequeue(&queue->rx_queue);
	if (skb) {
		queue->rx_queue_len -= skb->len;
		if (queue->rx_queue_len < queue->rx_queue_max) {
			struct netdev_queue *txq;

			txq = netdev_get_tx_queue(queue->vif->dev, queue->id);
			netif_tx_wake_queue(txq);
		}
	}

	spin_unlock_irq(&queue->rx_queue.lock);

	return skb;
}

static void xenvif_rx_queue_purge(struct xenvif_queue *queue)
{
	struct sk_buff *skb;

	while ((skb = xenvif_rx_dequeue(queue)) != NULL)
		kfree_skb(skb);
}

static void xenvif_rx_queue_drop_expired(struct xenvif_queue *queue)
{
	struct sk_buff *skb;

	for (;;) {
		skb = skb_peek(&queue->rx_queue);
		if (!skb)
			break;
		if (time_before(jiffies, XENVIF_RX_CB(skb)->expires))
			break;
		xenvif_rx_dequeue(queue);
		kfree_skb(skb);
	}
}
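
/* Issue the batched grant copy operations accumulated in rx_copy,
 * propagate any per-op failure into the status of the matching
 * response, push the responses and, if the frontend asked for an
 * event, kick the Rx interrupt. Completed skbs are freed here too.
 */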
static void xenvif_rx_copy_flush(struct xenvif_queue *queue)
{
	unsigned int i;
	int notify;

	gnttab_batch_copy(queue->rx_copy.op, queue->rx_copy.num);

	for (i = 0; i < queue->rx_copy.num; i++) {
		struct gnttab_copy *op;

		op = &queue->rx_copy.op[i];

		/* If the copy failed, overwrite the status field in
		 * the corresponding response.
		 */
		if (unlikely(op->status != GNTST_okay)) {
			struct xen_netif_rx_response *rsp;

			rsp = RING_GET_RESPONSE(&queue->rx,
						queue->rx_copy.idx[i]);
			rsp->status = op->status;
		}
	}

	queue->rx_copy.num = 0;

	/* Push responses for all completed packets. */
	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&queue->rx, notify);
	if (notify)
		notify_remote_via_irq(queue->rx_irq);

	__skb_queue_purge(queue->rx_copy.completed);
}

static void xenvif_rx_copy_add(struct xenvif_queue *queue,
			       struct xen_netif_rx_request *req,
			       unsigned int offset, void *data, size_t len)
{
	struct gnttab_copy *op;
	struct page *page;
	struct xen_page_foreign *foreign;

	if (queue->rx_copy.num == COPY_BATCH_SIZE)
		xenvif_rx_copy_flush(queue);

	op = &queue->rx_copy.op[queue->rx_copy.num];

	page = virt_to_page(data);

	op->flags = GNTCOPY_dest_gref;

	foreign = xen_page_foreign(page);
	if (foreign) {
		op->source.domid = foreign->domid;
		op->source.u.ref = foreign->gref;
		op->flags |= GNTCOPY_source_gref;
	} else {
		op->source.u.gmfn = virt_to_gfn(data);
		op->source.domid = DOMID_SELF;
	}

	op->source.offset = xen_offset_in_page(data);
	op->dest.u.ref = req->gref;
	op->dest.domid = queue->vif->domid;
	op->dest.offset = offset;
	op->len = len;

	queue->rx_copy.idx[queue->rx_copy.num] = queue->rx.req_cons;
	queue->rx_copy.num++;
}

static unsigned int xenvif_gso_type(struct sk_buff *skb)
{
	if (skb_is_gso(skb)) {
		if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4)
			return XEN_NETIF_GSO_TYPE_TCPV4;
		else
			return XEN_NETIF_GSO_TYPE_TCPV6;
	}
	return XEN_NETIF_GSO_TYPE_NONE;
}

struct xenvif_pkt_state {
	struct sk_buff *skb;
	size_t remaining_len;
	struct sk_buff *frag_iter;
	int frag; /* frag == -1 => frag_iter->head */
	unsigned int frag_offset;
	struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
	unsigned int extra_count;
	unsigned int slot;
};

static void xenvif_rx_next_skb(struct xenvif_queue *queue,
			       struct xenvif_pkt_state *pkt)
{
	struct sk_buff *skb;
	unsigned int gso_type;

	skb = xenvif_rx_dequeue(queue);

	queue->stats.tx_bytes += skb->len;
	queue->stats.tx_packets++;

	/* Reset packet state. */
	memset(pkt, 0, sizeof(struct xenvif_pkt_state));

	pkt->skb = skb;
	pkt->frag_iter = skb;
	pkt->remaining_len = skb->len;
	pkt->frag = -1;

	gso_type = xenvif_gso_type(skb);
	if ((1 << gso_type) & queue->vif->gso_mask) {
		struct xen_netif_extra_info *extra;

		extra = &pkt->extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];

		extra->u.gso.type = gso_type;
		extra->u.gso.size = skb_shinfo(skb)->gso_size;
		extra->u.gso.pad = 0;
		extra->u.gso.features = 0;
		extra->type = XEN_NETIF_EXTRA_TYPE_GSO;
		extra->flags = 0;

		pkt->extra_count++;
	}

	if (skb->sw_hash) {
		struct xen_netif_extra_info *extra;

		extra = &pkt->extras[XEN_NETIF_EXTRA_TYPE_HASH - 1];

		extra->u.hash.algorithm =
			XEN_NETIF_CTRL_HASH_ALGORITHM_TOEPLITZ;

		if (skb->l4_hash)
			extra->u.hash.type =
				skb->protocol == htons(ETH_P_IP) ?
				_XEN_NETIF_CTRL_HASH_TYPE_IPV4_TCP :
				_XEN_NETIF_CTRL_HASH_TYPE_IPV6_TCP;
		else
			extra->u.hash.type =
				skb->protocol == htons(ETH_P_IP) ?
				_XEN_NETIF_CTRL_HASH_TYPE_IPV4 :
				_XEN_NETIF_CTRL_HASH_TYPE_IPV6;

		*(uint32_t *)extra->u.hash.value = skb_get_hash_raw(skb);

		extra->type = XEN_NETIF_EXTRA_TYPE_HASH;
		extra->flags = 0;

		pkt->extra_count++;
	}
}
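
/* Every consumed request slot has produced exactly one response, so
 * the private response producer simply catches up with the request
 * consumer before the completed skb is handed to the flush path.
 */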
static void xenvif_rx_complete(struct xenvif_queue *queue,
			       struct xenvif_pkt_state *pkt)
{
	/* All responses are ready to be pushed. */
	queue->rx.rsp_prod_pvt = queue->rx.req_cons;

	__skb_queue_tail(queue->rx_copy.completed, pkt->skb);
}

static void xenvif_rx_next_frag(struct xenvif_pkt_state *pkt)
{
	struct sk_buff *frag_iter = pkt->frag_iter;
	unsigned int nr_frags = skb_shinfo(frag_iter)->nr_frags;

	pkt->frag++;
	pkt->frag_offset = 0;

	if (pkt->frag >= nr_frags) {
		if (frag_iter == pkt->skb)
			pkt->frag_iter = skb_shinfo(frag_iter)->frag_list;
		else
			pkt->frag_iter = frag_iter->next;

		pkt->frag = -1;
	}
}

static void xenvif_rx_next_chunk(struct xenvif_queue *queue,
				 struct xenvif_pkt_state *pkt,
				 unsigned int offset, void **data,
				 size_t *len)
{
	struct sk_buff *frag_iter = pkt->frag_iter;
	void *frag_data;
	size_t frag_len, chunk_len;

	BUG_ON(!frag_iter);

	if (pkt->frag == -1) {
		frag_data = frag_iter->data;
		frag_len = skb_headlen(frag_iter);
	} else {
		skb_frag_t *frag = &skb_shinfo(frag_iter)->frags[pkt->frag];

		frag_data = skb_frag_address(frag);
		frag_len = skb_frag_size(frag);
	}

	frag_data += pkt->frag_offset;
	frag_len -= pkt->frag_offset;

	chunk_len = min_t(size_t, frag_len, XEN_PAGE_SIZE - offset);
	chunk_len = min_t(size_t, chunk_len, XEN_PAGE_SIZE -
					     xen_offset_in_page(frag_data));

	pkt->frag_offset += chunk_len;

	/* Advance to next frag? */
	if (frag_len == chunk_len)
		xenvif_rx_next_frag(pkt);

	*data = frag_data;
	*len = chunk_len;
}

static void xenvif_rx_data_slot(struct xenvif_queue *queue,
				struct xenvif_pkt_state *pkt,
				struct xen_netif_rx_request *req,
				struct xen_netif_rx_response *rsp)
{
	unsigned int offset = 0;
	unsigned int flags;

	do {
		size_t len;
		void *data;

		xenvif_rx_next_chunk(queue, pkt, offset, &data, &len);
		xenvif_rx_copy_add(queue, req, offset, data, len);

		offset += len;
		pkt->remaining_len -= len;

	} while (offset < XEN_PAGE_SIZE && pkt->remaining_len > 0);

	if (pkt->remaining_len > 0)
		flags = XEN_NETRXF_more_data;
	else
		flags = 0;

	if (pkt->slot == 0) {
		struct sk_buff *skb = pkt->skb;

		if (skb->ip_summed == CHECKSUM_PARTIAL)
			flags |= XEN_NETRXF_csum_blank |
				 XEN_NETRXF_data_validated;
		else if (skb->ip_summed == CHECKSUM_UNNECESSARY)
			flags |= XEN_NETRXF_data_validated;

		if (pkt->extra_count != 0)
			flags |= XEN_NETRXF_extra_info;
	}

	rsp->offset = 0;
	rsp->flags = flags;
	rsp->id = req->id;
	rsp->status = (s16)offset;
}

static void xenvif_rx_extra_slot(struct xenvif_queue *queue,
				 struct xenvif_pkt_state *pkt,
				 struct xen_netif_rx_request *req,
				 struct xen_netif_rx_response *rsp)
{
	struct xen_netif_extra_info *extra = (void *)rsp;
	unsigned int i;

	pkt->extra_count--;

	for (i = 0; i < ARRAY_SIZE(pkt->extras); i++) {
		if (pkt->extras[i].type) {
			*extra = pkt->extras[i];

			if (pkt->extra_count != 0)
				extra->flags |= XEN_NETIF_EXTRA_FLAG_MORE;

			pkt->extras[i].type = 0;
			return;
		}
	}
	BUG();
}

static void xenvif_rx_skb(struct xenvif_queue *queue)
{
	struct xenvif_pkt_state pkt;

	xenvif_rx_next_skb(queue, &pkt);

	queue->last_rx_time = jiffies;

	do {
		struct xen_netif_rx_request *req;
		struct xen_netif_rx_response *rsp;

		req = RING_GET_REQUEST(&queue->rx, queue->rx.req_cons);
		rsp = RING_GET_RESPONSE(&queue->rx, queue->rx.req_cons);
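
		/* Extras must go after the first data slot */
		if (pkt.slot != 0 && pkt.extra_count != 0)
			xenvif_rx_extra_slot(queue, &pkt, req, rsp);
		else
			xenvif_rx_data_slot(queue, &pkt, req, rsp);

		queue->rx.req_cons++;
		pkt.slot++;
	} while (pkt.remaining_len > 0 || pkt.extra_count != 0);

	xenvif_rx_complete(queue, &pkt);
}

/* Process up to RX_BATCH_SIZE packets per invocation, provided the
 * frontend has posted enough request slots for the packet at the head
 * of the queue, then flush any outstanding grant copies.
 */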
#define RX_BATCH_SIZE 64

void xenvif_rx_action(struct xenvif_queue *queue)
{
	struct sk_buff_head completed_skbs;
	unsigned int work_done = 0;

	__skb_queue_head_init(&completed_skbs);
	queue->rx_copy.completed = &completed_skbs;

	while (xenvif_rx_ring_slots_available(queue) &&
	       work_done < RX_BATCH_SIZE) {
		xenvif_rx_skb(queue);
		work_done++;
	}

	/* Flush any pending copies and complete all skbs. */
	xenvif_rx_copy_flush(queue);
}

static bool xenvif_rx_queue_stalled(struct xenvif_queue *queue)
{
	RING_IDX prod, cons;

	prod = queue->rx.sring->req_prod;
	cons = queue->rx.req_cons;

	return !queue->stalled &&
		prod - cons < 1 &&
		time_after(jiffies,
			   queue->last_rx_time + queue->vif->stall_timeout);
}

static bool xenvif_rx_queue_ready(struct xenvif_queue *queue)
{
	RING_IDX prod, cons;

	prod = queue->rx.sring->req_prod;
	cons = queue->rx.req_cons;

	return queue->stalled && prod - cons >= 1;
}

static bool xenvif_have_rx_work(struct xenvif_queue *queue)
{
	return xenvif_rx_ring_slots_available(queue) ||
		(queue->vif->stall_timeout &&
		 (xenvif_rx_queue_stalled(queue) ||
		  xenvif_rx_queue_ready(queue))) ||
		kthread_should_stop() ||
		queue->vif->disabled;
}

static long xenvif_rx_queue_timeout(struct xenvif_queue *queue)
{
	struct sk_buff *skb;
	long timeout;

	skb = skb_peek(&queue->rx_queue);
	if (!skb)
		return MAX_SCHEDULE_TIMEOUT;

	timeout = XENVIF_RX_CB(skb)->expires - jiffies;
	return timeout < 0 ? 0 : timeout;
}

/* Wait until the guest Rx thread has work.
 *
 * The timeout needs to be adjusted based on the current head of the
 * queue (and not just the head at the beginning). In particular, if
 * the queue is initially empty an infinite timeout is used and this
 * needs to be reduced when a skb is queued.
 *
 * This cannot be done with wait_event_timeout() because it only
 * calculates the timeout once.
 */
static void xenvif_wait_for_rx_work(struct xenvif_queue *queue)
{
	DEFINE_WAIT(wait);

	if (xenvif_have_rx_work(queue))
		return;

	for (;;) {
		long ret;

		prepare_to_wait(&queue->wq, &wait, TASK_INTERRUPTIBLE);
		if (xenvif_have_rx_work(queue))
			break;
		ret = schedule_timeout(xenvif_rx_queue_timeout(queue));
		if (!ret)
			break;
	}
	finish_wait(&queue->wq, &wait);
}

static void xenvif_queue_carrier_off(struct xenvif_queue *queue)
{
	struct xenvif *vif = queue->vif;

	queue->stalled = true;

	/* At least one queue has stalled? Disable the carrier. */
	spin_lock(&vif->lock);
	if (vif->stalled_queues++ == 0) {
		netdev_info(vif->dev, "Guest Rx stalled");
		netif_carrier_off(vif->dev);
	}
	spin_unlock(&vif->lock);
}

static void xenvif_queue_carrier_on(struct xenvif_queue *queue)
{
	struct xenvif *vif = queue->vif;

	queue->last_rx_time = jiffies; /* Reset Rx stall detection. */
	queue->stalled = false;

	/* All queues are ready? Enable the carrier. */
	spin_lock(&vif->lock);
	if (--vif->stalled_queues == 0) {
		netdev_info(vif->dev, "Guest Rx ready");
		netif_carrier_on(vif->dev);
	}
	spin_unlock(&vif->lock);
}
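
/* Per-queue kernel thread for the guest Rx path: wait for work,
 * process queued skbs, manage stall detection and carrier state, and
 * drop packets that have been queued past their expiry time.
 */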
int xenvif_kthread_guest_rx(void *data)
{
	struct xenvif_queue *queue = data;
	struct xenvif *vif = queue->vif;

	if (!vif->stall_timeout)
		xenvif_queue_carrier_on(queue);

	for (;;) {
		xenvif_wait_for_rx_work(queue);

		if (kthread_should_stop())
			break;

		/* The frontend has been found to be rogue, so disable
		 * it in kthread context. Currently this is only set
		 * when netback finds that the frontend has sent a
		 * malformed packet, but we cannot disable the
		 * interface in softirq context, so we defer it here,
		 * if this thread is associated with queue 0.
		 */
		if (unlikely(vif->disabled && queue->id == 0)) {
			xenvif_carrier_off(vif);
			break;
		}

		if (!skb_queue_empty(&queue->rx_queue))
			xenvif_rx_action(queue);

		/* If the guest hasn't provided any Rx slots for a
		 * while it's probably not responsive, so drop the
		 * carrier and let packets be dropped earlier.
		 */
		if (vif->stall_timeout) {
			if (xenvif_rx_queue_stalled(queue))
				xenvif_queue_carrier_off(queue);
			else if (xenvif_rx_queue_ready(queue))
				xenvif_queue_carrier_on(queue);
		}

		/* Queued packets may have foreign pages from other
		 * domains. These cannot be queued indefinitely as
		 * this would starve guests of grant refs and transmit
		 * slots.
		 */
		xenvif_rx_queue_drop_expired(queue);

		cond_resched();
	}

	/* Bin any remaining skbs */
	xenvif_rx_queue_purge(queue);

	return 0;
}