// SPDX-License-Identifier: (GPL-2.0 OR MIT)
/* Google virtual Ethernet (gve) driver
 *
 * Copyright (C) 2015-2019 Google, Inc.
 */

#include "gve.h"
#include "gve_adminq.h"
#include <linux/etherdevice.h>

static void gve_rx_remove_from_block(struct gve_priv *priv, int queue_idx)
{
	struct gve_notify_block *block =
			&priv->ntfy_blocks[gve_rx_idx_to_ntfy(priv, queue_idx)];

	block->rx = NULL;
}

static void gve_rx_free_ring(struct gve_priv *priv, int idx)
{
	struct gve_rx_ring *rx = &priv->rx[idx];
	struct device *dev = &priv->pdev->dev;
	size_t bytes;
	u32 slots;

	gve_rx_remove_from_block(priv, idx);

	bytes = sizeof(struct gve_rx_desc) * priv->rx_desc_cnt;
	dma_free_coherent(dev, bytes, rx->desc.desc_ring, rx->desc.bus);
	rx->desc.desc_ring = NULL;

	dma_free_coherent(dev, sizeof(*rx->q_resources),
			  rx->q_resources, rx->q_resources_bus);
	rx->q_resources = NULL;

	gve_unassign_qpl(priv, rx->data.qpl->id);
	rx->data.qpl = NULL;
	kvfree(rx->data.page_info);

	slots = rx->mask + 1;
	bytes = sizeof(*rx->data.data_ring) * slots;
	dma_free_coherent(dev, bytes, rx->data.data_ring,
			  rx->data.data_bus);
	rx->data.data_ring = NULL;
	netif_dbg(priv, drv, priv->dev, "freed rx ring %d\n", idx);
}

static void gve_setup_rx_buffer(struct gve_rx_slot_page_info *page_info,
				struct gve_rx_data_slot *slot,
				dma_addr_t addr, struct page *page)
{
	page_info->page = page;
	page_info->page_offset = 0;
	page_info->page_address = page_address(page);
	slot->qpl_offset = cpu_to_be64(addr);
}

static int gve_prefill_rx_pages(struct gve_rx_ring *rx)
{
	struct gve_priv *priv = rx->gve;
	u32 slots;
	int i;

	/* Allocate one page per Rx queue slot. Each page is split into two
	 * packet buffers; when possible we "page flip" between the two.
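	 * (Each buffer is PAGE_SIZE / 2 bytes; gve_rx_flip_buff() toggles
	 * page_offset between the two halves of the page.)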
	 */
	slots = rx->mask + 1;

	rx->data.page_info = kvzalloc(slots *
				      sizeof(*rx->data.page_info), GFP_KERNEL);
	if (!rx->data.page_info)
		return -ENOMEM;

	rx->data.qpl = gve_assign_rx_qpl(priv);

	for (i = 0; i < slots; i++) {
		struct page *page = rx->data.qpl->pages[i];
		dma_addr_t addr = i * PAGE_SIZE;

		gve_setup_rx_buffer(&rx->data.page_info[i],
				    &rx->data.data_ring[i], addr, page);
	}

	return slots;
}

static void gve_rx_add_to_block(struct gve_priv *priv, int queue_idx)
{
	u32 ntfy_idx = gve_rx_idx_to_ntfy(priv, queue_idx);
	struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
	struct gve_rx_ring *rx = &priv->rx[queue_idx];

	block->rx = rx;
	rx->ntfy_id = ntfy_idx;
}

static int gve_rx_alloc_ring(struct gve_priv *priv, int idx)
{
	struct gve_rx_ring *rx = &priv->rx[idx];
	struct device *hdev = &priv->pdev->dev;
	u32 slots, npages;
	int filled_pages;
	size_t bytes;
	int err;

	netif_dbg(priv, drv, priv->dev, "allocating rx ring\n");
	/* Make sure everything is zeroed to start with */
	memset(rx, 0, sizeof(*rx));

	rx->gve = priv;
	rx->q_num = idx;

	slots = priv->rx_pages_per_qpl;
	rx->mask = slots - 1;

	/* alloc rx data ring */
	bytes = sizeof(*rx->data.data_ring) * slots;
	rx->data.data_ring = dma_alloc_coherent(hdev, bytes,
						&rx->data.data_bus,
						GFP_KERNEL);
	if (!rx->data.data_ring)
		return -ENOMEM;
	filled_pages = gve_prefill_rx_pages(rx);
	if (filled_pages < 0) {
		err = -ENOMEM;
		goto abort_with_slots;
	}
	rx->fill_cnt = filled_pages;
	/* Ensure data ring slots (packet buffers) are visible. */
	dma_wmb();

	/* Alloc gve_queue_resources */
	rx->q_resources =
		dma_alloc_coherent(hdev,
				   sizeof(*rx->q_resources),
				   &rx->q_resources_bus,
				   GFP_KERNEL);
	if (!rx->q_resources) {
		err = -ENOMEM;
		goto abort_filled;
	}
	netif_dbg(priv, drv, priv->dev, "rx[%d]->data.data_bus=%lx\n", idx,
		  (unsigned long)rx->data.data_bus);

	/* alloc rx desc ring */
	bytes = sizeof(struct gve_rx_desc) * priv->rx_desc_cnt;
	npages = bytes / PAGE_SIZE;
	if (npages * PAGE_SIZE != bytes) {
		err = -EIO;
		goto abort_with_q_resources;
	}

	rx->desc.desc_ring = dma_alloc_coherent(hdev, bytes, &rx->desc.bus,
						GFP_KERNEL);
	if (!rx->desc.desc_ring) {
		err = -ENOMEM;
		goto abort_with_q_resources;
	}
	rx->mask = slots - 1;
	rx->cnt = 0;
	rx->desc.seqno = 1;
	gve_rx_add_to_block(priv, idx);

	return 0;

abort_with_q_resources:
	dma_free_coherent(hdev, sizeof(*rx->q_resources),
			  rx->q_resources, rx->q_resources_bus);
	rx->q_resources = NULL;
abort_filled:
	kvfree(rx->data.page_info);
abort_with_slots:
	bytes = sizeof(*rx->data.data_ring) * slots;
	dma_free_coherent(hdev, bytes, rx->data.data_ring, rx->data.data_bus);
	rx->data.data_ring = NULL;

	return err;
}

int gve_rx_alloc_rings(struct gve_priv *priv)
{
	int err = 0;
	int i;

	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
		err = gve_rx_alloc_ring(priv, i);
		if (err) {
			netif_err(priv, drv, priv->dev,
				  "Failed to alloc rx ring=%d: err=%d\n",
				  i, err);
			break;
		}
	}
	/* Free any rings that were allocated if there was an error */
	if (err) {
		int j;

		for (j = 0; j < i; j++)
			gve_rx_free_ring(priv, j);
	}
	return err;
}

void gve_rx_free_rings(struct gve_priv *priv)
{
	int i;

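	/* Tear down every ring set up by gve_rx_alloc_rings(). */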
	for (i = 0; i < priv->rx_cfg.num_queues; i++)
		gve_rx_free_ring(priv, i);
}

void gve_rx_write_doorbell(struct gve_priv *priv, struct gve_rx_ring *rx)
{
	u32 db_idx = be32_to_cpu(rx->q_resources->db_index);

	iowrite32be(rx->fill_cnt, &priv->db_bar2[db_idx]);
}

static enum pkt_hash_types gve_rss_type(__be16 pkt_flags)
{
	if (likely(pkt_flags & (GVE_RXF_TCP | GVE_RXF_UDP)))
		return PKT_HASH_TYPE_L4;
	if (pkt_flags & (GVE_RXF_IPV4 | GVE_RXF_IPV6))
		return PKT_HASH_TYPE_L3;
	return PKT_HASH_TYPE_L2;
}

static struct sk_buff *gve_rx_copy(struct gve_rx_ring *rx,
				   struct net_device *dev,
				   struct napi_struct *napi,
				   struct gve_rx_slot_page_info *page_info,
				   u16 len)
{
	struct sk_buff *skb = napi_alloc_skb(napi, len);
	void *va = page_info->page_address + GVE_RX_PAD +
		   page_info->page_offset;

	if (unlikely(!skb))
		return NULL;

	__skb_put(skb, len);

	skb_copy_to_linear_data(skb, va, len);

	skb->protocol = eth_type_trans(skb, dev);

	u64_stats_update_begin(&rx->statss);
	rx->rx_copied_pkt++;
	u64_stats_update_end(&rx->statss);

	return skb;
}

static struct sk_buff *gve_rx_add_frags(struct net_device *dev,
					struct napi_struct *napi,
					struct gve_rx_slot_page_info *page_info,
					u16 len)
{
	struct sk_buff *skb = napi_get_frags(napi);

	if (unlikely(!skb))
		return NULL;

	skb_add_rx_frag(skb, 0, page_info->page,
			page_info->page_offset +
			GVE_RX_PAD, len, PAGE_SIZE / 2);

	return skb;
}

static void gve_rx_flip_buff(struct gve_rx_slot_page_info *page_info,
			     struct gve_rx_data_slot *data_ring)
{
	u64 addr = be64_to_cpu(data_ring->qpl_offset);

	page_info->page_offset ^= PAGE_SIZE / 2;
	addr ^= PAGE_SIZE / 2;
	data_ring->qpl_offset = cpu_to_be64(addr);
}

static bool gve_rx(struct gve_rx_ring *rx, struct gve_rx_desc *rx_desc,
		   netdev_features_t feat, u32 idx)
{
	struct gve_rx_slot_page_info *page_info;
	struct gve_priv *priv = rx->gve;
	struct napi_struct *napi = &priv->ntfy_blocks[rx->ntfy_id].napi;
	struct net_device *dev = priv->dev;
	struct sk_buff *skb;
	int pagecount;
	u16 len;

	/* drop this packet */
	if (unlikely(rx_desc->flags_seq & GVE_RXF_ERR)) {
		u64_stats_update_begin(&rx->statss);
		rx->rx_desc_err_dropped_pkt++;
		u64_stats_update_end(&rx->statss);
		return true;
	}

	len = be16_to_cpu(rx_desc->len) - GVE_RX_PAD;
	page_info = &rx->data.page_info[idx];
	dma_sync_single_for_cpu(&priv->pdev->dev, rx->data.qpl->page_buses[idx],
				PAGE_SIZE, DMA_FROM_DEVICE);

	/* gvnic can only receive into registered segments. If the buffer
	 * can't be recycled, our only choice is to copy the data out of
	 * it so that we can return it to the device.
	 */

	if (PAGE_SIZE == 4096) {
		if (len <= priv->rx_copybreak) {
			/* Just copy small packets */
			skb = gve_rx_copy(rx, dev, napi, page_info, len);
			u64_stats_update_begin(&rx->statss);
			rx->rx_copybreak_pkt++;
			u64_stats_update_end(&rx->statss);
			goto have_skb;
		}
		if (unlikely(!gve_can_recycle_pages(dev))) {
			skb = gve_rx_copy(rx, dev, napi, page_info, len);
			goto have_skb;
		}
		pagecount = page_count(page_info->page);
		if (pagecount == 1) {
			/* No part of this page is used by any SKBs; we attach
			 * the page fragment to a new SKB and pass it up the
			 * stack.
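			 * get_page() below keeps a reference so the stack
			 * cannot free the page while the device keeps using
			 * the other half after the buffer is flipped.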
			 */
			skb = gve_rx_add_frags(dev, napi, page_info, len);
			if (!skb) {
				u64_stats_update_begin(&rx->statss);
				rx->rx_skb_alloc_fail++;
				u64_stats_update_end(&rx->statss);
				return true;
			}
			/* Make sure the kernel stack can't release the page */
			get_page(page_info->page);
			/* "flip" to other packet buffer on this page */
			gve_rx_flip_buff(page_info, &rx->data.data_ring[idx]);
		} else if (pagecount >= 2) {
			/* We have previously passed the other half of this
			 * page up the stack, but it has not yet been freed.
			 */
			skb = gve_rx_copy(rx, dev, napi, page_info, len);
		} else {
			WARN(pagecount < 1, "Pagecount should never be < 1");
			return false;
		}
	} else {
		skb = gve_rx_copy(rx, dev, napi, page_info, len);
	}

have_skb:
	/* We didn't manage to allocate an skb but we haven't had any
	 * reset worthy failures.
	 */
	if (!skb) {
		u64_stats_update_begin(&rx->statss);
		rx->rx_skb_alloc_fail++;
		u64_stats_update_end(&rx->statss);
		return true;
	}

	if (likely(feat & NETIF_F_RXCSUM)) {
		/* NIC passes up the partial sum */
		if (rx_desc->csum)
			skb->ip_summed = CHECKSUM_COMPLETE;
		else
			skb->ip_summed = CHECKSUM_NONE;
		skb->csum = csum_unfold(rx_desc->csum);
	}

	/* parse flags & pass relevant info up */
	if (likely(feat & NETIF_F_RXHASH) &&
	    gve_needs_rss(rx_desc->flags_seq))
		skb_set_hash(skb, be32_to_cpu(rx_desc->rss_hash),
			     gve_rss_type(rx_desc->flags_seq));

	if (skb_is_nonlinear(skb))
		napi_gro_frags(napi);
	else
		napi_gro_receive(napi, skb);
	return true;
}

static bool gve_rx_work_pending(struct gve_rx_ring *rx)
{
	struct gve_rx_desc *desc;
	__be16 flags_seq;
	u32 next_idx;

	next_idx = rx->cnt & rx->mask;
	desc = rx->desc.desc_ring + next_idx;

	flags_seq = desc->flags_seq;
	/* Make sure we have synchronized the seq no with the device */
	smp_rmb();

	return (GVE_SEQNO(flags_seq) == rx->desc.seqno);
}

bool gve_clean_rx_done(struct gve_rx_ring *rx, int budget,
		       netdev_features_t feat)
{
	struct gve_priv *priv = rx->gve;
	struct gve_rx_desc *desc;
	u32 cnt = rx->cnt;
	u32 idx = cnt & rx->mask;
	u32 work_done = 0;
	u64 bytes = 0;

	desc = rx->desc.desc_ring + idx;
	while ((GVE_SEQNO(desc->flags_seq) == rx->desc.seqno) &&
	       work_done < budget) {
		netif_info(priv, rx_status, priv->dev,
			   "[%d] idx=%d desc=%p desc->flags_seq=0x%x\n",
			   rx->q_num, idx, desc, desc->flags_seq);
		netif_info(priv, rx_status, priv->dev,
			   "[%d] seqno=%d rx->desc.seqno=%d\n",
			   rx->q_num, GVE_SEQNO(desc->flags_seq),
			   rx->desc.seqno);
		bytes += be16_to_cpu(desc->len) - GVE_RX_PAD;
		if (!gve_rx(rx, desc, feat, idx))
			gve_schedule_reset(priv);
		cnt++;
		idx = cnt & rx->mask;
		desc = rx->desc.desc_ring + idx;
		rx->desc.seqno = gve_next_seqno(rx->desc.seqno);
		work_done++;
	}

	if (!work_done)
		return false;

	u64_stats_update_begin(&rx->statss);
	rx->rpackets += work_done;
	rx->rbytes += bytes;
	u64_stats_update_end(&rx->statss);
	rx->cnt = cnt;
	rx->fill_cnt += work_done;

	gve_rx_write_doorbell(priv, rx);
	return gve_rx_work_pending(rx);
}

bool gve_rx_poll(struct gve_notify_block *block, int budget)
{
	struct gve_rx_ring *rx = block->rx;
	netdev_features_t feat;
	bool repoll = false;

	feat = block->napi.dev->features;

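	/* "repoll" ends up true when descriptors are still outstanding after
	 * the pass below, i.e. the ring has more Rx work pending.
	 */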
	/* If budget is 0, do all the work */
	if (budget == 0)
		budget = INT_MAX;

	if (budget > 0)
		repoll |= gve_clean_rx_done(rx, budget, feat);
	else
		repoll |= gve_rx_work_pending(rx);
	return repoll;
}