1 // SPDX-License-Identifier: (GPL-2.0 OR MIT) 2 /* Google virtual Ethernet (gve) driver 3 * 4 * Copyright (C) 2015-2019 Google, Inc. 5 */ 6 7 #include "gve.h" 8 #include "gve_adminq.h" 9 #include <linux/etherdevice.h> 10 11 static void gve_rx_remove_from_block(struct gve_priv *priv, int queue_idx) 12 { 13 struct gve_notify_block *block = 14 &priv->ntfy_blocks[gve_rx_idx_to_ntfy(priv, queue_idx)]; 15 16 block->rx = NULL; 17 } 18 19 static void gve_rx_free_ring(struct gve_priv *priv, int idx) 20 { 21 struct gve_rx_ring *rx = &priv->rx[idx]; 22 struct device *dev = &priv->pdev->dev; 23 size_t bytes; 24 u32 slots; 25 26 gve_rx_remove_from_block(priv, idx); 27 28 bytes = sizeof(struct gve_rx_desc) * priv->rx_desc_cnt; 29 dma_free_coherent(dev, bytes, rx->desc.desc_ring, rx->desc.bus); 30 rx->desc.desc_ring = NULL; 31 32 dma_free_coherent(dev, sizeof(*rx->q_resources), 33 rx->q_resources, rx->q_resources_bus); 34 rx->q_resources = NULL; 35 36 gve_unassign_qpl(priv, rx->data.qpl->id); 37 rx->data.qpl = NULL; 38 kvfree(rx->data.page_info); 39 40 slots = rx->mask + 1; 41 bytes = sizeof(*rx->data.data_ring) * slots; 42 dma_free_coherent(dev, bytes, rx->data.data_ring, 43 rx->data.data_bus); 44 rx->data.data_ring = NULL; 45 netif_dbg(priv, drv, priv->dev, "freed rx ring %d\n", idx); 46 } 47 48 static void gve_setup_rx_buffer(struct gve_rx_slot_page_info *page_info, 49 struct gve_rx_data_slot *slot, 50 dma_addr_t addr, struct page *page) 51 { 52 page_info->page = page; 53 page_info->page_offset = 0; 54 page_info->page_address = page_address(page); 55 slot->qpl_offset = cpu_to_be64(addr); 56 } 57 58 static int gve_prefill_rx_pages(struct gve_rx_ring *rx) 59 { 60 struct gve_priv *priv = rx->gve; 61 u32 slots; 62 int i; 63 64 /* Allocate one page per Rx queue slot. Each page is split into two 65 * packet buffers, when possible we "page flip" between the two. 66 */ 67 slots = rx->mask + 1; 68 69 rx->data.page_info = kvzalloc(slots * 70 sizeof(*rx->data.page_info), GFP_KERNEL); 71 if (!rx->data.page_info) 72 return -ENOMEM; 73 74 rx->data.qpl = gve_assign_rx_qpl(priv); 75 76 for (i = 0; i < slots; i++) { 77 struct page *page = rx->data.qpl->pages[i]; 78 dma_addr_t addr = i * PAGE_SIZE; 79 80 gve_setup_rx_buffer(&rx->data.page_info[i], 81 &rx->data.data_ring[i], addr, page); 82 } 83 84 return slots; 85 } 86 87 static void gve_rx_add_to_block(struct gve_priv *priv, int queue_idx) 88 { 89 u32 ntfy_idx = gve_rx_idx_to_ntfy(priv, queue_idx); 90 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 91 struct gve_rx_ring *rx = &priv->rx[queue_idx]; 92 93 block->rx = rx; 94 rx->ntfy_id = ntfy_idx; 95 } 96 97 static int gve_rx_alloc_ring(struct gve_priv *priv, int idx) 98 { 99 struct gve_rx_ring *rx = &priv->rx[idx]; 100 struct device *hdev = &priv->pdev->dev; 101 u32 slots, npages; 102 int filled_pages; 103 size_t bytes; 104 int err; 105 106 netif_dbg(priv, drv, priv->dev, "allocating rx ring\n"); 107 /* Make sure everything is zeroed to start with */ 108 memset(rx, 0, sizeof(*rx)); 109 110 rx->gve = priv; 111 rx->q_num = idx; 112 113 slots = priv->rx_pages_per_qpl; 114 rx->mask = slots - 1; 115 116 /* alloc rx data ring */ 117 bytes = sizeof(*rx->data.data_ring) * slots; 118 rx->data.data_ring = dma_alloc_coherent(hdev, bytes, 119 &rx->data.data_bus, 120 GFP_KERNEL); 121 if (!rx->data.data_ring) 122 return -ENOMEM; 123 filled_pages = gve_prefill_rx_pages(rx); 124 if (filled_pages < 0) { 125 err = -ENOMEM; 126 goto abort_with_slots; 127 } 128 rx->fill_cnt = filled_pages; 129 /* Ensure data ring slots (packet buffers) are visible. */ 130 dma_wmb(); 131 132 /* Alloc gve_queue_resources */ 133 rx->q_resources = 134 dma_alloc_coherent(hdev, 135 sizeof(*rx->q_resources), 136 &rx->q_resources_bus, 137 GFP_KERNEL); 138 if (!rx->q_resources) { 139 err = -ENOMEM; 140 goto abort_filled; 141 } 142 netif_dbg(priv, drv, priv->dev, "rx[%d]->data.data_bus=%lx\n", idx, 143 (unsigned long)rx->data.data_bus); 144 145 /* alloc rx desc ring */ 146 bytes = sizeof(struct gve_rx_desc) * priv->rx_desc_cnt; 147 npages = bytes / PAGE_SIZE; 148 if (npages * PAGE_SIZE != bytes) { 149 err = -EIO; 150 goto abort_with_q_resources; 151 } 152 153 rx->desc.desc_ring = dma_alloc_coherent(hdev, bytes, &rx->desc.bus, 154 GFP_KERNEL); 155 if (!rx->desc.desc_ring) { 156 err = -ENOMEM; 157 goto abort_with_q_resources; 158 } 159 rx->mask = slots - 1; 160 rx->cnt = 0; 161 rx->desc.seqno = 1; 162 gve_rx_add_to_block(priv, idx); 163 164 return 0; 165 166 abort_with_q_resources: 167 dma_free_coherent(hdev, sizeof(*rx->q_resources), 168 rx->q_resources, rx->q_resources_bus); 169 rx->q_resources = NULL; 170 abort_filled: 171 kvfree(rx->data.page_info); 172 abort_with_slots: 173 bytes = sizeof(*rx->data.data_ring) * slots; 174 dma_free_coherent(hdev, bytes, rx->data.data_ring, rx->data.data_bus); 175 rx->data.data_ring = NULL; 176 177 return err; 178 } 179 180 int gve_rx_alloc_rings(struct gve_priv *priv) 181 { 182 int err = 0; 183 int i; 184 185 for (i = 0; i < priv->rx_cfg.num_queues; i++) { 186 err = gve_rx_alloc_ring(priv, i); 187 if (err) { 188 netif_err(priv, drv, priv->dev, 189 "Failed to alloc rx ring=%d: err=%d\n", 190 i, err); 191 break; 192 } 193 } 194 /* Unallocate if there was an error */ 195 if (err) { 196 int j; 197 198 for (j = 0; j < i; j++) 199 gve_rx_free_ring(priv, j); 200 } 201 return err; 202 } 203 204 void gve_rx_free_rings(struct gve_priv *priv) 205 { 206 int i; 207 208 for (i = 0; i < priv->rx_cfg.num_queues; i++) 209 gve_rx_free_ring(priv, i); 210 } 211 212 void gve_rx_write_doorbell(struct gve_priv *priv, struct gve_rx_ring *rx) 213 { 214 u32 db_idx = be32_to_cpu(rx->q_resources->db_index); 215 216 iowrite32be(rx->fill_cnt, &priv->db_bar2[db_idx]); 217 } 218 219 static enum pkt_hash_types gve_rss_type(__be16 pkt_flags) 220 { 221 if (likely(pkt_flags & (GVE_RXF_TCP | GVE_RXF_UDP))) 222 return PKT_HASH_TYPE_L4; 223 if (pkt_flags & (GVE_RXF_IPV4 | GVE_RXF_IPV6)) 224 return PKT_HASH_TYPE_L3; 225 return PKT_HASH_TYPE_L2; 226 } 227 228 static struct sk_buff *gve_rx_copy(struct net_device *dev, 229 struct napi_struct *napi, 230 struct gve_rx_slot_page_info *page_info, 231 u16 len) 232 { 233 struct sk_buff *skb = napi_alloc_skb(napi, len); 234 void *va = page_info->page_address + GVE_RX_PAD + 235 page_info->page_offset; 236 237 if (unlikely(!skb)) 238 return NULL; 239 240 __skb_put(skb, len); 241 242 skb_copy_to_linear_data(skb, va, len); 243 244 skb->protocol = eth_type_trans(skb, dev); 245 return skb; 246 } 247 248 static struct sk_buff *gve_rx_add_frags(struct net_device *dev, 249 struct napi_struct *napi, 250 struct gve_rx_slot_page_info *page_info, 251 u16 len) 252 { 253 struct sk_buff *skb = napi_get_frags(napi); 254 255 if (unlikely(!skb)) 256 return NULL; 257 258 skb_add_rx_frag(skb, 0, page_info->page, 259 page_info->page_offset + 260 GVE_RX_PAD, len, PAGE_SIZE / 2); 261 262 return skb; 263 } 264 265 static void gve_rx_flip_buff(struct gve_rx_slot_page_info *page_info, 266 struct gve_rx_data_slot *data_ring) 267 { 268 u64 addr = be64_to_cpu(data_ring->qpl_offset); 269 270 page_info->page_offset ^= PAGE_SIZE / 2; 271 addr ^= PAGE_SIZE / 2; 272 data_ring->qpl_offset = cpu_to_be64(addr); 273 } 274 275 static bool gve_rx(struct gve_rx_ring *rx, struct gve_rx_desc *rx_desc, 276 netdev_features_t feat, u32 idx) 277 { 278 struct gve_rx_slot_page_info *page_info; 279 struct gve_priv *priv = rx->gve; 280 struct napi_struct *napi = &priv->ntfy_blocks[rx->ntfy_id].napi; 281 struct net_device *dev = priv->dev; 282 struct sk_buff *skb; 283 int pagecount; 284 u16 len; 285 286 /* drop this packet */ 287 if (unlikely(rx_desc->flags_seq & GVE_RXF_ERR)) 288 return true; 289 290 len = be16_to_cpu(rx_desc->len) - GVE_RX_PAD; 291 page_info = &rx->data.page_info[idx]; 292 dma_sync_single_for_cpu(&priv->pdev->dev, rx->data.qpl->page_buses[idx], 293 PAGE_SIZE, DMA_FROM_DEVICE); 294 295 /* gvnic can only receive into registered segments. If the buffer 296 * can't be recycled, our only choice is to copy the data out of 297 * it so that we can return it to the device. 298 */ 299 300 if (PAGE_SIZE == 4096) { 301 if (len <= priv->rx_copybreak) { 302 /* Just copy small packets */ 303 skb = gve_rx_copy(dev, napi, page_info, len); 304 goto have_skb; 305 } 306 if (unlikely(!gve_can_recycle_pages(dev))) { 307 skb = gve_rx_copy(dev, napi, page_info, len); 308 goto have_skb; 309 } 310 pagecount = page_count(page_info->page); 311 if (pagecount == 1) { 312 /* No part of this page is used by any SKBs; we attach 313 * the page fragment to a new SKB and pass it up the 314 * stack. 315 */ 316 skb = gve_rx_add_frags(dev, napi, page_info, len); 317 if (!skb) 318 return true; 319 /* Make sure the kernel stack can't release the page */ 320 get_page(page_info->page); 321 /* "flip" to other packet buffer on this page */ 322 gve_rx_flip_buff(page_info, &rx->data.data_ring[idx]); 323 } else if (pagecount >= 2) { 324 /* We have previously passed the other half of this 325 * page up the stack, but it has not yet been freed. 326 */ 327 skb = gve_rx_copy(dev, napi, page_info, len); 328 } else { 329 WARN(pagecount < 1, "Pagecount should never be < 1"); 330 return false; 331 } 332 } else { 333 skb = gve_rx_copy(dev, napi, page_info, len); 334 } 335 336 have_skb: 337 /* We didn't manage to allocate an skb but we haven't had any 338 * reset worthy failures. 339 */ 340 if (!skb) 341 return true; 342 343 if (likely(feat & NETIF_F_RXCSUM)) { 344 /* NIC passes up the partial sum */ 345 if (rx_desc->csum) 346 skb->ip_summed = CHECKSUM_COMPLETE; 347 else 348 skb->ip_summed = CHECKSUM_NONE; 349 skb->csum = csum_unfold(rx_desc->csum); 350 } 351 352 /* parse flags & pass relevant info up */ 353 if (likely(feat & NETIF_F_RXHASH) && 354 gve_needs_rss(rx_desc->flags_seq)) 355 skb_set_hash(skb, be32_to_cpu(rx_desc->rss_hash), 356 gve_rss_type(rx_desc->flags_seq)); 357 358 if (skb_is_nonlinear(skb)) 359 napi_gro_frags(napi); 360 else 361 napi_gro_receive(napi, skb); 362 return true; 363 } 364 365 static bool gve_rx_work_pending(struct gve_rx_ring *rx) 366 { 367 struct gve_rx_desc *desc; 368 __be16 flags_seq; 369 u32 next_idx; 370 371 next_idx = rx->cnt & rx->mask; 372 desc = rx->desc.desc_ring + next_idx; 373 374 flags_seq = desc->flags_seq; 375 /* Make sure we have synchronized the seq no with the device */ 376 smp_rmb(); 377 378 return (GVE_SEQNO(flags_seq) == rx->desc.seqno); 379 } 380 381 bool gve_clean_rx_done(struct gve_rx_ring *rx, int budget, 382 netdev_features_t feat) 383 { 384 struct gve_priv *priv = rx->gve; 385 struct gve_rx_desc *desc; 386 u32 cnt = rx->cnt; 387 u32 idx = cnt & rx->mask; 388 u32 work_done = 0; 389 u64 bytes = 0; 390 391 desc = rx->desc.desc_ring + idx; 392 while ((GVE_SEQNO(desc->flags_seq) == rx->desc.seqno) && 393 work_done < budget) { 394 netif_info(priv, rx_status, priv->dev, 395 "[%d] idx=%d desc=%p desc->flags_seq=0x%x\n", 396 rx->q_num, idx, desc, desc->flags_seq); 397 netif_info(priv, rx_status, priv->dev, 398 "[%d] seqno=%d rx->desc.seqno=%d\n", 399 rx->q_num, GVE_SEQNO(desc->flags_seq), 400 rx->desc.seqno); 401 bytes += be16_to_cpu(desc->len) - GVE_RX_PAD; 402 if (!gve_rx(rx, desc, feat, idx)) 403 gve_schedule_reset(priv); 404 cnt++; 405 idx = cnt & rx->mask; 406 desc = rx->desc.desc_ring + idx; 407 rx->desc.seqno = gve_next_seqno(rx->desc.seqno); 408 work_done++; 409 } 410 411 if (!work_done) 412 return false; 413 414 u64_stats_update_begin(&rx->statss); 415 rx->rpackets += work_done; 416 rx->rbytes += bytes; 417 u64_stats_update_end(&rx->statss); 418 rx->cnt = cnt; 419 rx->fill_cnt += work_done; 420 421 /* restock desc ring slots */ 422 dma_wmb(); /* Ensure descs are visible before ringing doorbell */ 423 gve_rx_write_doorbell(priv, rx); 424 return gve_rx_work_pending(rx); 425 } 426 427 bool gve_rx_poll(struct gve_notify_block *block, int budget) 428 { 429 struct gve_rx_ring *rx = block->rx; 430 netdev_features_t feat; 431 bool repoll = false; 432 433 feat = block->napi.dev->features; 434 435 /* If budget is 0, do all the work */ 436 if (budget == 0) 437 budget = INT_MAX; 438 439 if (budget > 0) 440 repoll |= gve_clean_rx_done(rx, budget, feat); 441 else 442 repoll |= gve_rx_work_pending(rx); 443 return repoll; 444 } 445