// SPDX-License-Identifier: (GPL-2.0 OR MIT)
/* Google virtual Ethernet (gve) driver
 *
 * Copyright (C) 2015-2019 Google, Inc.
 */

#include "gve.h"
#include "gve_adminq.h"
#include <linux/etherdevice.h>

static void gve_rx_remove_from_block(struct gve_priv *priv, int queue_idx)
{
	struct gve_notify_block *block =
			&priv->ntfy_blocks[gve_rx_idx_to_ntfy(priv, queue_idx)];

	block->rx = NULL;
}

static void gve_rx_free_ring(struct gve_priv *priv, int idx)
{
	struct gve_rx_ring *rx = &priv->rx[idx];
	struct device *dev = &priv->pdev->dev;
	size_t bytes;
	u32 slots;

	gve_rx_remove_from_block(priv, idx);

	bytes = sizeof(struct gve_rx_desc) * priv->rx_desc_cnt;
	dma_free_coherent(dev, bytes, rx->desc.desc_ring, rx->desc.bus);
	rx->desc.desc_ring = NULL;

	dma_free_coherent(dev, sizeof(*rx->q_resources),
			  rx->q_resources, rx->q_resources_bus);
	rx->q_resources = NULL;

	gve_unassign_qpl(priv, rx->data.qpl->id);
	rx->data.qpl = NULL;
	kvfree(rx->data.page_info);

	slots = rx->data.mask + 1;
	bytes = sizeof(*rx->data.data_ring) * slots;
	dma_free_coherent(dev, bytes, rx->data.data_ring,
			  rx->data.data_bus);
	rx->data.data_ring = NULL;
	netif_dbg(priv, drv, priv->dev, "freed rx ring %d\n", idx);
}

static void gve_setup_rx_buffer(struct gve_rx_slot_page_info *page_info,
				struct gve_rx_data_slot *slot,
				dma_addr_t addr, struct page *page)
{
	page_info->page = page;
	page_info->page_offset = 0;
	page_info->page_address = page_address(page);
	slot->qpl_offset = cpu_to_be64(addr);
}

static int gve_prefill_rx_pages(struct gve_rx_ring *rx)
{
	struct gve_priv *priv = rx->gve;
	u32 slots;
	int i;

	/* Allocate one page per Rx queue slot. Each page is split into two
	 * packet buffers, when possible we "page flip" between the two.
	 */
	slots = rx->data.mask + 1;

	rx->data.page_info = kvzalloc(slots *
				      sizeof(*rx->data.page_info), GFP_KERNEL);
	if (!rx->data.page_info)
		return -ENOMEM;

	rx->data.qpl = gve_assign_rx_qpl(priv);

	for (i = 0; i < slots; i++) {
		struct page *page = rx->data.qpl->pages[i];
		dma_addr_t addr = i * PAGE_SIZE;

		gve_setup_rx_buffer(&rx->data.page_info[i],
				    &rx->data.data_ring[i], addr, page);
	}

	return slots;
}

static void gve_rx_add_to_block(struct gve_priv *priv, int queue_idx)
{
	u32 ntfy_idx = gve_rx_idx_to_ntfy(priv, queue_idx);
	struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
	struct gve_rx_ring *rx = &priv->rx[queue_idx];

	block->rx = rx;
	rx->ntfy_id = ntfy_idx;
}
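
/* Set up the DMA state backing one Rx ring: the data ring of buffer
 * slots (each slot holds a big-endian offset into the ring's registered
 * queue page list), the queue_resources block shared with the device,
 * and the descriptor ring the NIC writes completions into. All three
 * are coherent DMA allocations.
 */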
static int gve_rx_alloc_ring(struct gve_priv *priv, int idx)
{
	struct gve_rx_ring *rx = &priv->rx[idx];
	struct device *hdev = &priv->pdev->dev;
	u32 slots, npages;
	int filled_pages;
	size_t bytes;
	int err;

	netif_dbg(priv, drv, priv->dev, "allocating rx ring\n");
	/* Make sure everything is zeroed to start with */
	memset(rx, 0, sizeof(*rx));

	rx->gve = priv;
	rx->q_num = idx;

	slots = priv->rx_pages_per_qpl;
	rx->data.mask = slots - 1;

	/* alloc rx data ring */
	bytes = sizeof(*rx->data.data_ring) * slots;
	rx->data.data_ring = dma_alloc_coherent(hdev, bytes,
						&rx->data.data_bus,
						GFP_KERNEL);
	if (!rx->data.data_ring)
		return -ENOMEM;
	filled_pages = gve_prefill_rx_pages(rx);
	if (filled_pages < 0) {
		err = -ENOMEM;
		goto abort_with_slots;
	}
	rx->desc.fill_cnt = filled_pages;
	/* Ensure data ring slots (packet buffers) are visible. */
	dma_wmb();

	/* Alloc gve_queue_resources */
	rx->q_resources =
		dma_alloc_coherent(hdev,
				   sizeof(*rx->q_resources),
				   &rx->q_resources_bus,
				   GFP_KERNEL);
	if (!rx->q_resources) {
		err = -ENOMEM;
		goto abort_filled;
	}
	netif_dbg(priv, drv, priv->dev, "rx[%d]->data.data_bus=%lx\n", idx,
		  (unsigned long)rx->data.data_bus);

	/* alloc rx desc ring */
	bytes = sizeof(struct gve_rx_desc) * priv->rx_desc_cnt;
	npages = bytes / PAGE_SIZE;
	if (npages * PAGE_SIZE != bytes) {
		err = -EIO;
		goto abort_with_q_resources;
	}

	rx->desc.desc_ring = dma_alloc_coherent(hdev, bytes, &rx->desc.bus,
						GFP_KERNEL);
	if (!rx->desc.desc_ring) {
		err = -ENOMEM;
		goto abort_with_q_resources;
	}
	rx->desc.mask = slots - 1;
	rx->desc.cnt = 0;
	rx->desc.seqno = 1;
	gve_rx_add_to_block(priv, idx);

	return 0;

abort_with_q_resources:
	dma_free_coherent(hdev, sizeof(*rx->q_resources),
			  rx->q_resources, rx->q_resources_bus);
	rx->q_resources = NULL;
abort_filled:
	kvfree(rx->data.page_info);
abort_with_slots:
	bytes = sizeof(*rx->data.data_ring) * slots;
	dma_free_coherent(hdev, bytes, rx->data.data_ring, rx->data.data_bus);
	rx->data.data_ring = NULL;

	return err;
}

int gve_rx_alloc_rings(struct gve_priv *priv)
{
	int err = 0;
	int i;

	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
		err = gve_rx_alloc_ring(priv, i);
		if (err) {
			netif_err(priv, drv, priv->dev,
				  "Failed to alloc rx ring=%d: err=%d\n",
				  i, err);
			break;
		}
	}
	/* Unallocate if there was an error */
	if (err) {
		int j;

		for (j = 0; j < i; j++)
			gve_rx_free_ring(priv, j);
	}
	return err;
}

void gve_rx_free_rings(struct gve_priv *priv)
{
	int i;

	for (i = 0; i < priv->rx_cfg.num_queues; i++)
		gve_rx_free_ring(priv, i);
}
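
/* The Rx doorbell carries fill_cnt, the running count of buffer slots
 * posted to the device; the doorbell register is big-endian, hence the
 * iowrite32be().
 */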
void gve_rx_write_doorbell(struct gve_priv *priv, struct gve_rx_ring *rx)
{
	u32 db_idx = be32_to_cpu(rx->q_resources->db_index);

	iowrite32be(rx->desc.fill_cnt, &priv->db_bar2[db_idx]);
}

static enum pkt_hash_types gve_rss_type(__be16 pkt_flags)
{
	if (likely(pkt_flags & (GVE_RXF_TCP | GVE_RXF_UDP)))
		return PKT_HASH_TYPE_L4;
	if (pkt_flags & (GVE_RXF_IPV4 | GVE_RXF_IPV6))
		return PKT_HASH_TYPE_L3;
	return PKT_HASH_TYPE_L2;
}

static struct sk_buff *gve_rx_copy(struct net_device *dev,
				   struct napi_struct *napi,
				   struct gve_rx_slot_page_info *page_info,
				   u16 len)
{
	struct sk_buff *skb = napi_alloc_skb(napi, len);
	void *va = page_info->page_address + GVE_RX_PAD +
		   page_info->page_offset;

	if (unlikely(!skb))
		return NULL;

	__skb_put(skb, len);

	skb_copy_to_linear_data(skb, va, len);

	skb->protocol = eth_type_trans(skb, dev);
	return skb;
}

static struct sk_buff *gve_rx_add_frags(struct net_device *dev,
					struct napi_struct *napi,
					struct gve_rx_slot_page_info *page_info,
					u16 len)
{
	struct sk_buff *skb = napi_get_frags(napi);

	if (unlikely(!skb))
		return NULL;

	skb_add_rx_frag(skb, 0, page_info->page,
			page_info->page_offset +
			GVE_RX_PAD, len, PAGE_SIZE / 2);

	return skb;
}

static void gve_rx_flip_buff(struct gve_rx_slot_page_info *page_info,
			     struct gve_rx_data_slot *data_ring)
{
	u64 addr = be64_to_cpu(data_ring->qpl_offset);

	page_info->page_offset ^= PAGE_SIZE / 2;
	addr ^= PAGE_SIZE / 2;
	data_ring->qpl_offset = cpu_to_be64(addr);
}
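
/* Process one completed descriptor. The page refcount tells us whether
 * the other half of this page is still held by the stack: a count of 1
 * means neither half is in use, so the buffer can be "flipped" and
 * handed up as a page fragment; a higher count means the other half is
 * still in flight, so the packet is copied instead.
 */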
static bool gve_rx(struct gve_rx_ring *rx, struct gve_rx_desc *rx_desc,
		   netdev_features_t feat)
{
	struct gve_rx_slot_page_info *page_info;
	struct gve_priv *priv = rx->gve;
	struct napi_struct *napi = &priv->ntfy_blocks[rx->ntfy_id].napi;
	struct net_device *dev = priv->dev;
	struct sk_buff *skb;
	int pagecount;
	u16 len;
	u32 idx;

	/* drop this packet */
	if (unlikely(rx_desc->flags_seq & GVE_RXF_ERR))
		return true;

	len = be16_to_cpu(rx_desc->len) - GVE_RX_PAD;
	idx = rx->data.cnt & rx->data.mask;
	page_info = &rx->data.page_info[idx];

	/* gvnic can only receive into registered segments. If the buffer
	 * can't be recycled, our only choice is to copy the data out of
	 * it so that we can return it to the device.
	 */

	if (PAGE_SIZE == 4096) {
		if (len <= priv->rx_copybreak) {
			/* Just copy small packets */
			skb = gve_rx_copy(dev, napi, page_info, len);
			goto have_skb;
		}
		if (unlikely(!gve_can_recycle_pages(dev))) {
			skb = gve_rx_copy(dev, napi, page_info, len);
			goto have_skb;
		}
		pagecount = page_count(page_info->page);
		if (pagecount == 1) {
			/* No part of this page is used by any SKBs; we attach
			 * the page fragment to a new SKB and pass it up the
			 * stack.
			 */
			skb = gve_rx_add_frags(dev, napi, page_info, len);
			if (!skb)
				return true;
			/* Make sure the kernel stack can't release the page */
			get_page(page_info->page);
			/* "flip" to other packet buffer on this page */
			gve_rx_flip_buff(page_info, &rx->data.data_ring[idx]);
		} else if (pagecount >= 2) {
			/* We have previously passed the other half of this
			 * page up the stack, but it has not yet been freed.
			 */
			skb = gve_rx_copy(dev, napi, page_info, len);
		} else {
			WARN(pagecount < 1, "Pagecount should never be < 1");
			return false;
		}
	} else {
		skb = gve_rx_copy(dev, napi, page_info, len);
	}

have_skb:
	/* We didn't manage to allocate an skb but we haven't had any
	 * reset worthy failures.
	 */
	if (!skb)
		return true;

	rx->data.cnt++;

	if (likely(feat & NETIF_F_RXCSUM)) {
		/* NIC passes up the partial sum */
		if (rx_desc->csum)
			skb->ip_summed = CHECKSUM_COMPLETE;
		else
			skb->ip_summed = CHECKSUM_NONE;
		skb->csum = csum_unfold(rx_desc->csum);
	}

	/* parse flags & pass relevant info up */
	if (likely(feat & NETIF_F_RXHASH) &&
	    gve_needs_rss(rx_desc->flags_seq))
		skb_set_hash(skb, be32_to_cpu(rx_desc->rss_hash),
			     gve_rss_type(rx_desc->flags_seq));

	if (skb_is_nonlinear(skb))
		napi_gro_frags(napi);
	else
		napi_gro_receive(napi, skb);
	return true;
}

static bool gve_rx_work_pending(struct gve_rx_ring *rx)
{
	struct gve_rx_desc *desc;
	__be16 flags_seq;
	u32 next_idx;

	next_idx = rx->desc.cnt & rx->desc.mask;
	desc = rx->desc.desc_ring + next_idx;

	flags_seq = desc->flags_seq;
	/* Make sure we have synchronized the seq no with the device */
	smp_rmb();

	return (GVE_SEQNO(flags_seq) == rx->desc.seqno);
}

bool gve_clean_rx_done(struct gve_rx_ring *rx, int budget,
		       netdev_features_t feat)
{
	struct gve_priv *priv = rx->gve;
	struct gve_rx_desc *desc;
	u32 cnt = rx->desc.cnt;
	u32 idx = cnt & rx->desc.mask;
	u32 work_done = 0;
	u64 bytes = 0;

	desc = rx->desc.desc_ring + idx;
	/* A descriptor is ready once its sequence number matches the value
	 * the ring expects next; the expected value advances with every
	 * descriptor processed.
	 */
	while ((GVE_SEQNO(desc->flags_seq) == rx->desc.seqno) &&
	       work_done < budget) {
		netif_info(priv, rx_status, priv->dev,
			   "[%d] idx=%d desc=%p desc->flags_seq=0x%x\n",
			   rx->q_num, idx, desc, desc->flags_seq);
		netif_info(priv, rx_status, priv->dev,
			   "[%d] seqno=%d rx->desc.seqno=%d\n",
			   rx->q_num, GVE_SEQNO(desc->flags_seq),
			   rx->desc.seqno);
		bytes += be16_to_cpu(desc->len) - GVE_RX_PAD;
		if (!gve_rx(rx, desc, feat))
			gve_schedule_reset(priv);
		cnt++;
		idx = cnt & rx->desc.mask;
		desc = rx->desc.desc_ring + idx;
		rx->desc.seqno = gve_next_seqno(rx->desc.seqno);
		work_done++;
	}

	if (!work_done)
		return false;

	u64_stats_update_begin(&rx->statss);
	rx->rpackets += work_done;
	rx->rbytes += bytes;
	u64_stats_update_end(&rx->statss);
	rx->desc.cnt = cnt;
	rx->desc.fill_cnt += work_done;

	/* restock desc ring slots */
	dma_wmb(); /* Ensure descs are visible before ringing doorbell */
	gve_rx_write_doorbell(priv, rx);
	return gve_rx_work_pending(rx);
}

bool gve_rx_poll(struct gve_notify_block *block, int budget)
{
	struct gve_rx_ring *rx = block->rx;
	netdev_features_t feat;
	bool repoll = false;

	feat = block->napi.dev->features;

	/* If budget is 0, do all the work */
	if (budget == 0)
		budget = INT_MAX;

	if (budget > 0)
		repoll |= gve_clean_rx_done(rx, budget, feat);
	else
		repoll |= gve_rx_work_pending(rx);
	return repoll;
}