// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/* Copyright (c) 2021, Microsoft Corporation. */

#include <linux/inetdevice.h>
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
#include <linux/mm.h>

#include <net/checksum.h>
#include <net/ip6_checksum.h>

#include "mana.h"

/* Microsoft Azure Network Adapter (MANA) functions */

static int mana_open(struct net_device *ndev)
{
	struct mana_port_context *apc = netdev_priv(ndev);
	int err;

	err = mana_alloc_queues(ndev);
	if (err)
		return err;

	apc->port_is_up = true;

	/* Ensure port state updated before txq state */
	smp_wmb();

	netif_carrier_on(ndev);
	netif_tx_wake_all_queues(ndev);

	return 0;
}

static int mana_close(struct net_device *ndev)
{
	struct mana_port_context *apc = netdev_priv(ndev);

	if (!apc->port_is_up)
		return 0;

	return mana_detach(ndev, true);
}

static bool mana_can_tx(struct gdma_queue *wq)
{
	return mana_gd_wq_avail_space(wq) >= MAX_TX_WQE_SIZE;
}

static unsigned int mana_checksum_info(struct sk_buff *skb)
{
	if (skb->protocol == htons(ETH_P_IP)) {
		struct iphdr *ip = ip_hdr(skb);

		if (ip->protocol == IPPROTO_TCP)
			return IPPROTO_TCP;

		if (ip->protocol == IPPROTO_UDP)
			return IPPROTO_UDP;
	} else if (skb->protocol == htons(ETH_P_IPV6)) {
		struct ipv6hdr *ip6 = ipv6_hdr(skb);

		if (ip6->nexthdr == IPPROTO_TCP)
			return IPPROTO_TCP;

		if (ip6->nexthdr == IPPROTO_UDP)
			return IPPROTO_UDP;
	}

	/* No csum offloading */
	return 0;
}

static int mana_map_skb(struct sk_buff *skb, struct mana_port_context *apc,
			struct mana_tx_package *tp)
{
	struct mana_skb_head *ash = (struct mana_skb_head *)skb->head;
	struct gdma_dev *gd = apc->ac->gdma_dev;
	struct gdma_context *gc;
	struct device *dev;
	skb_frag_t *frag;
	dma_addr_t da;
	int i;

	gc = gd->gdma_context;
	dev = gc->dev;
	da = dma_map_single(dev, skb->data, skb_headlen(skb), DMA_TO_DEVICE);

	if (dma_mapping_error(dev, da))
		return -ENOMEM;

	ash->dma_handle[0] = da;
	ash->size[0] = skb_headlen(skb);

	tp->wqe_req.sgl[0].address = ash->dma_handle[0];
	tp->wqe_req.sgl[0].mem_key = gd->gpa_mkey;
	tp->wqe_req.sgl[0].size = ash->size[0];

	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		frag = &skb_shinfo(skb)->frags[i];
		da = skb_frag_dma_map(dev, frag, 0, skb_frag_size(frag),
				      DMA_TO_DEVICE);

		if (dma_mapping_error(dev, da))
			goto frag_err;

		ash->dma_handle[i + 1] = da;
		ash->size[i + 1] = skb_frag_size(frag);

		tp->wqe_req.sgl[i + 1].address = ash->dma_handle[i + 1];
		tp->wqe_req.sgl[i + 1].mem_key = gd->gpa_mkey;
		tp->wqe_req.sgl[i + 1].size = ash->size[i + 1];
	}

	return 0;

frag_err:
	for (i = i - 1; i >= 0; i--)
		dma_unmap_page(dev, ash->dma_handle[i + 1], ash->size[i + 1],
			       DMA_TO_DEVICE);

	dma_unmap_single(dev, ash->dma_handle[0], ash->size[0], DMA_TO_DEVICE);

	return -ENOMEM;
}

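/* Main transmit path: build the per-packet TX OOB (vPort routing plus
 * checksum/TSO offload hints), DMA-map the skb, post the work request
 * to the hardware SQ and ring the doorbell. The netdev queue is
 * stopped when the SQ no longer has room for a maximum-sized WQE.
 */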
int mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
{
	enum mana_tx_pkt_format pkt_fmt = MANA_SHORT_PKT_FMT;
	struct mana_port_context *apc = netdev_priv(ndev);
	u16 txq_idx = skb_get_queue_mapping(skb);
	struct gdma_dev *gd = apc->ac->gdma_dev;
	bool ipv4 = false, ipv6 = false;
	struct mana_tx_package pkg = {};
	struct netdev_queue *net_txq;
	struct mana_stats *tx_stats;
	struct gdma_queue *gdma_sq;
	unsigned int csum_type;
	struct mana_txq *txq;
	struct mana_cq *cq;
	int err, len;

	if (unlikely(!apc->port_is_up))
		goto tx_drop;

	if (skb_cow_head(skb, MANA_HEADROOM))
		goto tx_drop_count;

	txq = &apc->tx_qp[txq_idx].txq;
	gdma_sq = txq->gdma_sq;
	cq = &apc->tx_qp[txq_idx].tx_cq;

	pkg.tx_oob.s_oob.vcq_num = cq->gdma_id;
	pkg.tx_oob.s_oob.vsq_frame = txq->vsq_frame;

	if (txq->vp_offset > MANA_SHORT_VPORT_OFFSET_MAX) {
		pkg.tx_oob.l_oob.long_vp_offset = txq->vp_offset;
		pkt_fmt = MANA_LONG_PKT_FMT;
	} else {
		pkg.tx_oob.s_oob.short_vp_offset = txq->vp_offset;
	}

	pkg.tx_oob.s_oob.pkt_fmt = pkt_fmt;

	if (pkt_fmt == MANA_SHORT_PKT_FMT)
		pkg.wqe_req.inline_oob_size = sizeof(struct mana_tx_short_oob);
	else
		pkg.wqe_req.inline_oob_size = sizeof(struct mana_tx_oob);

	pkg.wqe_req.inline_oob_data = &pkg.tx_oob;
	pkg.wqe_req.flags = 0;
	pkg.wqe_req.client_data_unit = 0;

	pkg.wqe_req.num_sge = 1 + skb_shinfo(skb)->nr_frags;
	WARN_ON_ONCE(pkg.wqe_req.num_sge > 30);

	if (pkg.wqe_req.num_sge <= ARRAY_SIZE(pkg.sgl_array)) {
		pkg.wqe_req.sgl = pkg.sgl_array;
	} else {
		pkg.sgl_ptr = kmalloc_array(pkg.wqe_req.num_sge,
					    sizeof(struct gdma_sge),
					    GFP_ATOMIC);
		if (!pkg.sgl_ptr)
			goto tx_drop_count;

		pkg.wqe_req.sgl = pkg.sgl_ptr;
	}

	if (skb->protocol == htons(ETH_P_IP))
		ipv4 = true;
	else if (skb->protocol == htons(ETH_P_IPV6))
		ipv6 = true;

	if (skb_is_gso(skb)) {
		pkg.tx_oob.s_oob.is_outer_ipv4 = ipv4;
		pkg.tx_oob.s_oob.is_outer_ipv6 = ipv6;

		pkg.tx_oob.s_oob.comp_iphdr_csum = 1;
		pkg.tx_oob.s_oob.comp_tcp_csum = 1;
		pkg.tx_oob.s_oob.trans_off = skb_transport_offset(skb);

		pkg.wqe_req.client_data_unit = skb_shinfo(skb)->gso_size;
		pkg.wqe_req.flags = GDMA_WR_OOB_IN_SGL | GDMA_WR_PAD_BY_SGE0;
		if (ipv4) {
			ip_hdr(skb)->tot_len = 0;
			ip_hdr(skb)->check = 0;
			tcp_hdr(skb)->check =
				~csum_tcpudp_magic(ip_hdr(skb)->saddr,
						   ip_hdr(skb)->daddr, 0,
						   IPPROTO_TCP, 0);
		} else {
			ipv6_hdr(skb)->payload_len = 0;
			tcp_hdr(skb)->check =
				~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
						 &ipv6_hdr(skb)->daddr, 0,
						 IPPROTO_TCP, 0);
		}
	} else if (skb->ip_summed == CHECKSUM_PARTIAL) {
		csum_type = mana_checksum_info(skb);

		if (csum_type == IPPROTO_TCP) {
			pkg.tx_oob.s_oob.is_outer_ipv4 = ipv4;
			pkg.tx_oob.s_oob.is_outer_ipv6 = ipv6;

			pkg.tx_oob.s_oob.comp_tcp_csum = 1;
			pkg.tx_oob.s_oob.trans_off = skb_transport_offset(skb);

		} else if (csum_type == IPPROTO_UDP) {
			pkg.tx_oob.s_oob.is_outer_ipv4 = ipv4;
			pkg.tx_oob.s_oob.is_outer_ipv6 = ipv6;

			pkg.tx_oob.s_oob.comp_udp_csum = 1;
		} else {
			/* Can't do offload of this type of checksum */
			if (skb_checksum_help(skb))
				goto free_sgl_ptr;
		}
	}

	if (mana_map_skb(skb, apc, &pkg))
		goto free_sgl_ptr;

	skb_queue_tail(&txq->pending_skbs, skb);

	len = skb->len;
	net_txq = netdev_get_tx_queue(ndev, txq_idx);

	err = mana_gd_post_work_request(gdma_sq, &pkg.wqe_req,
					(struct gdma_posted_wqe_info *)skb->cb);
	if (!mana_can_tx(gdma_sq)) {
		netif_tx_stop_queue(net_txq);
		apc->eth_stats.stop_queue++;
	}

	if (err) {
		(void)skb_dequeue_tail(&txq->pending_skbs);
		netdev_warn(ndev, "Failed to post TX OOB: %d\n", err);
		err = NETDEV_TX_BUSY;
		goto tx_busy;
	}

	err = NETDEV_TX_OK;
	atomic_inc(&txq->pending_sends);

	mana_gd_wq_ring_doorbell(gd->gdma_context, gdma_sq);

	/* skb may be freed after mana_gd_post_work_request. Do not use it. */
	skb = NULL;

	tx_stats = &txq->stats;
	u64_stats_update_begin(&tx_stats->syncp);
	tx_stats->packets++;
	tx_stats->bytes += len;
	u64_stats_update_end(&tx_stats->syncp);

tx_busy:
	if (netif_tx_queue_stopped(net_txq) && mana_can_tx(gdma_sq)) {
		netif_tx_wake_queue(net_txq);
		apc->eth_stats.wake_queue++;
	}

	kfree(pkg.sgl_ptr);
	return err;

free_sgl_ptr:
	kfree(pkg.sgl_ptr);
tx_drop_count:
	ndev->stats.tx_dropped++;
tx_drop:
	dev_kfree_skb_any(skb);
	return NETDEV_TX_OK;
}

static void mana_get_stats64(struct net_device *ndev,
			     struct rtnl_link_stats64 *st)
{
	struct mana_port_context *apc = netdev_priv(ndev);
	unsigned int num_queues = apc->num_queues;
	struct mana_stats *stats;
	unsigned int start;
	u64 packets, bytes;
	int q;

	if (!apc->port_is_up)
		return;

	netdev_stats_to_stats64(st, &ndev->stats);

	for (q = 0; q < num_queues; q++) {
		stats = &apc->rxqs[q]->stats;

		do {
			start = u64_stats_fetch_begin_irq(&stats->syncp);
			packets = stats->packets;
			bytes = stats->bytes;
		} while (u64_stats_fetch_retry_irq(&stats->syncp, start));

		st->rx_packets += packets;
		st->rx_bytes += bytes;
	}

	for (q = 0; q < num_queues; q++) {
		stats = &apc->tx_qp[q].txq.stats;

		do {
			start = u64_stats_fetch_begin_irq(&stats->syncp);
			packets = stats->packets;
			bytes = stats->bytes;
		} while (u64_stats_fetch_retry_irq(&stats->syncp, start));

		st->tx_packets += packets;
		st->tx_bytes += bytes;
	}
}

static int mana_get_tx_queue(struct net_device *ndev, struct sk_buff *skb,
			     int old_q)
{
	struct mana_port_context *apc = netdev_priv(ndev);
	u32 hash = skb_get_hash(skb);
	struct sock *sk = skb->sk;
	int txq;

	txq = apc->indir_table[hash & MANA_INDIRECT_TABLE_MASK];

	if (txq != old_q && sk && sk_fullsock(sk) &&
	    rcu_access_pointer(sk->sk_dst_cache))
		sk_tx_queue_set(sk, txq);

	return txq;
}

static u16 mana_select_queue(struct net_device *ndev, struct sk_buff *skb,
			     struct net_device *sb_dev)
{
	int txq;

	if (ndev->real_num_tx_queues == 1)
		return 0;

	txq = sk_tx_queue_get(skb->sk);

	if (txq < 0 || skb->ooo_okay || txq >= ndev->real_num_tx_queues) {
		if (skb_rx_queue_recorded(skb))
			txq = skb_get_rx_queue(skb);
		else
			txq = mana_get_tx_queue(ndev, skb, txq);
	}

	return txq;
}

static const struct net_device_ops mana_devops = {
	.ndo_open		= mana_open,
	.ndo_stop		= mana_close,
	.ndo_select_queue	= mana_select_queue,
	.ndo_start_xmit		= mana_start_xmit,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_get_stats64	= mana_get_stats64,
	.ndo_bpf		= mana_bpf,
};

static void mana_cleanup_port_context(struct mana_port_context *apc)
{
	kfree(apc->rxqs);
	apc->rxqs = NULL;
}

static int mana_init_port_context(struct mana_port_context *apc)
{
	apc->rxqs = kcalloc(apc->num_queues, sizeof(struct mana_rxq *),
			    GFP_KERNEL);

	return !apc->rxqs ? -ENOMEM : 0;
}

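/* Send a management request to the device over the GDMA channel and
 * verify that the response carries the same dev_id/activity_id and a
 * zero status.
 */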
static int mana_send_request(struct mana_context *ac, void *in_buf,
			     u32 in_len, void *out_buf, u32 out_len)
{
	struct gdma_context *gc = ac->gdma_dev->gdma_context;
	struct gdma_resp_hdr *resp = out_buf;
	struct gdma_req_hdr *req = in_buf;
	struct device *dev = gc->dev;
	static atomic_t activity_id;
	int err;

	req->dev_id = gc->mana.dev_id;
	req->activity_id = atomic_inc_return(&activity_id);

	err = mana_gd_send_request(gc, in_len, in_buf, out_len,
				   out_buf);
	if (err || resp->status) {
		dev_err(dev, "Failed to send mana message: %d, 0x%x\n",
			err, resp->status);
		return err ? err : -EPROTO;
	}

	if (req->dev_id.as_uint32 != resp->dev_id.as_uint32 ||
	    req->activity_id != resp->activity_id) {
		dev_err(dev, "Unexpected mana message response: %x,%x,%x,%x\n",
			req->dev_id.as_uint32, resp->dev_id.as_uint32,
			req->activity_id, resp->activity_id);
		return -EPROTO;
	}

	return 0;
}

static int mana_verify_resp_hdr(const struct gdma_resp_hdr *resp_hdr,
				const enum mana_command_code expected_code,
				const u32 min_size)
{
	if (resp_hdr->response.msg_type != expected_code)
		return -EPROTO;

	if (resp_hdr->response.msg_version < GDMA_MESSAGE_V1)
		return -EPROTO;

	if (resp_hdr->response.msg_size < min_size)
		return -EPROTO;

	return 0;
}

static int mana_query_device_cfg(struct mana_context *ac, u32 proto_major_ver,
				 u32 proto_minor_ver, u32 proto_micro_ver,
				 u16 *max_num_vports)
{
	struct gdma_context *gc = ac->gdma_dev->gdma_context;
	struct mana_query_device_cfg_resp resp = {};
	struct mana_query_device_cfg_req req = {};
	struct device *dev = gc->dev;
	int err = 0;

	mana_gd_init_req_hdr(&req.hdr, MANA_QUERY_DEV_CONFIG,
			     sizeof(req), sizeof(resp));
	req.proto_major_ver = proto_major_ver;
	req.proto_minor_ver = proto_minor_ver;
	req.proto_micro_ver = proto_micro_ver;

	err = mana_send_request(ac, &req, sizeof(req), &resp, sizeof(resp));
	if (err) {
		dev_err(dev, "Failed to query config: %d", err);
		return err;
	}

	err = mana_verify_resp_hdr(&resp.hdr, MANA_QUERY_DEV_CONFIG,
				   sizeof(resp));
	if (err || resp.hdr.status) {
		dev_err(dev, "Invalid query result: %d, 0x%x\n", err,
			resp.hdr.status);
		if (!err)
			err = -EPROTO;
		return err;
	}

	*max_num_vports = resp.max_num_vports;

	return 0;
}

static int mana_query_vport_cfg(struct mana_port_context *apc, u32 vport_index,
				u32 *max_sq, u32 *max_rq, u32 *num_indir_entry)
{
	struct mana_query_vport_cfg_resp resp = {};
	struct mana_query_vport_cfg_req req = {};
	int err;

	mana_gd_init_req_hdr(&req.hdr, MANA_QUERY_VPORT_CONFIG,
			     sizeof(req), sizeof(resp));

	req.vport_index = vport_index;

	err = mana_send_request(apc->ac, &req, sizeof(req), &resp,
				sizeof(resp));
	if (err)
		return err;

	err = mana_verify_resp_hdr(&resp.hdr, MANA_QUERY_VPORT_CONFIG,
				   sizeof(resp));
	if (err)
		return err;

	if (resp.hdr.status)
		return -EPROTO;

	*max_sq = resp.max_num_sq;
	*max_rq = resp.max_num_rq;
	*num_indir_entry = resp.num_indirection_ent;

	apc->port_handle = resp.vport;
	ether_addr_copy(apc->mac_addr, resp.mac_addr);

	return 0;
}

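/* Bind the TX vPort to a protection domain and doorbell page, and save
 * the vPort offset used later when building TX OOBs.
 */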
static int mana_cfg_vport(struct mana_port_context *apc, u32 protection_dom_id,
			  u32 doorbell_pg_id)
{
	struct mana_config_vport_resp resp = {};
	struct mana_config_vport_req req = {};
	int err;

	mana_gd_init_req_hdr(&req.hdr, MANA_CONFIG_VPORT_TX,
			     sizeof(req), sizeof(resp));
	req.vport = apc->port_handle;
	req.pdid = protection_dom_id;
	req.doorbell_pageid = doorbell_pg_id;

	err = mana_send_request(apc->ac, &req, sizeof(req), &resp,
				sizeof(resp));
	if (err) {
		netdev_err(apc->ndev, "Failed to configure vPort: %d\n", err);
		goto out;
	}

	err = mana_verify_resp_hdr(&resp.hdr, MANA_CONFIG_VPORT_TX,
				   sizeof(resp));
	if (err || resp.hdr.status) {
		netdev_err(apc->ndev, "Failed to configure vPort: %d, 0x%x\n",
			   err, resp.hdr.status);
		if (!err)
			err = -EPROTO;

		goto out;
	}

	apc->tx_shortform_allowed = resp.short_form_allowed;
	apc->tx_vp_offset = resp.tx_vport_offset;
out:
	return err;
}

static int mana_cfg_vport_steering(struct mana_port_context *apc,
				   enum TRI_STATE rx,
				   bool update_default_rxobj, bool update_key,
				   bool update_tab)
{
	u16 num_entries = MANA_INDIRECT_TABLE_SIZE;
	struct mana_cfg_rx_steer_req *req = NULL;
	struct mana_cfg_rx_steer_resp resp = {};
	struct net_device *ndev = apc->ndev;
	mana_handle_t *req_indir_tab;
	u32 req_buf_size;
	int err;

	req_buf_size = sizeof(*req) + sizeof(mana_handle_t) * num_entries;
	req = kzalloc(req_buf_size, GFP_KERNEL);
	if (!req)
		return -ENOMEM;

	mana_gd_init_req_hdr(&req->hdr, MANA_CONFIG_VPORT_RX, req_buf_size,
			     sizeof(resp));

	req->vport = apc->port_handle;
	req->num_indir_entries = num_entries;
	req->indir_tab_offset = sizeof(*req);
	req->rx_enable = rx;
	req->rss_enable = apc->rss_state;
	req->update_default_rxobj = update_default_rxobj;
	req->update_hashkey = update_key;
	req->update_indir_tab = update_tab;
	req->default_rxobj = apc->default_rxobj;

	if (update_key)
		memcpy(&req->hashkey, apc->hashkey, MANA_HASH_KEY_SIZE);

	if (update_tab) {
		req_indir_tab = (mana_handle_t *)(req + 1);
		memcpy(req_indir_tab, apc->rxobj_table,
		       req->num_indir_entries * sizeof(mana_handle_t));
	}

	err = mana_send_request(apc->ac, req, req_buf_size, &resp,
				sizeof(resp));
	if (err) {
		netdev_err(ndev, "Failed to configure vPort RX: %d\n", err);
		goto out;
	}

	err = mana_verify_resp_hdr(&resp.hdr, MANA_CONFIG_VPORT_RX,
				   sizeof(resp));
	if (err) {
		netdev_err(ndev, "vPort RX configuration failed: %d\n", err);
		goto out;
	}

	if (resp.hdr.status) {
		netdev_err(ndev, "vPort RX configuration failed: 0x%x\n",
			   resp.hdr.status);
		err = -EPROTO;
	}
out:
	kfree(req);
	return err;
}

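/* Ask the device to create a WQ object tying a work queue to its
 * completion queue; on success the hardware queue ids are returned in
 * wq_spec/cq_spec and the object handle in wq_obj.
 */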
static int mana_create_wq_obj(struct mana_port_context *apc,
			      mana_handle_t vport,
			      u32 wq_type, struct mana_obj_spec *wq_spec,
			      struct mana_obj_spec *cq_spec,
			      mana_handle_t *wq_obj)
{
	struct mana_create_wqobj_resp resp = {};
	struct mana_create_wqobj_req req = {};
	struct net_device *ndev = apc->ndev;
	int err;

	mana_gd_init_req_hdr(&req.hdr, MANA_CREATE_WQ_OBJ,
			     sizeof(req), sizeof(resp));
	req.vport = vport;
	req.wq_type = wq_type;
	req.wq_gdma_region = wq_spec->gdma_region;
	req.cq_gdma_region = cq_spec->gdma_region;
	req.wq_size = wq_spec->queue_size;
	req.cq_size = cq_spec->queue_size;
	req.cq_moderation_ctx_id = cq_spec->modr_ctx_id;
	req.cq_parent_qid = cq_spec->attached_eq;

	err = mana_send_request(apc->ac, &req, sizeof(req), &resp,
				sizeof(resp));
	if (err) {
		netdev_err(ndev, "Failed to create WQ object: %d\n", err);
		goto out;
	}

	err = mana_verify_resp_hdr(&resp.hdr, MANA_CREATE_WQ_OBJ,
				   sizeof(resp));
	if (err || resp.hdr.status) {
		netdev_err(ndev, "Failed to create WQ object: %d, 0x%x\n", err,
			   resp.hdr.status);
		if (!err)
			err = -EPROTO;
		goto out;
	}

	if (resp.wq_obj == INVALID_MANA_HANDLE) {
		netdev_err(ndev, "Got an invalid WQ object handle\n");
		err = -EPROTO;
		goto out;
	}

	*wq_obj = resp.wq_obj;
	wq_spec->queue_index = resp.wq_id;
	cq_spec->queue_index = resp.cq_id;

	return 0;
out:
	return err;
}

static void mana_destroy_wq_obj(struct mana_port_context *apc, u32 wq_type,
				mana_handle_t wq_obj)
{
	struct mana_destroy_wqobj_resp resp = {};
	struct mana_destroy_wqobj_req req = {};
	struct net_device *ndev = apc->ndev;
	int err;

	mana_gd_init_req_hdr(&req.hdr, MANA_DESTROY_WQ_OBJ,
			     sizeof(req), sizeof(resp));
	req.wq_type = wq_type;
	req.wq_obj_handle = wq_obj;

	err = mana_send_request(apc->ac, &req, sizeof(req), &resp,
				sizeof(resp));
	if (err) {
		netdev_err(ndev, "Failed to destroy WQ object: %d\n", err);
		return;
	}

	err = mana_verify_resp_hdr(&resp.hdr, MANA_DESTROY_WQ_OBJ,
				   sizeof(resp));
	if (err || resp.hdr.status)
		netdev_err(ndev, "Failed to destroy WQ object: %d, 0x%x\n", err,
			   resp.hdr.status);
}

static void mana_destroy_eq(struct mana_context *ac)
{
	struct gdma_context *gc = ac->gdma_dev->gdma_context;
	struct gdma_queue *eq;
	int i;

	if (!ac->eqs)
		return;

	for (i = 0; i < gc->max_num_queues; i++) {
		eq = ac->eqs[i].eq;
		if (!eq)
			continue;

		mana_gd_destroy_queue(gc, eq);
	}

	kfree(ac->eqs);
	ac->eqs = NULL;
}

static int mana_create_eq(struct mana_context *ac)
{
	struct gdma_dev *gd = ac->gdma_dev;
	struct gdma_context *gc = gd->gdma_context;
	struct gdma_queue_spec spec = {};
	int err;
	int i;

	ac->eqs = kcalloc(gc->max_num_queues, sizeof(struct mana_eq),
			  GFP_KERNEL);
	if (!ac->eqs)
		return -ENOMEM;

	spec.type = GDMA_EQ;
	spec.monitor_avl_buf = false;
	spec.queue_size = EQ_SIZE;
	spec.eq.callback = NULL;
	spec.eq.context = ac->eqs;
	spec.eq.log2_throttle_limit = LOG2_EQ_THROTTLE;

	for (i = 0; i < gc->max_num_queues; i++) {
		err = mana_gd_create_mana_eq(gd, &spec, &ac->eqs[i].eq);
		if (err)
			goto out;
	}

	return 0;
out:
	mana_destroy_eq(ac);
	return err;
}

static int mana_move_wq_tail(struct gdma_queue *wq, u32 num_units)
{
	u32 used_space_old;
	u32 used_space_new;

	used_space_old = wq->head - wq->tail;
	used_space_new = wq->head - (wq->tail + num_units);

	if (WARN_ON_ONCE(used_space_new > used_space_old))
		return -ERANGE;

	wq->tail += num_units;
	return 0;
}

static void mana_unmap_skb(struct sk_buff *skb, struct mana_port_context *apc)
{
	struct mana_skb_head *ash = (struct mana_skb_head *)skb->head;
	struct gdma_context *gc = apc->ac->gdma_dev->gdma_context;
	struct device *dev = gc->dev;
	int i;

	dma_unmap_single(dev, ash->dma_handle[0], ash->size[0], DMA_TO_DEVICE);

	for (i = 1; i < skb_shinfo(skb)->nr_frags + 1; i++)
		dma_unmap_page(dev, ash->dma_handle[i], ash->size[i],
			       DMA_TO_DEVICE);
}

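/* TX completion handler: drain completions from the CQ, unmap and free
 * the corresponding skbs, advance the SQ tail, and wake the netdev
 * queue if it was stopped and enough SQ space is available again.
 */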
static void mana_poll_tx_cq(struct mana_cq *cq)
{
	struct gdma_comp *completions = cq->gdma_comp_buf;
	struct gdma_posted_wqe_info *wqe_info;
	unsigned int pkt_transmitted = 0;
	unsigned int wqe_unit_cnt = 0;
	struct mana_txq *txq = cq->txq;
	struct mana_port_context *apc;
	struct netdev_queue *net_txq;
	struct gdma_queue *gdma_wq;
	unsigned int avail_space;
	struct net_device *ndev;
	struct sk_buff *skb;
	bool txq_stopped;
	int comp_read;
	int i;

	ndev = txq->ndev;
	apc = netdev_priv(ndev);

	comp_read = mana_gd_poll_cq(cq->gdma_cq, completions,
				    CQE_POLLING_BUFFER);

	if (comp_read < 1)
		return;

	for (i = 0; i < comp_read; i++) {
		struct mana_tx_comp_oob *cqe_oob;

		if (WARN_ON_ONCE(!completions[i].is_sq))
			return;

		cqe_oob = (struct mana_tx_comp_oob *)completions[i].cqe_data;
		if (WARN_ON_ONCE(cqe_oob->cqe_hdr.client_type !=
				 MANA_CQE_COMPLETION))
			return;

		switch (cqe_oob->cqe_hdr.cqe_type) {
		case CQE_TX_OKAY:
			break;

		case CQE_TX_SA_DROP:
		case CQE_TX_MTU_DROP:
		case CQE_TX_INVALID_OOB:
		case CQE_TX_INVALID_ETH_TYPE:
		case CQE_TX_HDR_PROCESSING_ERROR:
		case CQE_TX_VF_DISABLED:
		case CQE_TX_VPORT_IDX_OUT_OF_RANGE:
		case CQE_TX_VPORT_DISABLED:
		case CQE_TX_VLAN_TAGGING_VIOLATION:
			WARN_ONCE(1, "TX: CQE error %d: ignored.\n",
				  cqe_oob->cqe_hdr.cqe_type);
			break;

		default:
			/* If the CQE type is unexpected, log an error, assert,
			 * and go through the error path.
			 */
			WARN_ONCE(1, "TX: Unexpected CQE type %d: HW BUG?\n",
				  cqe_oob->cqe_hdr.cqe_type);
			return;
		}

		if (WARN_ON_ONCE(txq->gdma_txq_id != completions[i].wq_num))
			return;

		skb = skb_dequeue(&txq->pending_skbs);
		if (WARN_ON_ONCE(!skb))
			return;

		wqe_info = (struct gdma_posted_wqe_info *)skb->cb;
		wqe_unit_cnt += wqe_info->wqe_size_in_bu;

		mana_unmap_skb(skb, apc);

		napi_consume_skb(skb, cq->budget);

		pkt_transmitted++;
	}

	if (WARN_ON_ONCE(wqe_unit_cnt == 0))
		return;

	mana_move_wq_tail(txq->gdma_sq, wqe_unit_cnt);

	gdma_wq = txq->gdma_sq;
	avail_space = mana_gd_wq_avail_space(gdma_wq);

	/* Ensure tail updated before checking q stop */
	smp_mb();

	net_txq = txq->net_txq;
	txq_stopped = netif_tx_queue_stopped(net_txq);

	/* Ensure checking txq_stopped before apc->port_is_up. */
	smp_rmb();

	if (txq_stopped && apc->port_is_up && avail_space >= MAX_TX_WQE_SIZE) {
		netif_tx_wake_queue(net_txq);
		apc->eth_stats.wake_queue++;
	}

	if (atomic_sub_return(pkt_transmitted, &txq->pending_sends) < 0)
		WARN_ON_ONCE(1);

	cq->work_done = pkt_transmitted;
}

static void mana_post_pkt_rxq(struct mana_rxq *rxq)
{
	struct mana_recv_buf_oob *recv_buf_oob;
	u32 curr_index;
	int err;

	curr_index = rxq->buf_index++;
	if (rxq->buf_index == rxq->num_rx_buf)
		rxq->buf_index = 0;

	recv_buf_oob = &rxq->rx_oobs[curr_index];

	err = mana_gd_post_and_ring(rxq->gdma_rq, &recv_buf_oob->wqe_req,
				    &recv_buf_oob->wqe_inf);
	if (WARN_ON_ONCE(err))
		return;

	WARN_ON_ONCE(recv_buf_oob->wqe_inf.wqe_size_in_bu != 1);
}

static struct sk_buff *mana_build_skb(void *buf_va, uint pkt_len,
				      struct xdp_buff *xdp)
{
	struct sk_buff *skb = build_skb(buf_va, PAGE_SIZE);

	if (!skb)
		return NULL;

	if (xdp->data_hard_start) {
		skb_reserve(skb, xdp->data - xdp->data_hard_start);
		skb_put(skb, xdp->data_end - xdp->data);
	} else {
		skb_reserve(skb, XDP_PACKET_HEADROOM);
		skb_put(skb, pkt_len);
	}

	return skb;
}

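/* Deliver one received buffer up the stack: run the XDP program first,
 * then build an skb, fill in checksum/RSS-hash metadata and hand it to
 * GRO (or to the XDP_TX path).
 */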
static void mana_rx_skb(void *buf_va, struct mana_rxcomp_oob *cqe,
			struct mana_rxq *rxq)
{
	struct mana_stats *rx_stats = &rxq->stats;
	struct net_device *ndev = rxq->ndev;
	uint pkt_len = cqe->ppi[0].pkt_len;
	u16 rxq_idx = rxq->rxq_idx;
	struct napi_struct *napi;
	struct xdp_buff xdp = {};
	struct sk_buff *skb;
	u32 hash_value;
	u32 act;

	rxq->rx_cq.work_done++;
	napi = &rxq->rx_cq.napi;

	if (!buf_va) {
		++ndev->stats.rx_dropped;
		return;
	}

	act = mana_run_xdp(ndev, rxq, &xdp, buf_va, pkt_len);

	if (act != XDP_PASS && act != XDP_TX)
		goto drop;

	skb = mana_build_skb(buf_va, pkt_len, &xdp);

	if (!skb)
		goto drop;

	skb->dev = napi->dev;

	skb->protocol = eth_type_trans(skb, ndev);
	skb_checksum_none_assert(skb);
	skb_record_rx_queue(skb, rxq_idx);

	if ((ndev->features & NETIF_F_RXCSUM) && cqe->rx_iphdr_csum_succeed) {
		if (cqe->rx_tcp_csum_succeed || cqe->rx_udp_csum_succeed)
			skb->ip_summed = CHECKSUM_UNNECESSARY;
	}

	if (cqe->rx_hashtype != 0 && (ndev->features & NETIF_F_RXHASH)) {
		hash_value = cqe->ppi[0].pkt_hash;

		if (cqe->rx_hashtype & MANA_HASH_L4)
			skb_set_hash(skb, hash_value, PKT_HASH_TYPE_L4);
		else
			skb_set_hash(skb, hash_value, PKT_HASH_TYPE_L3);
	}

	if (act == XDP_TX) {
		skb_set_queue_mapping(skb, rxq_idx);
		mana_xdp_tx(skb, ndev);
		return;
	}

	napi_gro_receive(napi, skb);

	u64_stats_update_begin(&rx_stats->syncp);
	rx_stats->packets++;
	rx_stats->bytes += pkt_len;
	u64_stats_update_end(&rx_stats->syncp);
	return;

drop:
	free_page((unsigned long)buf_va);
	++ndev->stats.rx_dropped;
	return;
}

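/* Process a single RX completion: validate the CQE, allocate and map a
 * replacement page for the receive buffer, pass the old buffer to
 * mana_rx_skb(), then advance the RQ tail and repost the WQE.
 */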
static void mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq,
				struct gdma_comp *cqe)
{
	struct mana_rxcomp_oob *oob = (struct mana_rxcomp_oob *)cqe->cqe_data;
	struct gdma_context *gc = rxq->gdma_rq->gdma_dev->gdma_context;
	struct net_device *ndev = rxq->ndev;
	struct mana_recv_buf_oob *rxbuf_oob;
	struct device *dev = gc->dev;
	void *new_buf, *old_buf;
	struct page *new_page;
	u32 curr, pktlen;
	dma_addr_t da;

	switch (oob->cqe_hdr.cqe_type) {
	case CQE_RX_OKAY:
		break;

	case CQE_RX_TRUNCATED:
		netdev_err(ndev, "Dropped a truncated packet\n");
		return;

	case CQE_RX_COALESCED_4:
		netdev_err(ndev, "RX coalescing is unsupported\n");
		return;

	case CQE_RX_OBJECT_FENCE:
		netdev_err(ndev, "RX Fencing is unsupported\n");
		return;

	default:
		netdev_err(ndev, "Unknown RX CQE type = %d\n",
			   oob->cqe_hdr.cqe_type);
		return;
	}

	if (oob->cqe_hdr.cqe_type != CQE_RX_OKAY)
		return;

	pktlen = oob->ppi[0].pkt_len;

	if (pktlen == 0) {
		/* data packets should never have packetlength of zero */
		netdev_err(ndev, "RX pkt len=0, rq=%u, cq=%u, rxobj=0x%llx\n",
			   rxq->gdma_id, cq->gdma_id, rxq->rxobj);
		return;
	}

	curr = rxq->buf_index;
	rxbuf_oob = &rxq->rx_oobs[curr];
	WARN_ON_ONCE(rxbuf_oob->wqe_inf.wqe_size_in_bu != 1);

	new_page = alloc_page(GFP_ATOMIC);

	if (new_page) {
		da = dma_map_page(dev, new_page, XDP_PACKET_HEADROOM, rxq->datasize,
				  DMA_FROM_DEVICE);

		if (dma_mapping_error(dev, da)) {
			__free_page(new_page);
			new_page = NULL;
		}
	}

	new_buf = new_page ? page_to_virt(new_page) : NULL;

	if (new_buf) {
		dma_unmap_page(dev, rxbuf_oob->buf_dma_addr, rxq->datasize,
			       DMA_FROM_DEVICE);

		old_buf = rxbuf_oob->buf_va;

		/* refresh the rxbuf_oob with the new page */
		rxbuf_oob->buf_va = new_buf;
		rxbuf_oob->buf_dma_addr = da;
		rxbuf_oob->sgl[0].address = rxbuf_oob->buf_dma_addr;
	} else {
		old_buf = NULL; /* drop the packet if no memory */
	}

	mana_rx_skb(old_buf, oob, rxq);

	mana_move_wq_tail(rxq->gdma_rq, rxbuf_oob->wqe_inf.wqe_size_in_bu);

	mana_post_pkt_rxq(rxq);
}

static void mana_poll_rx_cq(struct mana_cq *cq)
{
	struct gdma_comp *comp = cq->gdma_comp_buf;
	int comp_read, i;

	comp_read = mana_gd_poll_cq(cq->gdma_cq, comp, CQE_POLLING_BUFFER);
	WARN_ON_ONCE(comp_read > CQE_POLLING_BUFFER);

	for (i = 0; i < comp_read; i++) {
		if (WARN_ON_ONCE(comp[i].is_sq))
			return;

		/* verify recv cqe references the right rxq */
		if (WARN_ON_ONCE(comp[i].wq_num != cq->rxq->gdma_id))
			return;

		mana_process_rx_cqe(cq->rxq, cq, &comp[i]);
	}
}

static void mana_cq_handler(void *context, struct gdma_queue *gdma_queue)
{
	struct mana_cq *cq = context;
	u8 arm_bit;

	WARN_ON_ONCE(cq->gdma_cq != gdma_queue);

	if (cq->type == MANA_CQ_TYPE_RX)
		mana_poll_rx_cq(cq);
	else
		mana_poll_tx_cq(cq);

	if (cq->work_done < cq->budget &&
	    napi_complete_done(&cq->napi, cq->work_done)) {
		arm_bit = SET_ARM_BIT;
	} else {
		arm_bit = 0;
	}

	mana_gd_ring_cq(gdma_queue, arm_bit);
}

static int mana_poll(struct napi_struct *napi, int budget)
{
	struct mana_cq *cq = container_of(napi, struct mana_cq, napi);

	cq->work_done = 0;
	cq->budget = budget;

	mana_cq_handler(cq, cq->gdma_cq);

	return min(cq->work_done, budget);
}

static void mana_schedule_napi(void *context, struct gdma_queue *gdma_queue)
{
	struct mana_cq *cq = context;

	napi_schedule_irqoff(&cq->napi);
}

static void mana_deinit_cq(struct mana_port_context *apc, struct mana_cq *cq)
{
	struct gdma_dev *gd = apc->ac->gdma_dev;

	if (!cq->gdma_cq)
		return;

	mana_gd_destroy_queue(gd->gdma_context, cq->gdma_cq);
}

static void mana_deinit_txq(struct mana_port_context *apc, struct mana_txq *txq)
{
	struct gdma_dev *gd = apc->ac->gdma_dev;

	if (!txq->gdma_sq)
		return;

	mana_gd_destroy_queue(gd->gdma_context, txq->gdma_sq);
}

static void mana_destroy_txq(struct mana_port_context *apc)
{
	struct napi_struct *napi;
	int i;

	if (!apc->tx_qp)
		return;

	for (i = 0; i < apc->num_queues; i++) {
		napi = &apc->tx_qp[i].tx_cq.napi;
		napi_synchronize(napi);
		napi_disable(napi);
		netif_napi_del(napi);

		mana_destroy_wq_obj(apc, GDMA_SQ, apc->tx_qp[i].tx_object);

		mana_deinit_cq(apc, &apc->tx_qp[i].tx_cq);

		mana_deinit_txq(apc, &apc->tx_qp[i].txq);
	}

	kfree(apc->tx_qp);
	apc->tx_qp = NULL;
}

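/* Create the per-port TX queues: one hardware SQ plus its completion
 * queue per netdev TX queue, each registered with the device through a
 * WQ object and served by its own NAPI instance.
 */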
static int mana_create_txq(struct mana_port_context *apc,
			   struct net_device *net)
{
	struct mana_context *ac = apc->ac;
	struct gdma_dev *gd = ac->gdma_dev;
	struct mana_obj_spec wq_spec;
	struct mana_obj_spec cq_spec;
	struct gdma_queue_spec spec;
	struct gdma_context *gc;
	struct mana_txq *txq;
	struct mana_cq *cq;
	u32 txq_size;
	u32 cq_size;
	int err;
	int i;

	apc->tx_qp = kcalloc(apc->num_queues, sizeof(struct mana_tx_qp),
			     GFP_KERNEL);
	if (!apc->tx_qp)
		return -ENOMEM;

	/* The minimum size of the WQE is 32 bytes, hence
	 * MAX_SEND_BUFFERS_PER_QUEUE represents the maximum number of WQEs
	 * the SQ can store. This value is then used to size other queues
	 * to prevent overflow.
	 */
	txq_size = MAX_SEND_BUFFERS_PER_QUEUE * 32;
	BUILD_BUG_ON(!PAGE_ALIGNED(txq_size));

	cq_size = MAX_SEND_BUFFERS_PER_QUEUE * COMP_ENTRY_SIZE;
	cq_size = PAGE_ALIGN(cq_size);

	gc = gd->gdma_context;

	for (i = 0; i < apc->num_queues; i++) {
		apc->tx_qp[i].tx_object = INVALID_MANA_HANDLE;

		/* Create SQ */
		txq = &apc->tx_qp[i].txq;

		u64_stats_init(&txq->stats.syncp);
		txq->ndev = net;
		txq->net_txq = netdev_get_tx_queue(net, i);
		txq->vp_offset = apc->tx_vp_offset;
		skb_queue_head_init(&txq->pending_skbs);

		memset(&spec, 0, sizeof(spec));
		spec.type = GDMA_SQ;
		spec.monitor_avl_buf = true;
		spec.queue_size = txq_size;
		err = mana_gd_create_mana_wq_cq(gd, &spec, &txq->gdma_sq);
		if (err)
			goto out;

		/* Create SQ's CQ */
		cq = &apc->tx_qp[i].tx_cq;
		cq->type = MANA_CQ_TYPE_TX;

		cq->txq = txq;

		memset(&spec, 0, sizeof(spec));
		spec.type = GDMA_CQ;
		spec.monitor_avl_buf = false;
		spec.queue_size = cq_size;
		spec.cq.callback = mana_schedule_napi;
		spec.cq.parent_eq = ac->eqs[i].eq;
		spec.cq.context = cq;
		err = mana_gd_create_mana_wq_cq(gd, &spec, &cq->gdma_cq);
		if (err)
			goto out;

		memset(&wq_spec, 0, sizeof(wq_spec));
		memset(&cq_spec, 0, sizeof(cq_spec));

		wq_spec.gdma_region = txq->gdma_sq->mem_info.gdma_region;
		wq_spec.queue_size = txq->gdma_sq->queue_size;

		cq_spec.gdma_region = cq->gdma_cq->mem_info.gdma_region;
		cq_spec.queue_size = cq->gdma_cq->queue_size;
		cq_spec.modr_ctx_id = 0;
		cq_spec.attached_eq = cq->gdma_cq->cq.parent->id;

		err = mana_create_wq_obj(apc, apc->port_handle, GDMA_SQ,
					 &wq_spec, &cq_spec,
					 &apc->tx_qp[i].tx_object);

		if (err)
			goto out;

		txq->gdma_sq->id = wq_spec.queue_index;
		cq->gdma_cq->id = cq_spec.queue_index;

		txq->gdma_sq->mem_info.gdma_region = GDMA_INVALID_DMA_REGION;
		cq->gdma_cq->mem_info.gdma_region = GDMA_INVALID_DMA_REGION;

		txq->gdma_txq_id = txq->gdma_sq->id;

		cq->gdma_id = cq->gdma_cq->id;

		if (WARN_ON(cq->gdma_id >= gc->max_num_cqs)) {
			err = -EINVAL;
			goto out;
		}

		gc->cq_table[cq->gdma_id] = cq->gdma_cq;

		netif_tx_napi_add(net, &cq->napi, mana_poll, NAPI_POLL_WEIGHT);
		napi_enable(&cq->napi);

		mana_gd_ring_cq(cq->gdma_cq, SET_ARM_BIT);
	}

	return 0;
out:
	mana_destroy_txq(apc);
	return err;
}

static void mana_destroy_rxq(struct mana_port_context *apc,
			     struct mana_rxq *rxq, bool validate_state)

{
	struct gdma_context *gc = apc->ac->gdma_dev->gdma_context;
	struct mana_recv_buf_oob *rx_oob;
	struct device *dev = gc->dev;
	struct napi_struct *napi;
	int i;

	if (!rxq)
		return;

	napi = &rxq->rx_cq.napi;

	if (validate_state)
		napi_synchronize(napi);

	napi_disable(napi);

	xdp_rxq_info_unreg(&rxq->xdp_rxq);

	netif_napi_del(napi);

	mana_destroy_wq_obj(apc, GDMA_RQ, rxq->rxobj);

	mana_deinit_cq(apc, &rxq->rx_cq);

	for (i = 0; i < rxq->num_rx_buf; i++) {
		rx_oob = &rxq->rx_oobs[i];

		if (!rx_oob->buf_va)
			continue;

		dma_unmap_page(dev, rx_oob->buf_dma_addr, rxq->datasize,
			       DMA_FROM_DEVICE);

		free_page((unsigned long)rx_oob->buf_va);
		rx_oob->buf_va = NULL;
	}

	if (rxq->gdma_rq)
		mana_gd_destroy_queue(gc, rxq->gdma_rq);

	kfree(rxq);
}

#define MANA_WQE_HEADER_SIZE 16
#define MANA_WQE_SGE_SIZE 16

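/* Allocate one page per receive buffer, DMA-map it and prepare the
 * corresponding receive WQE; also computes the RQ and CQ sizes needed
 * to hold all the buffers.
 */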
static int mana_alloc_rx_wqe(struct mana_port_context *apc,
			     struct mana_rxq *rxq, u32 *rxq_size, u32 *cq_size)
{
	struct gdma_context *gc = apc->ac->gdma_dev->gdma_context;
	struct mana_recv_buf_oob *rx_oob;
	struct device *dev = gc->dev;
	struct page *page;
	dma_addr_t da;
	u32 buf_idx;

	WARN_ON(rxq->datasize == 0 || rxq->datasize > PAGE_SIZE);

	*rxq_size = 0;
	*cq_size = 0;

	for (buf_idx = 0; buf_idx < rxq->num_rx_buf; buf_idx++) {
		rx_oob = &rxq->rx_oobs[buf_idx];
		memset(rx_oob, 0, sizeof(*rx_oob));

		page = alloc_page(GFP_KERNEL);
		if (!page)
			return -ENOMEM;

		da = dma_map_page(dev, page, XDP_PACKET_HEADROOM, rxq->datasize,
				  DMA_FROM_DEVICE);

		if (dma_mapping_error(dev, da)) {
			__free_page(page);
			return -ENOMEM;
		}

		rx_oob->buf_va = page_to_virt(page);
		rx_oob->buf_dma_addr = da;

		rx_oob->num_sge = 1;
		rx_oob->sgl[0].address = rx_oob->buf_dma_addr;
		rx_oob->sgl[0].size = rxq->datasize;
		rx_oob->sgl[0].mem_key = apc->ac->gdma_dev->gpa_mkey;

		rx_oob->wqe_req.sgl = rx_oob->sgl;
		rx_oob->wqe_req.num_sge = rx_oob->num_sge;
		rx_oob->wqe_req.inline_oob_size = 0;
		rx_oob->wqe_req.inline_oob_data = NULL;
		rx_oob->wqe_req.flags = 0;
		rx_oob->wqe_req.client_data_unit = 0;

		*rxq_size += ALIGN(MANA_WQE_HEADER_SIZE +
				   MANA_WQE_SGE_SIZE * rx_oob->num_sge, 32);
		*cq_size += COMP_ENTRY_SIZE;
	}

	return 0;
}

static int mana_push_wqe(struct mana_rxq *rxq)
{
	struct mana_recv_buf_oob *rx_oob;
	u32 buf_idx;
	int err;

	for (buf_idx = 0; buf_idx < rxq->num_rx_buf; buf_idx++) {
		rx_oob = &rxq->rx_oobs[buf_idx];

		err = mana_gd_post_and_ring(rxq->gdma_rq, &rx_oob->wqe_req,
					    &rx_oob->wqe_inf);
		if (err)
			return -ENOSPC;
	}

	return 0;
}

static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc,
					u32 rxq_idx, struct mana_eq *eq,
					struct net_device *ndev)
{
	struct gdma_dev *gd = apc->ac->gdma_dev;
	struct mana_obj_spec wq_spec;
	struct mana_obj_spec cq_spec;
	struct gdma_queue_spec spec;
	struct mana_cq *cq = NULL;
	struct gdma_context *gc;
	u32 cq_size, rq_size;
	struct mana_rxq *rxq;
	int err;

	gc = gd->gdma_context;

	rxq = kzalloc(struct_size(rxq, rx_oobs, RX_BUFFERS_PER_QUEUE),
		      GFP_KERNEL);
	if (!rxq)
		return NULL;

	rxq->ndev = ndev;
	rxq->num_rx_buf = RX_BUFFERS_PER_QUEUE;
	rxq->rxq_idx = rxq_idx;
	rxq->datasize = ALIGN(MAX_FRAME_SIZE, 64);
	rxq->rxobj = INVALID_MANA_HANDLE;

	err = mana_alloc_rx_wqe(apc, rxq, &rq_size, &cq_size);
	if (err)
		goto out;

	rq_size = PAGE_ALIGN(rq_size);
	cq_size = PAGE_ALIGN(cq_size);

	/* Create RQ */
	memset(&spec, 0, sizeof(spec));
	spec.type = GDMA_RQ;
	spec.monitor_avl_buf = true;
	spec.queue_size = rq_size;
	err = mana_gd_create_mana_wq_cq(gd, &spec, &rxq->gdma_rq);
	if (err)
		goto out;

	/* Create RQ's CQ */
	cq = &rxq->rx_cq;
	cq->type = MANA_CQ_TYPE_RX;
	cq->rxq = rxq;

	memset(&spec, 0, sizeof(spec));
	spec.type = GDMA_CQ;
	spec.monitor_avl_buf = false;
	spec.queue_size = cq_size;
	spec.cq.callback = mana_schedule_napi;
	spec.cq.parent_eq = eq->eq;
	spec.cq.context = cq;
	err = mana_gd_create_mana_wq_cq(gd, &spec, &cq->gdma_cq);
	if (err)
		goto out;

	memset(&wq_spec, 0, sizeof(wq_spec));
	memset(&cq_spec, 0, sizeof(cq_spec));
	wq_spec.gdma_region = rxq->gdma_rq->mem_info.gdma_region;
	wq_spec.queue_size = rxq->gdma_rq->queue_size;

	cq_spec.gdma_region = cq->gdma_cq->mem_info.gdma_region;
	cq_spec.queue_size = cq->gdma_cq->queue_size;
	cq_spec.modr_ctx_id = 0;
	cq_spec.attached_eq = cq->gdma_cq->cq.parent->id;

	err = mana_create_wq_obj(apc, apc->port_handle, GDMA_RQ,
				 &wq_spec, &cq_spec, &rxq->rxobj);
	if (err)
		goto out;

	rxq->gdma_rq->id = wq_spec.queue_index;
	cq->gdma_cq->id = cq_spec.queue_index;

	rxq->gdma_rq->mem_info.gdma_region = GDMA_INVALID_DMA_REGION;
	cq->gdma_cq->mem_info.gdma_region = GDMA_INVALID_DMA_REGION;

	rxq->gdma_id = rxq->gdma_rq->id;
	cq->gdma_id = cq->gdma_cq->id;

	err = mana_push_wqe(rxq);
	if (err)
		goto out;

	if (WARN_ON(cq->gdma_id >= gc->max_num_cqs)) {
		err = -EINVAL;
		goto out;
	}

	gc->cq_table[cq->gdma_id] = cq->gdma_cq;

	netif_napi_add(ndev, &cq->napi, mana_poll, 1);

	WARN_ON(xdp_rxq_info_reg(&rxq->xdp_rxq, ndev, rxq_idx,
				 cq->napi.napi_id));
	WARN_ON(xdp_rxq_info_reg_mem_model(&rxq->xdp_rxq,
					   MEM_TYPE_PAGE_SHARED, NULL));

	napi_enable(&cq->napi);

	mana_gd_ring_cq(cq->gdma_cq, SET_ARM_BIT);
out:
	if (!err)
		return rxq;

	netdev_err(ndev, "Failed to create RXQ: err = %d\n", err);

	mana_destroy_rxq(apc, rxq, false);

	if (cq)
		mana_deinit_cq(apc, cq);

	return NULL;
}

static int mana_add_rx_queues(struct mana_port_context *apc,
			      struct net_device *ndev)
{
	struct mana_context *ac = apc->ac;
	struct mana_rxq *rxq;
	int err = 0;
	int i;

	for (i = 0; i < apc->num_queues; i++) {
		rxq = mana_create_rxq(apc, i, &ac->eqs[i], ndev);
		if (!rxq) {
			err = -ENOMEM;
			goto out;
		}

		u64_stats_init(&rxq->stats.syncp);

		apc->rxqs[i] = rxq;
	}

	apc->default_rxobj = apc->rxqs[0]->rxobj;
out:
	return err;
}

static void mana_destroy_vport(struct mana_port_context *apc)
{
	struct mana_rxq *rxq;
	u32 rxq_idx;

	for (rxq_idx = 0; rxq_idx < apc->num_queues; rxq_idx++) {
		rxq = apc->rxqs[rxq_idx];
		if (!rxq)
			continue;

		mana_destroy_rxq(apc, rxq, true);
		apc->rxqs[rxq_idx] = NULL;
	}

	mana_destroy_txq(apc);
}

static int mana_create_vport(struct mana_port_context *apc,
			     struct net_device *net)
{
	struct gdma_dev *gd = apc->ac->gdma_dev;
	int err;

	apc->default_rxobj = INVALID_MANA_HANDLE;

	err = mana_cfg_vport(apc, gd->pdid, gd->doorbell);
	if (err)
		return err;

	return mana_create_txq(apc, net);
}

static void mana_rss_table_init(struct mana_port_context *apc)
{
	int i;

	for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++)
		apc->indir_table[i] =
			ethtool_rxfh_indir_default(i, apc->num_queues);
}

int mana_config_rss(struct mana_port_context *apc, enum TRI_STATE rx,
		    bool update_hash, bool update_tab)
{
	u32 queue_idx;
	int i;

	if (update_tab) {
		for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++) {
			queue_idx = apc->indir_table[i];
			apc->rxobj_table[i] = apc->rxqs[queue_idx]->rxobj;
		}
	}

	return mana_cfg_vport_steering(apc, rx, true, update_hash, update_tab);
}

static int mana_init_port(struct net_device *ndev)
{
	struct mana_port_context *apc = netdev_priv(ndev);
	u32 max_txq, max_rxq, max_queues;
	int port_idx = apc->port_idx;
	u32 num_indirect_entries;
	int err;

	err = mana_init_port_context(apc);
	if (err)
		return err;

	err = mana_query_vport_cfg(apc, port_idx, &max_txq, &max_rxq,
				   &num_indirect_entries);
	if (err) {
		netdev_err(ndev, "Failed to query info for vPort %d\n",
			   port_idx);
		goto reset_apc;
	}

	max_queues = min_t(u32, max_txq, max_rxq);
	if (apc->max_queues > max_queues)
		apc->max_queues = max_queues;

	if (apc->num_queues > apc->max_queues)
		apc->num_queues = apc->max_queues;

	eth_hw_addr_set(ndev, apc->mac_addr);

	return 0;

reset_apc:
	kfree(apc->rxqs);
	apc->rxqs = NULL;
	return err;
}

int mana_alloc_queues(struct net_device *ndev)
{
	struct mana_port_context *apc = netdev_priv(ndev);
	int err;

	err = mana_create_vport(apc, ndev);
	if (err)
		return err;

	err = netif_set_real_num_tx_queues(ndev, apc->num_queues);
	if (err)
		goto destroy_vport;

	err = mana_add_rx_queues(apc, ndev);
	if (err)
		goto destroy_vport;

	apc->rss_state = apc->num_queues > 1 ? TRI_STATE_TRUE : TRI_STATE_FALSE;

	err = netif_set_real_num_rx_queues(ndev, apc->num_queues);
	if (err)
		goto destroy_vport;

	mana_rss_table_init(apc);

	err = mana_config_rss(apc, TRI_STATE_TRUE, true, true);
	if (err)
		goto destroy_vport;

	mana_chn_setxdp(apc, mana_xdp_get(apc));

	return 0;

destroy_vport:
	mana_destroy_vport(apc);
	return err;
}

int mana_attach(struct net_device *ndev)
{
	struct mana_port_context *apc = netdev_priv(ndev);
	int err;

	ASSERT_RTNL();

	err = mana_init_port(ndev);
	if (err)
		return err;

	if (apc->port_st_save) {
		err = mana_alloc_queues(ndev);
		if (err) {
			mana_cleanup_port_context(apc);
			return err;
		}
	}

	apc->port_is_up = apc->port_st_save;

	/* Ensure port state updated before txq state */
	smp_wmb();

	if (apc->port_is_up)
		netif_carrier_on(ndev);

	netif_device_attach(ndev);

	return 0;
}

static int mana_dealloc_queues(struct net_device *ndev)
{
	struct mana_port_context *apc = netdev_priv(ndev);
	struct mana_txq *txq;
	int i, err;

	if (apc->port_is_up)
		return -EINVAL;

	mana_chn_setxdp(apc, NULL);

	/* No packet can be transmitted now since apc->port_is_up is false.
	 * There is still a tiny chance that mana_poll_tx_cq() can re-enable
	 * a txq because it may not timely see apc->port_is_up being cleared
	 * to false, but it doesn't matter since mana_start_xmit() drops any
	 * new packets due to apc->port_is_up being false.
	 *
	 * Drain all the in-flight TX packets
	 */
	for (i = 0; i < apc->num_queues; i++) {
		txq = &apc->tx_qp[i].txq;

		while (atomic_read(&txq->pending_sends) > 0)
			usleep_range(1000, 2000);
	}

	/* We're 100% sure the queues can no longer be woken up, because
	 * we're sure now mana_poll_tx_cq() can't be running.
	 */
1767 */ 1768 1769 apc->rss_state = TRI_STATE_FALSE; 1770 err = mana_config_rss(apc, TRI_STATE_FALSE, false, false); 1771 if (err) { 1772 netdev_err(ndev, "Failed to disable vPort: %d\n", err); 1773 return err; 1774 } 1775 1776 /* TODO: Implement RX fencing */ 1777 ssleep(1); 1778 1779 mana_destroy_vport(apc); 1780 1781 return 0; 1782 } 1783 1784 int mana_detach(struct net_device *ndev, bool from_close) 1785 { 1786 struct mana_port_context *apc = netdev_priv(ndev); 1787 int err; 1788 1789 ASSERT_RTNL(); 1790 1791 apc->port_st_save = apc->port_is_up; 1792 apc->port_is_up = false; 1793 1794 /* Ensure port state updated before txq state */ 1795 smp_wmb(); 1796 1797 netif_tx_disable(ndev); 1798 netif_carrier_off(ndev); 1799 1800 if (apc->port_st_save) { 1801 err = mana_dealloc_queues(ndev); 1802 if (err) 1803 return err; 1804 } 1805 1806 if (!from_close) { 1807 netif_device_detach(ndev); 1808 mana_cleanup_port_context(apc); 1809 } 1810 1811 return 0; 1812 } 1813 1814 static int mana_probe_port(struct mana_context *ac, int port_idx, 1815 struct net_device **ndev_storage) 1816 { 1817 struct gdma_context *gc = ac->gdma_dev->gdma_context; 1818 struct mana_port_context *apc; 1819 struct net_device *ndev; 1820 int err; 1821 1822 ndev = alloc_etherdev_mq(sizeof(struct mana_port_context), 1823 gc->max_num_queues); 1824 if (!ndev) 1825 return -ENOMEM; 1826 1827 *ndev_storage = ndev; 1828 1829 apc = netdev_priv(ndev); 1830 apc->ac = ac; 1831 apc->ndev = ndev; 1832 apc->max_queues = gc->max_num_queues; 1833 apc->num_queues = gc->max_num_queues; 1834 apc->port_handle = INVALID_MANA_HANDLE; 1835 apc->port_idx = port_idx; 1836 1837 ndev->netdev_ops = &mana_devops; 1838 ndev->ethtool_ops = &mana_ethtool_ops; 1839 ndev->mtu = ETH_DATA_LEN; 1840 ndev->max_mtu = ndev->mtu; 1841 ndev->min_mtu = ndev->mtu; 1842 ndev->needed_headroom = MANA_HEADROOM; 1843 SET_NETDEV_DEV(ndev, gc->dev); 1844 1845 netif_carrier_off(ndev); 1846 1847 netdev_rss_key_fill(apc->hashkey, MANA_HASH_KEY_SIZE); 1848 1849 err = mana_init_port(ndev); 1850 if (err) 1851 goto free_net; 1852 1853 netdev_lockdep_set_classes(ndev); 1854 1855 ndev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; 1856 ndev->hw_features |= NETIF_F_RXCSUM; 1857 ndev->hw_features |= NETIF_F_TSO | NETIF_F_TSO6; 1858 ndev->hw_features |= NETIF_F_RXHASH; 1859 ndev->features = ndev->hw_features; 1860 ndev->vlan_features = 0; 1861 1862 err = register_netdev(ndev); 1863 if (err) { 1864 netdev_err(ndev, "Unable to register netdev.\n"); 1865 goto reset_apc; 1866 } 1867 1868 return 0; 1869 1870 reset_apc: 1871 kfree(apc->rxqs); 1872 apc->rxqs = NULL; 1873 free_net: 1874 *ndev_storage = NULL; 1875 netdev_err(ndev, "Failed to probe vPort %d: %d\n", port_idx, err); 1876 free_netdev(ndev); 1877 return err; 1878 } 1879 1880 int mana_probe(struct gdma_dev *gd, bool resuming) 1881 { 1882 struct gdma_context *gc = gd->gdma_context; 1883 struct mana_context *ac = gd->driver_data; 1884 struct device *dev = gc->dev; 1885 u16 num_ports = 0; 1886 int err; 1887 int i; 1888 1889 dev_info(dev, 1890 "Microsoft Azure Network Adapter protocol version: %d.%d.%d\n", 1891 MANA_MAJOR_VERSION, MANA_MINOR_VERSION, MANA_MICRO_VERSION); 1892 1893 err = mana_gd_register_device(gd); 1894 if (err) 1895 return err; 1896 1897 if (!resuming) { 1898 ac = kzalloc(sizeof(*ac), GFP_KERNEL); 1899 if (!ac) 1900 return -ENOMEM; 1901 1902 ac->gdma_dev = gd; 1903 gd->driver_data = ac; 1904 } 1905 1906 err = mana_create_eq(ac); 1907 if (err) 1908 goto out; 1909 1910 err = mana_query_device_cfg(ac, 
	if (err)
		goto out;

	if (!resuming) {
		ac->num_ports = num_ports;
	} else {
		if (ac->num_ports != num_ports) {
			dev_err(dev, "The number of vPorts changed: %d->%d\n",
				ac->num_ports, num_ports);
			err = -EPROTO;
			goto out;
		}
	}

	if (ac->num_ports == 0)
		dev_err(dev, "Failed to detect any vPort\n");

	if (ac->num_ports > MAX_PORTS_IN_MANA_DEV)
		ac->num_ports = MAX_PORTS_IN_MANA_DEV;

	if (!resuming) {
		for (i = 0; i < ac->num_ports; i++) {
			err = mana_probe_port(ac, i, &ac->ports[i]);
			if (err)
				break;
		}
	} else {
		for (i = 0; i < ac->num_ports; i++) {
			rtnl_lock();
			err = mana_attach(ac->ports[i]);
			rtnl_unlock();
			if (err)
				break;
		}
	}
out:
	if (err)
		mana_remove(gd, false);

	return err;
}

void mana_remove(struct gdma_dev *gd, bool suspending)
{
	struct gdma_context *gc = gd->gdma_context;
	struct mana_context *ac = gd->driver_data;
	struct device *dev = gc->dev;
	struct net_device *ndev;
	int err;
	int i;

	for (i = 0; i < ac->num_ports; i++) {
		ndev = ac->ports[i];
		if (!ndev) {
			if (i == 0)
				dev_err(dev, "No net device to remove\n");
			goto out;
		}

		/* All cleanup actions should stay after rtnl_lock(), otherwise
		 * other functions may access partially cleaned up data.
		 */
		rtnl_lock();

		err = mana_detach(ndev, false);
		if (err)
			netdev_err(ndev, "Failed to detach vPort %d: %d\n",
				   i, err);

		if (suspending) {
			/* No need to unregister the ndev. */
			rtnl_unlock();
			continue;
		}

		unregister_netdevice(ndev);

		rtnl_unlock();

		free_netdev(ndev);
	}

	mana_destroy_eq(ac);

out:
	mana_gd_deregister_device(gd);

	if (suspending)
		return;

	gd->driver_data = NULL;
	gd->gdma_context = NULL;
	kfree(ac);
}