163fa15dbSBob Pearson // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
28700e3e7SMoni Shoua /*
38700e3e7SMoni Shoua * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
48700e3e7SMoni Shoua * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
58700e3e7SMoni Shoua */
68700e3e7SMoni Shoua
78700e3e7SMoni Shoua #include <linux/skbuff.h>
88700e3e7SMoni Shoua #include <linux/if_arp.h>
98700e3e7SMoni Shoua #include <linux/netdevice.h>
108700e3e7SMoni Shoua #include <linux/if.h>
11f1b0a8eaSMartin Wilck #include <linux/if_vlan.h>
128700e3e7SMoni Shoua #include <net/udp_tunnel.h>
138700e3e7SMoni Shoua #include <net/sch_generic.h>
148700e3e7SMoni Shoua #include <linux/netfilter.h>
158700e3e7SMoni Shoua #include <rdma/ib_addr.h>
168700e3e7SMoni Shoua
178700e3e7SMoni Shoua #include "rxe.h"
188700e3e7SMoni Shoua #include "rxe_net.h"
198700e3e7SMoni Shoua #include "rxe_loc.h"
208700e3e7SMoni Shoua
2191eab796SZhu Yanjun static struct rxe_recv_sockets recv_sockets;
228700e3e7SMoni Shoua
rxe_find_route4(struct rxe_qp * qp,struct net_device * ndev,struct in_addr * saddr,struct in_addr * daddr)2334549e88SBob Pearson static struct dst_entry *rxe_find_route4(struct rxe_qp *qp,
2434549e88SBob Pearson struct net_device *ndev,
258700e3e7SMoni Shoua struct in_addr *saddr,
268700e3e7SMoni Shoua struct in_addr *daddr)
278700e3e7SMoni Shoua {
288700e3e7SMoni Shoua struct rtable *rt;
298700e3e7SMoni Shoua struct flowi4 fl = { { 0 } };
308700e3e7SMoni Shoua
318700e3e7SMoni Shoua memset(&fl, 0, sizeof(fl));
328700e3e7SMoni Shoua fl.flowi4_oif = ndev->ifindex;
338700e3e7SMoni Shoua memcpy(&fl.saddr, saddr, sizeof(*saddr));
348700e3e7SMoni Shoua memcpy(&fl.daddr, daddr, sizeof(*daddr));
358700e3e7SMoni Shoua fl.flowi4_proto = IPPROTO_UDP;
368700e3e7SMoni Shoua
378700e3e7SMoni Shoua rt = ip_route_output_key(&init_net, &fl);
388700e3e7SMoni Shoua if (IS_ERR(rt)) {
3934549e88SBob Pearson rxe_dbg_qp(qp, "no route to %pI4\n", &daddr->s_addr);
408700e3e7SMoni Shoua return NULL;
418700e3e7SMoni Shoua }
428700e3e7SMoni Shoua
438700e3e7SMoni Shoua return &rt->dst;
448700e3e7SMoni Shoua }
458700e3e7SMoni Shoua
468700e3e7SMoni Shoua #if IS_ENABLED(CONFIG_IPV6)
rxe_find_route6(struct rxe_qp * qp,struct net_device * ndev,struct in6_addr * saddr,struct in6_addr * daddr)4734549e88SBob Pearson static struct dst_entry *rxe_find_route6(struct rxe_qp *qp,
4834549e88SBob Pearson struct net_device *ndev,
498700e3e7SMoni Shoua struct in6_addr *saddr,
508700e3e7SMoni Shoua struct in6_addr *daddr)
518700e3e7SMoni Shoua {
528700e3e7SMoni Shoua struct dst_entry *ndst;
538700e3e7SMoni Shoua struct flowi6 fl6 = { { 0 } };
548700e3e7SMoni Shoua
558700e3e7SMoni Shoua memset(&fl6, 0, sizeof(fl6));
568700e3e7SMoni Shoua fl6.flowi6_oif = ndev->ifindex;
578700e3e7SMoni Shoua memcpy(&fl6.saddr, saddr, sizeof(*saddr));
588700e3e7SMoni Shoua memcpy(&fl6.daddr, daddr, sizeof(*daddr));
598700e3e7SMoni Shoua fl6.flowi6_proto = IPPROTO_UDP;
608700e3e7SMoni Shoua
616c8991f4SSabrina Dubroca ndst = ipv6_stub->ipv6_dst_lookup_flow(sock_net(recv_sockets.sk6->sk),
626c8991f4SSabrina Dubroca recv_sockets.sk6->sk, &fl6,
636c8991f4SSabrina Dubroca NULL);
645f9e2822SBob Pearson if (IS_ERR(ndst)) {
6534549e88SBob Pearson rxe_dbg_qp(qp, "no route to %pI6\n", daddr);
666c8991f4SSabrina Dubroca return NULL;
678700e3e7SMoni Shoua }
688700e3e7SMoni Shoua
698700e3e7SMoni Shoua if (unlikely(ndst->error)) {
7034549e88SBob Pearson rxe_dbg_qp(qp, "no route to %pI6\n", daddr);
718700e3e7SMoni Shoua goto put;
728700e3e7SMoni Shoua }
738700e3e7SMoni Shoua
748700e3e7SMoni Shoua return ndst;
758700e3e7SMoni Shoua put:
768700e3e7SMoni Shoua dst_release(ndst);
778700e3e7SMoni Shoua return NULL;
788700e3e7SMoni Shoua }
798700e3e7SMoni Shoua
808700e3e7SMoni Shoua #else
818700e3e7SMoni Shoua
rxe_find_route6(struct rxe_qp * qp,struct net_device * ndev,struct in6_addr * saddr,struct in6_addr * daddr)8234549e88SBob Pearson static struct dst_entry *rxe_find_route6(struct rxe_qp *qp,
8334549e88SBob Pearson struct net_device *ndev,
848700e3e7SMoni Shoua struct in6_addr *saddr,
858700e3e7SMoni Shoua struct in6_addr *daddr)
868700e3e7SMoni Shoua {
878700e3e7SMoni Shoua return NULL;
888700e3e7SMoni Shoua }
898700e3e7SMoni Shoua
908700e3e7SMoni Shoua #endif
918700e3e7SMoni Shoua
rxe_find_route(struct net_device * ndev,struct rxe_qp * qp,struct rxe_av * av)923db2bcebSParav Pandit static struct dst_entry *rxe_find_route(struct net_device *ndev,
934ed6ad1eSyonatanc struct rxe_qp *qp,
944ed6ad1eSyonatanc struct rxe_av *av)
954ed6ad1eSyonatanc {
964ed6ad1eSyonatanc struct dst_entry *dst = NULL;
974ed6ad1eSyonatanc
984ed6ad1eSyonatanc if (qp_type(qp) == IB_QPT_RC)
994ed6ad1eSyonatanc dst = sk_dst_get(qp->sk->sk);
1004ed6ad1eSyonatanc
101b9109b7dSAndrew Boyer if (!dst || !dst_check(dst, qp->dst_cookie)) {
1024ed6ad1eSyonatanc if (dst)
1034ed6ad1eSyonatanc dst_release(dst);
1044ed6ad1eSyonatanc
105e0d696d2SJason Gunthorpe if (av->network_type == RXE_NETWORK_TYPE_IPV4) {
1064ed6ad1eSyonatanc struct in_addr *saddr;
1074ed6ad1eSyonatanc struct in_addr *daddr;
1084ed6ad1eSyonatanc
1094ed6ad1eSyonatanc saddr = &av->sgid_addr._sockaddr_in.sin_addr;
1104ed6ad1eSyonatanc daddr = &av->dgid_addr._sockaddr_in.sin_addr;
11134549e88SBob Pearson dst = rxe_find_route4(qp, ndev, saddr, daddr);
112e0d696d2SJason Gunthorpe } else if (av->network_type == RXE_NETWORK_TYPE_IPV6) {
1134ed6ad1eSyonatanc struct in6_addr *saddr6;
1144ed6ad1eSyonatanc struct in6_addr *daddr6;
1154ed6ad1eSyonatanc
1164ed6ad1eSyonatanc saddr6 = &av->sgid_addr._sockaddr_in6.sin6_addr;
1174ed6ad1eSyonatanc daddr6 = &av->dgid_addr._sockaddr_in6.sin6_addr;
11834549e88SBob Pearson dst = rxe_find_route6(qp, ndev, saddr6, daddr6);
119b9109b7dSAndrew Boyer #if IS_ENABLED(CONFIG_IPV6)
120b9109b7dSAndrew Boyer if (dst)
121b9109b7dSAndrew Boyer qp->dst_cookie =
122b9109b7dSAndrew Boyer rt6_get_cookie((struct rt6_info *)dst);
123b9109b7dSAndrew Boyer #endif
1244ed6ad1eSyonatanc }
12524c937b3SVijay Immanuel
12624c937b3SVijay Immanuel if (dst && (qp_type(qp) == IB_QPT_RC)) {
12724c937b3SVijay Immanuel dst_hold(dst);
12824c937b3SVijay Immanuel sk_dst_set(qp->sk->sk, dst);
12924c937b3SVijay Immanuel }
1304ed6ad1eSyonatanc }
1314ed6ad1eSyonatanc return dst;
1324ed6ad1eSyonatanc }
1334ed6ad1eSyonatanc
rxe_udp_encap_recv(struct sock * sk,struct sk_buff * skb)1348700e3e7SMoni Shoua static int rxe_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
1358700e3e7SMoni Shoua {
1368700e3e7SMoni Shoua struct udphdr *udph;
137899aba89SBob Pearson struct rxe_dev *rxe;
1388700e3e7SMoni Shoua struct net_device *ndev = skb->dev;
1398700e3e7SMoni Shoua struct rxe_pkt_info *pkt = SKB_TO_PKT(skb);
1408700e3e7SMoni Shoua
141899aba89SBob Pearson /* takes a reference on rxe->ib_dev
142899aba89SBob Pearson * drop when skb is freed
143899aba89SBob Pearson */
144899aba89SBob Pearson rxe = rxe_get_dev_from_net(ndev);
1457289e26fSJason Gunthorpe if (!rxe && is_vlan_dev(ndev))
1467289e26fSJason Gunthorpe rxe = rxe_get_dev_from_net(vlan_dev_real_dev(ndev));
1478700e3e7SMoni Shoua if (!rxe)
1488700e3e7SMoni Shoua goto drop;
1498700e3e7SMoni Shoua
1508700e3e7SMoni Shoua if (skb_linearize(skb)) {
1514c173f59SJason Gunthorpe ib_device_put(&rxe->ib_dev);
1528700e3e7SMoni Shoua goto drop;
1538700e3e7SMoni Shoua }
1548700e3e7SMoni Shoua
1558700e3e7SMoni Shoua udph = udp_hdr(skb);
1568700e3e7SMoni Shoua pkt->rxe = rxe;
1578700e3e7SMoni Shoua pkt->port_num = 1;
1588700e3e7SMoni Shoua pkt->hdr = (u8 *)(udph + 1);
1598700e3e7SMoni Shoua pkt->mask = RXE_GRH_MASK;
1608700e3e7SMoni Shoua pkt->paylen = be16_to_cpu(udph->len) - sizeof(*udph);
1618700e3e7SMoni Shoua
1629a3763e8SBob Pearson /* remove udp header */
1639a3763e8SBob Pearson skb_pull(skb, sizeof(struct udphdr));
1649a3763e8SBob Pearson
16510c47d56SYuval Shaia rxe_rcv(skb);
16610c47d56SYuval Shaia
16710c47d56SYuval Shaia return 0;
1688700e3e7SMoni Shoua drop:
1698700e3e7SMoni Shoua kfree_skb(skb);
17010c47d56SYuval Shaia
1718700e3e7SMoni Shoua return 0;
1728700e3e7SMoni Shoua }
1738700e3e7SMoni Shoua
/*
 * Create a UDP socket in @net bound to local port @port (network byte
 * order) and register rxe_udp_encap_recv() as its encap receive callback.
 *
 * @ipv6 selects an AF_INET6, v6-only socket instead of an AF_INET one.
 *
 * Returns the socket, or an ERR_PTR() carrying the error from
 * udp_sock_create().
 */
static struct socket *rxe_setup_udp_tunnel(struct net *net, __be16 port,
					   bool ipv6)
{
	struct udp_tunnel_sock_cfg tnl_cfg = { };
	struct udp_port_cfg udp_cfg = { };
	struct socket *sock;
	int err;

	udp_cfg.local_udp_port = port;
	udp_cfg.family = ipv6 ? AF_INET6 : AF_INET;
	if (ipv6)
		udp_cfg.ipv6_v6only = 1;

	/* Create UDP socket */
	err = udp_sock_create(net, &udp_cfg, &sock);
	if (err < 0)
		return ERR_PTR(err);

	/* Setup UDP tunnel */
	tnl_cfg.encap_rcv = rxe_udp_encap_recv;
	tnl_cfg.encap_type = 1;
	setup_udp_tunnel_sock(net, sock, &tnl_cfg);

	return sock;
}
2048700e3e7SMoni Shoua
/* Release a tunnel socket created by rxe_setup_udp_tunnel(); NULL is a no-op. */
static void rxe_release_udp_tunnel(struct socket *sk)
{
	if (!sk)
		return;

	udp_tunnel_sock_release(sk);
}
2108700e3e7SMoni Shoua
/*
 * Push a UDP header onto @skb and make it the transport header.
 *
 * @src_port and @dst_port are already in network byte order. The length
 * field is taken from skb->len after the push; the checksum is left at 0.
 */
static void prepare_udp_hdr(struct sk_buff *skb, __be16 src_port,
			    __be16 dst_port)
{
	struct udphdr *hdr;

	__skb_push(skb, sizeof(struct udphdr));
	skb_reset_transport_header(skb);

	hdr = udp_hdr(skb);
	hdr->source = src_port;
	hdr->dest = dst_port;
	hdr->check = 0;
	hdr->len = htons(skb->len);
}
2258700e3e7SMoni Shoua
/*
 * Attach @dst to @skb and push an IPv4 header in front of its current data.
 *
 * @saddr/@daddr: addresses in network byte order
 * @proto:        value for the IP protocol field
 * @tos, @ttl:    values for the TOS and TTL fields
 * @df:           value written verbatim to frag_off
 * @xnet:         passed through to skb_scrub_packet()
 *
 * The skb takes its own reference on @dst (dst_clone()); the caller keeps
 * the one it passed in.
 */
static void prepare_ipv4_hdr(struct dst_entry *dst, struct sk_buff *skb,
			     __be32 saddr, __be32 daddr, __u8 proto,
			     __u8 tos, __u8 ttl, __be16 df, bool xnet)
{
	struct iphdr *hdr;

	/* reset per-packet state before attaching the new route */
	skb_scrub_packet(skb, xnet);
	skb_clear_hash(skb);
	skb_dst_set(skb, dst_clone(dst));
	memset(IPCB(skb), 0, sizeof(*IPCB(skb)));

	skb_push(skb, sizeof(struct iphdr));
	skb_reset_network_header(skb);
	hdr = ip_hdr(skb);

	hdr->version = IPVERSION;
	/* header length is expressed in 32-bit words */
	hdr->ihl = sizeof(struct iphdr) / 4;
	hdr->tos = tos;
	hdr->tot_len = htons(skb->len);
	hdr->frag_off = df;
	hdr->ttl = ttl;
	hdr->protocol = proto;
	hdr->saddr = saddr;
	hdr->daddr = daddr;

	__ip_select_ident(dev_net(dst->dev), hdr,
			  skb_shinfo(skb)->gso_segs ?: 1);
}
2558700e3e7SMoni Shoua
/*
 * Attach @dst to @skb and push an IPv6 header in front of its current data.
 *
 * @saddr/@daddr: source and destination addresses, copied into the header
 * @proto:        value for the next-header field
 * @prio:         traffic class handed to ip6_flow_hdr() (flow label is 0)
 * @ttl:          value for the hop-limit field
 *
 * The skb takes its own reference on @dst (dst_clone()); the caller keeps
 * the one it passed in.
 */
static void prepare_ipv6_hdr(struct dst_entry *dst, struct sk_buff *skb,
			     struct in6_addr *saddr, struct in6_addr *daddr,
			     __u8 proto, __u8 prio, __u8 ttl)
{
	struct ipv6hdr *ip6h;

	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED
			    | IPSKB_REROUTED);
	skb_dst_set(skb, dst_clone(dst));

	__skb_push(skb, sizeof(*ip6h));
	skb_reset_network_header(skb);
	ip6h = ipv6_hdr(skb);
	ip6_flow_hdr(ip6h, prio, htonl(0));
	ip6h->nexthdr = proto;
	ip6h->hop_limit = ttl;
	ip6h->daddr = *daddr;
	ip6h->saddr = *saddr;
	/*
	 * payload_len does not count the IPv6 header itself. It is written
	 * once here; an earlier store of the full skb->len was always
	 * overwritten by this one and has been dropped.
	 */
	ip6h->payload_len = htons(skb->len - sizeof(*ip6h));
}
2788700e3e7SMoni Shoua
prepare4(struct rxe_av * av,struct rxe_pkt_info * pkt,struct sk_buff * skb)27963221acbSBob Pearson static int prepare4(struct rxe_av *av, struct rxe_pkt_info *pkt,
28063221acbSBob Pearson struct sk_buff *skb)
2818700e3e7SMoni Shoua {
2824ed6ad1eSyonatanc struct rxe_qp *qp = pkt->qp;
2838700e3e7SMoni Shoua struct dst_entry *dst;
2848700e3e7SMoni Shoua bool xnet = false;
2858700e3e7SMoni Shoua __be16 df = htons(IP_DF);
2868700e3e7SMoni Shoua struct in_addr *saddr = &av->sgid_addr._sockaddr_in.sin_addr;
2878700e3e7SMoni Shoua struct in_addr *daddr = &av->dgid_addr._sockaddr_in.sin_addr;
2888700e3e7SMoni Shoua
2893db2bcebSParav Pandit dst = rxe_find_route(skb->dev, qp, av);
2908700e3e7SMoni Shoua if (!dst) {
29134549e88SBob Pearson rxe_dbg_qp(qp, "Host not reachable\n");
2928700e3e7SMoni Shoua return -EHOSTUNREACH;
2938700e3e7SMoni Shoua }
2948700e3e7SMoni Shoua
295d3c04a3aSVijay Immanuel prepare_udp_hdr(skb, cpu_to_be16(qp->src_port),
296d3c04a3aSVijay Immanuel cpu_to_be16(ROCE_V2_UDP_DPORT));
2978700e3e7SMoni Shoua
2988700e3e7SMoni Shoua prepare_ipv4_hdr(dst, skb, saddr->s_addr, daddr->s_addr, IPPROTO_UDP,
2998700e3e7SMoni Shoua av->grh.traffic_class, av->grh.hop_limit, df, xnet);
3004ed6ad1eSyonatanc
3014ed6ad1eSyonatanc dst_release(dst);
3028700e3e7SMoni Shoua return 0;
3038700e3e7SMoni Shoua }
3048700e3e7SMoni Shoua
prepare6(struct rxe_av * av,struct rxe_pkt_info * pkt,struct sk_buff * skb)30563221acbSBob Pearson static int prepare6(struct rxe_av *av, struct rxe_pkt_info *pkt,
30663221acbSBob Pearson struct sk_buff *skb)
3078700e3e7SMoni Shoua {
3084ed6ad1eSyonatanc struct rxe_qp *qp = pkt->qp;
3092418adaeSAndrew Boyer struct dst_entry *dst;
3108700e3e7SMoni Shoua struct in6_addr *saddr = &av->sgid_addr._sockaddr_in6.sin6_addr;
3118700e3e7SMoni Shoua struct in6_addr *daddr = &av->dgid_addr._sockaddr_in6.sin6_addr;
3128700e3e7SMoni Shoua
3133db2bcebSParav Pandit dst = rxe_find_route(skb->dev, qp, av);
3148700e3e7SMoni Shoua if (!dst) {
31534549e88SBob Pearson rxe_dbg_qp(qp, "Host not reachable\n");
3168700e3e7SMoni Shoua return -EHOSTUNREACH;
3178700e3e7SMoni Shoua }
3188700e3e7SMoni Shoua
319d3c04a3aSVijay Immanuel prepare_udp_hdr(skb, cpu_to_be16(qp->src_port),
320d3c04a3aSVijay Immanuel cpu_to_be16(ROCE_V2_UDP_DPORT));
3218700e3e7SMoni Shoua
3228700e3e7SMoni Shoua prepare_ipv6_hdr(dst, skb, saddr, daddr, IPPROTO_UDP,
3238700e3e7SMoni Shoua av->grh.traffic_class,
3248700e3e7SMoni Shoua av->grh.hop_limit);
3254ed6ad1eSyonatanc
3264ed6ad1eSyonatanc dst_release(dst);
3278700e3e7SMoni Shoua return 0;
3288700e3e7SMoni Shoua }
3298700e3e7SMoni Shoua
rxe_prepare(struct rxe_av * av,struct rxe_pkt_info * pkt,struct sk_buff * skb)33063221acbSBob Pearson int rxe_prepare(struct rxe_av *av, struct rxe_pkt_info *pkt,
33163221acbSBob Pearson struct sk_buff *skb)
3328700e3e7SMoni Shoua {
3338700e3e7SMoni Shoua int err = 0;
3348700e3e7SMoni Shoua
33508304d71SZhu Yanjun if (skb->protocol == htons(ETH_P_IP))
33663221acbSBob Pearson err = prepare4(av, pkt, skb);
33708304d71SZhu Yanjun else if (skb->protocol == htons(ETH_P_IPV6))
33863221acbSBob Pearson err = prepare6(av, pkt, skb);
3398700e3e7SMoni Shoua
34063221acbSBob Pearson if (ether_addr_equal(skb->dev->dev_addr, av->dmac))
341668aa15bSKamal Heib pkt->mask |= RXE_LOOPBACK_MASK;
342668aa15bSKamal Heib
3438700e3e7SMoni Shoua return err;
3448700e3e7SMoni Shoua }
3458700e3e7SMoni Shoua
rxe_skb_tx_dtor(struct sk_buff * skb)3468700e3e7SMoni Shoua static void rxe_skb_tx_dtor(struct sk_buff *skb)
3478700e3e7SMoni Shoua {
3488700e3e7SMoni Shoua struct sock *sk = skb->sk;
3498700e3e7SMoni Shoua struct rxe_qp *qp = sk->sk_user_data;
3508700e3e7SMoni Shoua int skb_out = atomic_dec_return(&qp->skb_out);
3518700e3e7SMoni Shoua
3528700e3e7SMoni Shoua if (unlikely(qp->need_req_skb &&
3538700e3e7SMoni Shoua skb_out < RXE_INFLIGHT_SKBS_PER_QP_LOW))
354dccb23f6SBob Pearson rxe_sched_task(&qp->req.task);
355fda85ce9SYonatan Cohen
3563197706aSBob Pearson rxe_put(qp);
3578700e3e7SMoni Shoua }
3588700e3e7SMoni Shoua
rxe_send(struct sk_buff * skb,struct rxe_pkt_info * pkt)35913050a0bSBob Pearson static int rxe_send(struct sk_buff *skb, struct rxe_pkt_info *pkt)
3608700e3e7SMoni Shoua {
3618700e3e7SMoni Shoua int err;
3628700e3e7SMoni Shoua
3635793b465SZhu Yanjun skb->destructor = rxe_skb_tx_dtor;
3645793b465SZhu Yanjun skb->sk = pkt->qp->sk->sk;
3658700e3e7SMoni Shoua
3663197706aSBob Pearson rxe_get(pkt->qp);
3679eb7f8e4SAndrew Boyer atomic_inc(&pkt->qp->skb_out);
3689eb7f8e4SAndrew Boyer
369*03ff3e23SBob Pearson if (skb->protocol == htons(ETH_P_IP))
3705793b465SZhu Yanjun err = ip_local_out(dev_net(skb_dst(skb)->dev), skb->sk, skb);
371*03ff3e23SBob Pearson else
3725793b465SZhu Yanjun err = ip6_local_out(dev_net(skb_dst(skb)->dev), skb->sk, skb);
3738700e3e7SMoni Shoua
3748700e3e7SMoni Shoua if (unlikely(net_xmit_eval(err))) {
37534549e88SBob Pearson rxe_dbg_qp(pkt->qp, "error sending packet: %d\n", err);
3768700e3e7SMoni Shoua return -EAGAIN;
3778700e3e7SMoni Shoua }
3788700e3e7SMoni Shoua
3798700e3e7SMoni Shoua return 0;
3808700e3e7SMoni Shoua }
3818700e3e7SMoni Shoua
38221e27ac8SBob Pearson /* fix up a send packet to match the packets
38321e27ac8SBob Pearson * received from UDP before looping them back
38421e27ac8SBob Pearson */
rxe_loopback(struct sk_buff * skb,struct rxe_pkt_info * pkt)38513050a0bSBob Pearson static int rxe_loopback(struct sk_buff *skb, struct rxe_pkt_info *pkt)
3868700e3e7SMoni Shoua {
38713050a0bSBob Pearson memcpy(SKB_TO_PKT(skb), pkt, sizeof(*pkt));
38821e27ac8SBob Pearson
3895120bf0aSBob Pearson if (skb->protocol == htons(ETH_P_IP))
3905120bf0aSBob Pearson skb_pull(skb, sizeof(struct iphdr));
3915120bf0aSBob Pearson else
3925120bf0aSBob Pearson skb_pull(skb, sizeof(struct ipv6hdr));
3935120bf0aSBob Pearson
39413050a0bSBob Pearson if (WARN_ON(!ib_device_try_get(&pkt->rxe->ib_dev))) {
39521e27ac8SBob Pearson kfree_skb(skb);
39613050a0bSBob Pearson return -EIO;
39713050a0bSBob Pearson }
39813050a0bSBob Pearson
3999a3763e8SBob Pearson /* remove udp header */
4009a3763e8SBob Pearson skb_pull(skb, sizeof(struct udphdr));
4019a3763e8SBob Pearson
40210c47d56SYuval Shaia rxe_rcv(skb);
40313050a0bSBob Pearson
40413050a0bSBob Pearson return 0;
4058700e3e7SMoni Shoua }
4068700e3e7SMoni Shoua
rxe_xmit_packet(struct rxe_qp * qp,struct rxe_pkt_info * pkt,struct sk_buff * skb)40736fbb03dSBob Pearson int rxe_xmit_packet(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
40836fbb03dSBob Pearson struct sk_buff *skb)
40936fbb03dSBob Pearson {
41036fbb03dSBob Pearson int err;
41136fbb03dSBob Pearson int is_request = pkt->mask & RXE_REQ_MASK;
41236fbb03dSBob Pearson struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
413b5f3fe27SGuoqing Jiang unsigned long flags;
41436fbb03dSBob Pearson
415b5f3fe27SGuoqing Jiang spin_lock_irqsave(&qp->state_lock, flags);
41698e891b5SBob Pearson if ((is_request && (qp_state(qp) < IB_QPS_RTS)) ||
41798e891b5SBob Pearson (!is_request && (qp_state(qp) < IB_QPS_RTR))) {
418b5f3fe27SGuoqing Jiang spin_unlock_irqrestore(&qp->state_lock, flags);
41934549e88SBob Pearson rxe_dbg_qp(qp, "Packet dropped. QP is not in ready state\n");
42036fbb03dSBob Pearson goto drop;
42136fbb03dSBob Pearson }
422b5f3fe27SGuoqing Jiang spin_unlock_irqrestore(&qp->state_lock, flags);
42336fbb03dSBob Pearson
4241117f26eSBob Pearson rxe_icrc_generate(skb, pkt);
4251117f26eSBob Pearson
42613050a0bSBob Pearson if (pkt->mask & RXE_LOOPBACK_MASK)
42713050a0bSBob Pearson err = rxe_loopback(skb, pkt);
42813050a0bSBob Pearson else
42913050a0bSBob Pearson err = rxe_send(skb, pkt);
43036fbb03dSBob Pearson if (err) {
43136fbb03dSBob Pearson rxe_counter_inc(rxe, RXE_CNT_SEND_ERR);
43236fbb03dSBob Pearson return err;
43336fbb03dSBob Pearson }
43436fbb03dSBob Pearson
43536fbb03dSBob Pearson if ((qp_type(qp) != IB_QPT_RC) &&
43636fbb03dSBob Pearson (pkt->mask & RXE_END_MASK)) {
43736fbb03dSBob Pearson pkt->wqe->state = wqe_state_done;
438dccb23f6SBob Pearson rxe_sched_task(&qp->comp.task);
43936fbb03dSBob Pearson }
44036fbb03dSBob Pearson
44136fbb03dSBob Pearson rxe_counter_inc(rxe, RXE_CNT_SENT_PKTS);
44236fbb03dSBob Pearson goto done;
44336fbb03dSBob Pearson
44436fbb03dSBob Pearson drop:
44536fbb03dSBob Pearson kfree_skb(skb);
44636fbb03dSBob Pearson err = 0;
44736fbb03dSBob Pearson done:
44836fbb03dSBob Pearson return err;
44936fbb03dSBob Pearson }
45036fbb03dSBob Pearson
rxe_init_packet(struct rxe_dev * rxe,struct rxe_av * av,int paylen,struct rxe_pkt_info * pkt)451839f5ac0SBart Van Assche struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av,
4528700e3e7SMoni Shoua int paylen, struct rxe_pkt_info *pkt)
4538700e3e7SMoni Shoua {
4548700e3e7SMoni Shoua unsigned int hdr_len;
455dab21758SParav Pandit struct sk_buff *skb = NULL;
45643c9fc50SMartin Wilck struct net_device *ndev;
45782f82cebSParav Pandit const struct ib_gid_attr *attr;
45843c9fc50SMartin Wilck const int port_num = 1;
45943c9fc50SMartin Wilck
46082f82cebSParav Pandit attr = rdma_get_gid_attr(&rxe->ib_dev, port_num, av->grh.sgid_index);
46182f82cebSParav Pandit if (IS_ERR(attr))
46282f82cebSParav Pandit return NULL;
4638700e3e7SMoni Shoua
464edebc840SBob Pearson if (av->network_type == RXE_NETWORK_TYPE_IPV4)
4658700e3e7SMoni Shoua hdr_len = ETH_HLEN + sizeof(struct udphdr) +
4668700e3e7SMoni Shoua sizeof(struct iphdr);
4678700e3e7SMoni Shoua else
4688700e3e7SMoni Shoua hdr_len = ETH_HLEN + sizeof(struct udphdr) +
4698700e3e7SMoni Shoua sizeof(struct ipv6hdr);
4708700e3e7SMoni Shoua
471dab21758SParav Pandit rcu_read_lock();
472dab21758SParav Pandit ndev = rdma_read_gid_attr_ndev_rcu(attr);
473dab21758SParav Pandit if (IS_ERR(ndev)) {
474dab21758SParav Pandit rcu_read_unlock();
475dab21758SParav Pandit goto out;
476dab21758SParav Pandit }
47743c9fc50SMartin Wilck skb = alloc_skb(paylen + hdr_len + LL_RESERVED_SPACE(ndev),
4788700e3e7SMoni Shoua GFP_ATOMIC);
47943c9fc50SMartin Wilck
480dab21758SParav Pandit if (unlikely(!skb)) {
481dab21758SParav Pandit rcu_read_unlock();
48282f82cebSParav Pandit goto out;
483dab21758SParav Pandit }
4848700e3e7SMoni Shoua
4853bf3e2b8SParav Pandit skb_reserve(skb, hdr_len + LL_RESERVED_SPACE(ndev));
4868700e3e7SMoni Shoua
4873bf3e2b8SParav Pandit /* FIXME: hold reference to this netdev until life of this skb. */
48843c9fc50SMartin Wilck skb->dev = ndev;
489dab21758SParav Pandit rcu_read_unlock();
490dab21758SParav Pandit
491e0d696d2SJason Gunthorpe if (av->network_type == RXE_NETWORK_TYPE_IPV4)
4928700e3e7SMoni Shoua skb->protocol = htons(ETH_P_IP);
4938700e3e7SMoni Shoua else
4948700e3e7SMoni Shoua skb->protocol = htons(ETH_P_IPV6);
4958700e3e7SMoni Shoua
4968700e3e7SMoni Shoua pkt->rxe = rxe;
49743c9fc50SMartin Wilck pkt->port_num = port_num;
4982d3b2e44SBob Pearson pkt->hdr = skb_put(skb, paylen);
4998700e3e7SMoni Shoua pkt->mask |= RXE_GRH_MASK;
5008700e3e7SMoni Shoua
50182f82cebSParav Pandit out:
50282f82cebSParav Pandit rdma_put_gid_attr(attr);
5038700e3e7SMoni Shoua return skb;
5048700e3e7SMoni Shoua }
5058700e3e7SMoni Shoua
5068700e3e7SMoni Shoua /*
5078700e3e7SMoni Shoua * this is required by rxe_cfg to match rxe devices in
5088700e3e7SMoni Shoua * /sys/class/infiniband up with their underlying ethernet devices
5098700e3e7SMoni Shoua */
rxe_parent_name(struct rxe_dev * rxe,unsigned int port_num)510839f5ac0SBart Van Assche const char *rxe_parent_name(struct rxe_dev *rxe, unsigned int port_num)
5118700e3e7SMoni Shoua {
5128700e3e7SMoni Shoua return rxe->ndev->name;
5138700e3e7SMoni Shoua }
5148700e3e7SMoni Shoua
rxe_net_add(const char * ibdev_name,struct net_device * ndev)51566920e1bSSteve Wise int rxe_net_add(const char *ibdev_name, struct net_device *ndev)
5168700e3e7SMoni Shoua {
5178700e3e7SMoni Shoua int err;
5188700e3e7SMoni Shoua struct rxe_dev *rxe = NULL;
5198700e3e7SMoni Shoua
520459cc69fSLeon Romanovsky rxe = ib_alloc_device(rxe_dev, ib_dev);
5218700e3e7SMoni Shoua if (!rxe)
522ca22354bSJason Gunthorpe return -ENOMEM;
5238700e3e7SMoni Shoua
5248700e3e7SMoni Shoua rxe->ndev = ndev;
5258700e3e7SMoni Shoua
52666920e1bSSteve Wise err = rxe_add(rxe, ndev->mtu, ibdev_name);
5278700e3e7SMoni Shoua if (err) {
5288700e3e7SMoni Shoua ib_dealloc_device(&rxe->ib_dev);
529ca22354bSJason Gunthorpe return err;
5308700e3e7SMoni Shoua }
5318700e3e7SMoni Shoua
532ca22354bSJason Gunthorpe return 0;
5338700e3e7SMoni Shoua }
5348700e3e7SMoni Shoua
rxe_port_event(struct rxe_dev * rxe,enum ib_event_type event)5358700e3e7SMoni Shoua static void rxe_port_event(struct rxe_dev *rxe,
5368700e3e7SMoni Shoua enum ib_event_type event)
5378700e3e7SMoni Shoua {
5388700e3e7SMoni Shoua struct ib_event ev;
5398700e3e7SMoni Shoua
5408700e3e7SMoni Shoua ev.device = &rxe->ib_dev;
5418700e3e7SMoni Shoua ev.element.port_num = 1;
5428700e3e7SMoni Shoua ev.event = event;
5438700e3e7SMoni Shoua
5448700e3e7SMoni Shoua ib_dispatch_event(&ev);
5458700e3e7SMoni Shoua }
5468700e3e7SMoni Shoua
5478700e3e7SMoni Shoua /* Caller must hold net_info_lock */
rxe_port_up(struct rxe_dev * rxe)5488700e3e7SMoni Shoua void rxe_port_up(struct rxe_dev *rxe)
5498700e3e7SMoni Shoua {
5508700e3e7SMoni Shoua struct rxe_port *port;
5518700e3e7SMoni Shoua
5528700e3e7SMoni Shoua port = &rxe->port;
5538700e3e7SMoni Shoua port->attr.state = IB_PORT_ACTIVE;
5548700e3e7SMoni Shoua
5558700e3e7SMoni Shoua rxe_port_event(rxe, IB_EVENT_PORT_ACTIVE);
5565a738b5dSJason Gunthorpe dev_info(&rxe->ib_dev.dev, "set active\n");
5578700e3e7SMoni Shoua }
5588700e3e7SMoni Shoua
5598700e3e7SMoni Shoua /* Caller must hold net_info_lock */
rxe_port_down(struct rxe_dev * rxe)5608700e3e7SMoni Shoua void rxe_port_down(struct rxe_dev *rxe)
5618700e3e7SMoni Shoua {
5628700e3e7SMoni Shoua struct rxe_port *port;
5638700e3e7SMoni Shoua
5648700e3e7SMoni Shoua port = &rxe->port;
5658700e3e7SMoni Shoua port->attr.state = IB_PORT_DOWN;
5668700e3e7SMoni Shoua
5678700e3e7SMoni Shoua rxe_port_event(rxe, IB_EVENT_PORT_ERR);
5686e5559b2SAndrew Boyer rxe_counter_inc(rxe, RXE_CNT_LINK_DOWNED);
5695a738b5dSJason Gunthorpe dev_info(&rxe->ib_dev.dev, "set down\n");
5708700e3e7SMoni Shoua }
5718700e3e7SMoni Shoua
rxe_set_port_state(struct rxe_dev * rxe)572f55c3ec4SYuval Shaia void rxe_set_port_state(struct rxe_dev *rxe)
573f55c3ec4SYuval Shaia {
574f55c3ec4SYuval Shaia if (netif_running(rxe->ndev) && netif_carrier_ok(rxe->ndev))
575f55c3ec4SYuval Shaia rxe_port_up(rxe);
576f55c3ec4SYuval Shaia else
577f55c3ec4SYuval Shaia rxe_port_down(rxe);
578f55c3ec4SYuval Shaia }
579f55c3ec4SYuval Shaia
rxe_notify(struct notifier_block * not_blk,unsigned long event,void * arg)5808700e3e7SMoni Shoua static int rxe_notify(struct notifier_block *not_blk,
5818700e3e7SMoni Shoua unsigned long event,
5828700e3e7SMoni Shoua void *arg)
5838700e3e7SMoni Shoua {
5848700e3e7SMoni Shoua struct net_device *ndev = netdev_notifier_info_to_dev(arg);
5854c173f59SJason Gunthorpe struct rxe_dev *rxe = rxe_get_dev_from_net(ndev);
5868700e3e7SMoni Shoua
5878700e3e7SMoni Shoua if (!rxe)
5884c173f59SJason Gunthorpe return NOTIFY_OK;
5898700e3e7SMoni Shoua
5908700e3e7SMoni Shoua switch (event) {
5918700e3e7SMoni Shoua case NETDEV_UNREGISTER:
592c367074bSJason Gunthorpe ib_unregister_device_queued(&rxe->ib_dev);
593c367074bSJason Gunthorpe break;
5948700e3e7SMoni Shoua case NETDEV_UP:
5958700e3e7SMoni Shoua rxe_port_up(rxe);
5968700e3e7SMoni Shoua break;
5978700e3e7SMoni Shoua case NETDEV_DOWN:
5988700e3e7SMoni Shoua rxe_port_down(rxe);
5998700e3e7SMoni Shoua break;
6008700e3e7SMoni Shoua case NETDEV_CHANGEMTU:
601a9fb3287SBob Pearson rxe_dbg_dev(rxe, "%s changed mtu to %d\n", ndev->name, ndev->mtu);
6028700e3e7SMoni Shoua rxe_set_mtu(rxe, ndev->mtu);
6038700e3e7SMoni Shoua break;
6048700e3e7SMoni Shoua case NETDEV_CHANGE:
605f55c3ec4SYuval Shaia rxe_set_port_state(rxe);
6065c50f1d1SAndrew Boyer break;
6075c50f1d1SAndrew Boyer case NETDEV_REBOOT:
6088700e3e7SMoni Shoua case NETDEV_GOING_DOWN:
6098700e3e7SMoni Shoua case NETDEV_CHANGEADDR:
6108700e3e7SMoni Shoua case NETDEV_CHANGENAME:
6118700e3e7SMoni Shoua case NETDEV_FEAT_CHANGE:
6128700e3e7SMoni Shoua default:
613a9fb3287SBob Pearson rxe_dbg_dev(rxe, "ignoring netdev event = %ld for %s\n",
6148700e3e7SMoni Shoua event, ndev->name);
6158700e3e7SMoni Shoua break;
6168700e3e7SMoni Shoua }
6174c173f59SJason Gunthorpe
6184c173f59SJason Gunthorpe ib_device_put(&rxe->ib_dev);
6198700e3e7SMoni Shoua return NOTIFY_OK;
6208700e3e7SMoni Shoua }
6218700e3e7SMoni Shoua
6220f02ba7eSZhu Yanjun static struct notifier_block rxe_net_notifier = {
6238700e3e7SMoni Shoua .notifier_call = rxe_notify,
6248700e3e7SMoni Shoua };
6258700e3e7SMoni Shoua
rxe_net_ipv4_init(void)6268d8f0837SBart Van Assche static int rxe_net_ipv4_init(void)
6278700e3e7SMoni Shoua {
628dfdd6158SYonatan Cohen recv_sockets.sk4 = rxe_setup_udp_tunnel(&init_net,
629dfdd6158SYonatan Cohen htons(ROCE_V2_UDP_DPORT), false);
630dfdd6158SYonatan Cohen if (IS_ERR(recv_sockets.sk4)) {
631dfdd6158SYonatan Cohen recv_sockets.sk4 = NULL;
632e404f945SParav Pandit pr_err("Failed to create IPv4 UDP tunnel\n");
633dfdd6158SYonatan Cohen return -1;
634dfdd6158SYonatan Cohen }
635dfdd6158SYonatan Cohen
636dfdd6158SYonatan Cohen return 0;
637dfdd6158SYonatan Cohen }
638dfdd6158SYonatan Cohen
rxe_net_ipv6_init(void)6398d8f0837SBart Van Assche static int rxe_net_ipv6_init(void)
640dfdd6158SYonatan Cohen {
641dfdd6158SYonatan Cohen #if IS_ENABLED(CONFIG_IPV6)
6428700e3e7SMoni Shoua
6438700e3e7SMoni Shoua recv_sockets.sk6 = rxe_setup_udp_tunnel(&init_net,
6448700e3e7SMoni Shoua htons(ROCE_V2_UDP_DPORT), true);
64532a25f2eSKamal Heib if (PTR_ERR(recv_sockets.sk6) == -EAFNOSUPPORT) {
64632a25f2eSKamal Heib recv_sockets.sk6 = NULL;
64732a25f2eSKamal Heib pr_warn("IPv6 is not supported, can not create a UDPv6 socket\n");
64832a25f2eSKamal Heib return 0;
64932a25f2eSKamal Heib }
65032a25f2eSKamal Heib
6518700e3e7SMoni Shoua if (IS_ERR(recv_sockets.sk6)) {
6528700e3e7SMoni Shoua recv_sockets.sk6 = NULL;
653e404f945SParav Pandit pr_err("Failed to create IPv6 UDP tunnel\n");
6548700e3e7SMoni Shoua return -1;
6558700e3e7SMoni Shoua }
656dfdd6158SYonatan Cohen #endif
657dfdd6158SYonatan Cohen return 0;
6588700e3e7SMoni Shoua }
6598700e3e7SMoni Shoua
rxe_net_exit(void)6608700e3e7SMoni Shoua void rxe_net_exit(void)
6618700e3e7SMoni Shoua {
6628700e3e7SMoni Shoua rxe_release_udp_tunnel(recv_sockets.sk6);
6638700e3e7SMoni Shoua rxe_release_udp_tunnel(recv_sockets.sk4);
6648700e3e7SMoni Shoua unregister_netdevice_notifier(&rxe_net_notifier);
6658700e3e7SMoni Shoua }
666e404f945SParav Pandit
rxe_net_init(void)667e404f945SParav Pandit int rxe_net_init(void)
668e404f945SParav Pandit {
669e404f945SParav Pandit int err;
670e404f945SParav Pandit
671e404f945SParav Pandit recv_sockets.sk6 = NULL;
672e404f945SParav Pandit
673e404f945SParav Pandit err = rxe_net_ipv4_init();
674e404f945SParav Pandit if (err)
675e404f945SParav Pandit return err;
676e404f945SParav Pandit err = rxe_net_ipv6_init();
677e404f945SParav Pandit if (err)
678e404f945SParav Pandit goto err_out;
679e404f945SParav Pandit err = register_netdevice_notifier(&rxe_net_notifier);
680e404f945SParav Pandit if (err) {
681e404f945SParav Pandit pr_err("Failed to register netdev notifier\n");
682e404f945SParav Pandit goto err_out;
683e404f945SParav Pandit }
684e404f945SParav Pandit return 0;
685e404f945SParav Pandit err_out:
686e404f945SParav Pandit rxe_net_exit();
687e404f945SParav Pandit return err;
688e404f945SParav Pandit }
689