#include <linux/skbuff.h>
#include <linux/export.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/if_vlan.h>
#include <net/ip.h>
#include <net/ipv6.h>
#include <linux/if_tunnel.h>
#include <linux/if_pppox.h>
#include <linux/ppp_defs.h>
#include <net/flow_keys.h>

/* copy saddr & daddr, possibly using 64bit load/store
 * Equivalent to :	flow->src = iph->saddr;
 *			flow->dst = iph->daddr;
 */
static void iph_to_flow_copy_addrs(struct flow_keys *flow, const struct iphdr *iph)
{
	BUILD_BUG_ON(offsetof(typeof(*flow), dst) !=
		     offsetof(typeof(*flow), src) + sizeof(flow->src));
	memcpy(&flow->src, &iph->saddr, sizeof(flow->src) + sizeof(flow->dst));
}

bool skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow)
{
	int poff, nhoff = skb_network_offset(skb);
	u8 ip_proto;
	__be16 proto = skb->protocol;

	memset(flow, 0, sizeof(*flow));

again:
	switch (proto) {
	case __constant_htons(ETH_P_IP): {
		const struct iphdr *iph;
		struct iphdr _iph;
ip:
		iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph);
		if (!iph)
			return false;

		if (ip_is_fragment(iph))
			ip_proto = 0;
		else
			ip_proto = iph->protocol;
		iph_to_flow_copy_addrs(flow, iph);
		nhoff += iph->ihl * 4;
		break;
	}
	case __constant_htons(ETH_P_IPV6): {
		const struct ipv6hdr *iph;
		struct ipv6hdr _iph;
ipv6:
		iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph);
		if (!iph)
			return false;

		ip_proto = iph->nexthdr;
		flow->src = (__force __be32)ipv6_addr_hash(&iph->saddr);
		flow->dst = (__force __be32)ipv6_addr_hash(&iph->daddr);
		nhoff += sizeof(struct ipv6hdr);
		break;
	}
	case __constant_htons(ETH_P_8021Q): {
		const struct vlan_hdr *vlan;
		struct vlan_hdr _vlan;

		vlan = skb_header_pointer(skb, nhoff, sizeof(_vlan), &_vlan);
		if (!vlan)
			return false;

		proto = vlan->h_vlan_encapsulated_proto;
		nhoff += sizeof(*vlan);
		goto again;
	}
	case __constant_htons(ETH_P_PPP_SES): {
		struct {
			struct pppoe_hdr hdr;
			__be16 proto;
		} *hdr, _hdr;
		hdr = skb_header_pointer(skb, nhoff, sizeof(_hdr), &_hdr);
		if (!hdr)
			return false;
		proto = hdr->proto;
		nhoff += PPPOE_SES_HLEN;
		switch (proto) {
		case __constant_htons(PPP_IP):
			goto ip;
		case __constant_htons(PPP_IPV6):
			goto ipv6;
		default:
			return false;
		}
	}
	default:
		return false;
	}

	switch (ip_proto) {
	case IPPROTO_GRE: {
		struct gre_hdr {
			__be16 flags;
			__be16 proto;
		} *hdr, _hdr;

		hdr = skb_header_pointer(skb, nhoff, sizeof(_hdr), &_hdr);
		if (!hdr)
			return false;
		/*
		 * Only look inside GRE if version zero and no
		 * routing
		 */
		if (!(hdr->flags & (GRE_VERSION|GRE_ROUTING))) {
			proto = hdr->proto;
			nhoff += 4;
			if (hdr->flags & GRE_CSUM)
				nhoff += 4;
			if (hdr->flags & GRE_KEY)
				nhoff += 4;
			if (hdr->flags & GRE_SEQ)
				nhoff += 4;
			goto again;
		}
		break;
	}
	case IPPROTO_IPIP:
		goto again;
	default:
		break;
	}

	flow->ip_proto = ip_proto;
	poff = proto_ports_offset(ip_proto);
	if (poff >= 0) {
		__be32 *ports, _ports;

		nhoff += poff;
		ports = skb_header_pointer(skb, nhoff, sizeof(_ports), &_ports);
		if (ports)
			flow->ports = *ports;
	}

	return true;
}
EXPORT_SYMBOL(skb_flow_dissect);
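
/*
 * Usage example (illustrative sketch, not a caller defined in this file):
 * code that only needs the basic flow tuple of an skb can invoke
 * skb_flow_dissect() directly, e.g. from a hypothetical classifier:
 *
 *	struct flow_keys keys;
 *
 *	if (skb_flow_dissect(skb, &keys))
 *		pr_debug("proto %u %pI4:%u -> %pI4:%u\n",
 *			 keys.ip_proto,
 *			 &keys.src, ntohs(keys.port16[0]),
 *			 &keys.dst, ntohs(keys.port16[1]));
 *
 * Note that for IPv6 packets keys.src and keys.dst only carry 32bit
 * hashes of the addresses (see ipv6_addr_hash() above), so the %pI4
 * output is only meaningful for IPv4 traffic; the values remain fine
 * as hash inputs.
 */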

static u32 hashrnd __read_mostly;

/*
 * __skb_get_rxhash: calculate a flow hash based on src/dst addresses
 * and src/dst port numbers. Sets rxhash in skb to a non-zero hash value
 * on success; zero indicates no valid hash. Also sets l4_rxhash in skb
 * if the hash is a canonical 4-tuple hash over transport ports.
 */
void __skb_get_rxhash(struct sk_buff *skb)
{
	struct flow_keys keys;
	u32 hash;

	if (!skb_flow_dissect(skb, &keys))
		return;

	if (keys.ports)
		skb->l4_rxhash = 1;

	/* get a consistent hash (same value on both flow directions) */
	if (((__force u32)keys.dst < (__force u32)keys.src) ||
	    (((__force u32)keys.dst == (__force u32)keys.src) &&
	     ((__force u16)keys.port16[1] < (__force u16)keys.port16[0]))) {
		swap(keys.dst, keys.src);
		swap(keys.port16[0], keys.port16[1]);
	}

	hash = jhash_3words((__force u32)keys.dst,
			    (__force u32)keys.src,
			    (__force u32)keys.ports, hashrnd);
	if (!hash)
		hash = 1;

	skb->rxhash = hash;
}
EXPORT_SYMBOL(__skb_get_rxhash);

/*
 * Returns a Tx hash based on the given packet descriptor and the number
 * of Tx queues to be used as a distribution range.
 */
u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb,
		  unsigned int num_tx_queues)
{
	u32 hash;
	u16 qoffset = 0;
	u16 qcount = num_tx_queues;

	if (skb_rx_queue_recorded(skb)) {
		hash = skb_get_rx_queue(skb);
		while (unlikely(hash >= num_tx_queues))
			hash -= num_tx_queues;
		return hash;
	}

	if (dev->num_tc) {
		u8 tc = netdev_get_prio_tc_map(dev, skb->priority);
		qoffset = dev->tc_to_txq[tc].offset;
		qcount = dev->tc_to_txq[tc].count;
	}

	if (skb->sk && skb->sk->sk_hash)
		hash = skb->sk->sk_hash;
	else
		hash = (__force u16) skb->protocol;
	hash = jhash_1word(hash, hashrnd);

	return (u16) (((u64) hash * qcount) >> 32) + qoffset;
}
EXPORT_SYMBOL(__skb_tx_hash);
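
/*
 * Worked example: __skb_tx_hash() above (and get_xps_queue() below) map
 * a 32bit hash onto a small queue range with ((u64)hash * range) >> 32
 * rather than hash % range. Treating hash / 2^32 as a fraction in
 * [0, 1), the multiply-and-shift scales it into [0, range); e.g. with
 * range = 8:
 *
 *	hash = 0x00000000:  (0x00000000ULL * 8) >> 32 = 0
 *	hash = 0x80000000:  (0x80000000ULL * 8) >> 32 = 4
 *	hash = 0xffffffff:  (0xffffffffULL * 8) >> 32 = 7
 *
 * which avoids a divide on the fast path and distributes as evenly as
 * the hash itself.
 */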

/* Catch drivers returning an out-of-range queue index: warn (ratelimited)
 * and fall back to queue 0.
 */
static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index)
{
	if (unlikely(queue_index >= dev->real_num_tx_queues)) {
		net_warn_ratelimited("%s selects TX queue %d, but real number of TX queues is %d\n",
				     dev->name, queue_index,
				     dev->real_num_tx_queues);
		return 0;
	}
	return queue_index;
}

/* Transmit Packet Steering: pick a Tx queue from the XPS map configured
 * for the current CPU, or -1 if there is no usable mapping.
 */
static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
{
#ifdef CONFIG_XPS
	struct xps_dev_maps *dev_maps;
	struct xps_map *map;
	int queue_index = -1;

	rcu_read_lock();
	dev_maps = rcu_dereference(dev->xps_maps);
	if (dev_maps) {
		map = rcu_dereference(
		    dev_maps->cpu_map[raw_smp_processor_id()]);
		if (map) {
			if (map->len == 1)
				queue_index = map->queues[0];
			else {
				u32 hash;
				if (skb->sk && skb->sk->sk_hash)
					hash = skb->sk->sk_hash;
				else
					hash = (__force u16) skb->protocol ^
					    skb->rxhash;
				hash = jhash_1word(hash, hashrnd);
				queue_index = map->queues[
				    ((u64)hash * map->len) >> 32];
			}
			if (unlikely(queue_index >= dev->real_num_tx_queues))
				queue_index = -1;
		}
	}
	rcu_read_unlock();

	return queue_index;
#else
	return -1;
#endif
}

/* Default Tx queue selection: reuse the queue index cached on the socket
 * when it is still valid, otherwise consult XPS and fall back to
 * skb_tx_hash().
 */
u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb)
{
	struct sock *sk = skb->sk;
	int queue_index = sk_tx_queue_get(sk);

	if (queue_index < 0 || skb->ooo_okay ||
	    queue_index >= dev->real_num_tx_queues) {
		int new_index = get_xps_queue(dev, skb);
		if (new_index < 0)
			new_index = skb_tx_hash(dev, skb);

		if (queue_index != new_index && sk) {
			struct dst_entry *dst =
				rcu_dereference_check(sk->sk_dst_cache, 1);

			/* only cache the new index while the route is stable */
			if (dst && skb_dst(skb) == dst)
				sk_tx_queue_set(sk, new_index);
		}

		queue_index = new_index;
	}

	return queue_index;
}
EXPORT_SYMBOL(__netdev_pick_tx);

struct netdev_queue *netdev_pick_tx(struct net_device *dev,
				    struct sk_buff *skb)
{
	int queue_index = 0;

	if (dev->real_num_tx_queues != 1) {
		const struct net_device_ops *ops = dev->netdev_ops;
		if (ops->ndo_select_queue)
			queue_index = ops->ndo_select_queue(dev, skb);
		else
			queue_index = __netdev_pick_tx(dev, skb);
		queue_index = dev_cap_txqueue(dev, queue_index);
	}

	skb_set_queue_mapping(skb, queue_index);
	return netdev_get_tx_queue(dev, queue_index);
}

static int __init initialize_hashrnd(void)
{
	get_random_bytes(&hashrnd, sizeof(hashrnd));
	return 0;
}

late_initcall_sync(initialize_hashrnd);
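
/*
 * Driver-side example (illustrative sketch; foo_select_queue() is a
 * hypothetical callback, not something defined in this file): a
 * multiqueue driver that wants to pin control traffic to queue 0 but
 * otherwise keep the default distribution could wire up
 * .ndo_select_queue roughly as
 *
 *	static u16 foo_select_queue(struct net_device *dev,
 *				    struct sk_buff *skb)
 *	{
 *		if (skb->priority == TC_PRIO_CONTROL)
 *			return 0;
 *		return __netdev_pick_tx(dev, skb);
 *	}
 *
 * netdev_pick_tx() above still runs the result through dev_cap_txqueue(),
 * so an out-of-range return value would be caught and forced to queue 0.
 */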