1 /* 2 * Copyright(c) 2017 Intel Corporation. 3 * 4 * This file is provided under a dual BSD/GPLv2 license. When using or 5 * redistributing this file, you may do so under either license. 6 * 7 * GPL LICENSE SUMMARY 8 * 9 * This program is free software; you can redistribute it and/or modify 10 * it under the terms of version 2 of the GNU General Public License as 11 * published by the Free Software Foundation. 12 * 13 * This program is distributed in the hope that it will be useful, but 14 * WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 16 * General Public License for more details. 17 * 18 * BSD LICENSE 19 * 20 * Redistribution and use in source and binary forms, with or without 21 * modification, are permitted provided that the following conditions 22 * are met: 23 * 24 * - Redistributions of source code must retain the above copyright 25 * notice, this list of conditions and the following disclaimer. 26 * - Redistributions in binary form must reproduce the above copyright 27 * notice, this list of conditions and the following disclaimer in 28 * the documentation and/or other materials provided with the 29 * distribution. 30 * - Neither the name of Intel Corporation nor the names of its 31 * contributors may be used to endorse or promote products derived 32 * from this software without specific prior written permission. 33 * 34 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 35 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 36 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 37 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 38 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 39 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 40 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 41 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 42 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 43 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 44 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 45 * 46 */ 47 48 /* 49 * This file contains OPA VNIC encapsulation/decapsulation function. 50 */ 51 52 #include <linux/if_ether.h> 53 #include <linux/if_vlan.h> 54 55 #include "opa_vnic_internal.h" 56 57 /* OPA 16B Header fields */ 58 #define OPA_16B_LID_MASK 0xFFFFFull 59 #define OPA_16B_SLID_HIGH_SHFT 8 60 #define OPA_16B_SLID_MASK 0xF00ull 61 #define OPA_16B_DLID_MASK 0xF000ull 62 #define OPA_16B_DLID_HIGH_SHFT 12 63 #define OPA_16B_LEN_SHFT 20 64 #define OPA_16B_SC_SHFT 20 65 #define OPA_16B_RC_SHFT 25 66 #define OPA_16B_PKEY_SHFT 16 67 68 #define OPA_VNIC_L4_HDR_SHFT 16 69 70 /* L2+L4 hdr len is 20 bytes (5 quad words) */ 71 #define OPA_VNIC_HDR_QW_LEN 5 72 73 static inline void opa_vnic_make_header(u8 *hdr, u32 slid, u32 dlid, u16 len, 74 u16 pkey, u16 entropy, u8 sc, u8 rc, 75 u8 l4_type, u16 l4_hdr) 76 { 77 /* h[1]: LT=1, 16B L2=10 */ 78 u32 h[OPA_VNIC_HDR_QW_LEN] = {0, 0xc0000000, 0, 0, 0}; 79 80 h[2] = l4_type; 81 h[3] = entropy; 82 h[4] = l4_hdr << OPA_VNIC_L4_HDR_SHFT; 83 84 /* Extract and set 4 upper bits and 20 lower bits of the lids */ 85 h[0] |= (slid & OPA_16B_LID_MASK); 86 h[2] |= ((slid >> (20 - OPA_16B_SLID_HIGH_SHFT)) & OPA_16B_SLID_MASK); 87 88 h[1] |= (dlid & OPA_16B_LID_MASK); 89 h[2] |= ((dlid >> (20 - OPA_16B_DLID_HIGH_SHFT)) & OPA_16B_DLID_MASK); 90 91 h[0] |= (len << OPA_16B_LEN_SHFT); 92 h[1] |= (rc << OPA_16B_RC_SHFT); 93 h[1] |= (sc << OPA_16B_SC_SHFT); 94 h[2] |= ((u32)pkey << OPA_16B_PKEY_SHFT); 95 96 memcpy(hdr, h, OPA_VNIC_HDR_LEN); 97 } 98 99 /* 100 * Using a simple hash table for mac table implementation with the last octet 101 * of mac address as a key. 102 */ 103 static void opa_vnic_free_mac_tbl(struct hlist_head *mactbl) 104 { 105 struct opa_vnic_mac_tbl_node *node; 106 struct hlist_node *tmp; 107 int bkt; 108 109 if (!mactbl) 110 return; 111 112 vnic_hash_for_each_safe(mactbl, bkt, tmp, node, hlist) { 113 hash_del(&node->hlist); 114 kfree(node); 115 } 116 kfree(mactbl); 117 } 118 119 static struct hlist_head *opa_vnic_alloc_mac_tbl(void) 120 { 121 u32 size = sizeof(struct hlist_head) * OPA_VNIC_MAC_TBL_SIZE; 122 struct hlist_head *mactbl; 123 124 mactbl = kzalloc(size, GFP_KERNEL); 125 if (!mactbl) 126 return ERR_PTR(-ENOMEM); 127 128 vnic_hash_init(mactbl); 129 return mactbl; 130 } 131 132 /* opa_vnic_release_mac_tbl - empty and free the mac table */ 133 void opa_vnic_release_mac_tbl(struct opa_vnic_adapter *adapter) 134 { 135 struct hlist_head *mactbl; 136 137 mutex_lock(&adapter->mactbl_lock); 138 mactbl = rcu_access_pointer(adapter->mactbl); 139 rcu_assign_pointer(adapter->mactbl, NULL); 140 synchronize_rcu(); 141 opa_vnic_free_mac_tbl(mactbl); 142 mutex_unlock(&adapter->mactbl_lock); 143 } 144 145 /* 146 * opa_vnic_query_mac_tbl - query the mac table for a section 147 * 148 * This function implements query of specific function of the mac table. 149 * The function also expects the requested range to be valid. 150 */ 151 void opa_vnic_query_mac_tbl(struct opa_vnic_adapter *adapter, 152 struct opa_veswport_mactable *tbl) 153 { 154 struct opa_vnic_mac_tbl_node *node; 155 struct hlist_head *mactbl; 156 int bkt; 157 u16 loffset, lnum_entries; 158 159 rcu_read_lock(); 160 mactbl = rcu_dereference(adapter->mactbl); 161 if (!mactbl) 162 goto get_mac_done; 163 164 loffset = be16_to_cpu(tbl->offset); 165 lnum_entries = be16_to_cpu(tbl->num_entries); 166 167 vnic_hash_for_each(mactbl, bkt, node, hlist) { 168 struct __opa_vnic_mactable_entry *nentry = &node->entry; 169 struct opa_veswport_mactable_entry *entry; 170 171 if ((node->index < loffset) || 172 (node->index >= (loffset + lnum_entries))) 173 continue; 174 175 /* populate entry in the tbl corresponding to the index */ 176 entry = &tbl->tbl_entries[node->index - loffset]; 177 memcpy(entry->mac_addr, nentry->mac_addr, 178 ARRAY_SIZE(entry->mac_addr)); 179 memcpy(entry->mac_addr_mask, nentry->mac_addr_mask, 180 ARRAY_SIZE(entry->mac_addr_mask)); 181 entry->dlid_sd = cpu_to_be32(nentry->dlid_sd); 182 } 183 tbl->mac_tbl_digest = cpu_to_be32(adapter->info.vport.mac_tbl_digest); 184 get_mac_done: 185 rcu_read_unlock(); 186 } 187 188 /* 189 * opa_vnic_update_mac_tbl - update mac table section 190 * 191 * This function updates the specified section of the mac table. 192 * The procedure includes following steps. 193 * - Allocate a new mac (hash) table. 194 * - Add the specified entries to the new table. 195 * (except the ones that are requested to be deleted). 196 * - Add all the other entries from the old mac table. 197 * - If there is a failure, free the new table and return. 198 * - Switch to the new table. 199 * - Free the old table and return. 200 * 201 * The function also expects the requested range to be valid. 202 */ 203 int opa_vnic_update_mac_tbl(struct opa_vnic_adapter *adapter, 204 struct opa_veswport_mactable *tbl) 205 { 206 struct opa_vnic_mac_tbl_node *node, *new_node; 207 struct hlist_head *new_mactbl, *old_mactbl; 208 int i, bkt, rc = 0; 209 u8 key; 210 u16 loffset, lnum_entries; 211 212 mutex_lock(&adapter->mactbl_lock); 213 /* allocate new mac table */ 214 new_mactbl = opa_vnic_alloc_mac_tbl(); 215 if (IS_ERR(new_mactbl)) { 216 mutex_unlock(&adapter->mactbl_lock); 217 return PTR_ERR(new_mactbl); 218 } 219 220 loffset = be16_to_cpu(tbl->offset); 221 lnum_entries = be16_to_cpu(tbl->num_entries); 222 223 /* add updated entries to the new mac table */ 224 for (i = 0; i < lnum_entries; i++) { 225 struct __opa_vnic_mactable_entry *nentry; 226 struct opa_veswport_mactable_entry *entry = 227 &tbl->tbl_entries[i]; 228 u8 *mac_addr = entry->mac_addr; 229 u8 empty_mac[ETH_ALEN] = { 0 }; 230 231 v_dbg("new mac entry %4d: %02x:%02x:%02x:%02x:%02x:%02x %x\n", 232 loffset + i, mac_addr[0], mac_addr[1], mac_addr[2], 233 mac_addr[3], mac_addr[4], mac_addr[5], 234 entry->dlid_sd); 235 236 /* if the entry is being removed, do not add it */ 237 if (!memcmp(mac_addr, empty_mac, ARRAY_SIZE(empty_mac))) 238 continue; 239 240 node = kzalloc(sizeof(*node), GFP_KERNEL); 241 if (!node) { 242 rc = -ENOMEM; 243 goto updt_done; 244 } 245 246 node->index = loffset + i; 247 nentry = &node->entry; 248 memcpy(nentry->mac_addr, entry->mac_addr, 249 ARRAY_SIZE(nentry->mac_addr)); 250 memcpy(nentry->mac_addr_mask, entry->mac_addr_mask, 251 ARRAY_SIZE(nentry->mac_addr_mask)); 252 nentry->dlid_sd = be32_to_cpu(entry->dlid_sd); 253 key = node->entry.mac_addr[OPA_VNIC_MAC_HASH_IDX]; 254 vnic_hash_add(new_mactbl, &node->hlist, key); 255 } 256 257 /* add other entries from current mac table to new mac table */ 258 old_mactbl = rcu_access_pointer(adapter->mactbl); 259 if (!old_mactbl) 260 goto switch_tbl; 261 262 vnic_hash_for_each(old_mactbl, bkt, node, hlist) { 263 if ((node->index >= loffset) && 264 (node->index < (loffset + lnum_entries))) 265 continue; 266 267 new_node = kzalloc(sizeof(*new_node), GFP_KERNEL); 268 if (!new_node) { 269 rc = -ENOMEM; 270 goto updt_done; 271 } 272 273 new_node->index = node->index; 274 memcpy(&new_node->entry, &node->entry, sizeof(node->entry)); 275 key = new_node->entry.mac_addr[OPA_VNIC_MAC_HASH_IDX]; 276 vnic_hash_add(new_mactbl, &new_node->hlist, key); 277 } 278 279 switch_tbl: 280 /* switch to new table */ 281 rcu_assign_pointer(adapter->mactbl, new_mactbl); 282 synchronize_rcu(); 283 284 adapter->info.vport.mac_tbl_digest = be32_to_cpu(tbl->mac_tbl_digest); 285 updt_done: 286 /* upon failure, free the new table; otherwise, free the old table */ 287 if (rc) 288 opa_vnic_free_mac_tbl(new_mactbl); 289 else 290 opa_vnic_free_mac_tbl(old_mactbl); 291 292 mutex_unlock(&adapter->mactbl_lock); 293 return rc; 294 } 295 296 /* opa_vnic_chk_mac_tbl - check mac table for dlid */ 297 static uint32_t opa_vnic_chk_mac_tbl(struct opa_vnic_adapter *adapter, 298 struct ethhdr *mac_hdr) 299 { 300 struct opa_vnic_mac_tbl_node *node; 301 struct hlist_head *mactbl; 302 u32 dlid = 0; 303 u8 key; 304 305 rcu_read_lock(); 306 mactbl = rcu_dereference(adapter->mactbl); 307 if (unlikely(!mactbl)) 308 goto chk_done; 309 310 key = mac_hdr->h_dest[OPA_VNIC_MAC_HASH_IDX]; 311 vnic_hash_for_each_possible(mactbl, node, hlist, key) { 312 struct __opa_vnic_mactable_entry *entry = &node->entry; 313 314 /* if related to source mac, skip */ 315 if (unlikely(OPA_VNIC_DLID_SD_IS_SRC_MAC(entry->dlid_sd))) 316 continue; 317 318 if (!memcmp(node->entry.mac_addr, mac_hdr->h_dest, 319 ARRAY_SIZE(node->entry.mac_addr))) { 320 /* mac address found */ 321 dlid = OPA_VNIC_DLID_SD_GET_DLID(node->entry.dlid_sd); 322 break; 323 } 324 } 325 326 chk_done: 327 rcu_read_unlock(); 328 return dlid; 329 } 330 331 /* opa_vnic_get_dlid - find and return the DLID */ 332 static uint32_t opa_vnic_get_dlid(struct opa_vnic_adapter *adapter, 333 struct sk_buff *skb, u8 def_port) 334 { 335 struct __opa_veswport_info *info = &adapter->info; 336 struct ethhdr *mac_hdr = (struct ethhdr *)skb_mac_header(skb); 337 u32 dlid; 338 339 dlid = opa_vnic_chk_mac_tbl(adapter, mac_hdr); 340 if (dlid) 341 return dlid; 342 343 if (is_multicast_ether_addr(mac_hdr->h_dest)) { 344 dlid = info->vesw.u_mcast_dlid; 345 } else { 346 if (is_local_ether_addr(mac_hdr->h_dest)) { 347 dlid = ((uint32_t)mac_hdr->h_dest[5] << 16) | 348 ((uint32_t)mac_hdr->h_dest[4] << 8) | 349 mac_hdr->h_dest[3]; 350 if (unlikely(!dlid)) 351 v_warn("Null dlid in MAC address\n"); 352 } else if (def_port != OPA_VNIC_INVALID_PORT) { 353 dlid = info->vesw.u_ucast_dlid[def_port]; 354 } 355 } 356 357 return dlid; 358 } 359 360 /* opa_vnic_get_sc - return the service class */ 361 static u8 opa_vnic_get_sc(struct __opa_veswport_info *info, 362 struct sk_buff *skb) 363 { 364 struct ethhdr *mac_hdr = (struct ethhdr *)skb_mac_header(skb); 365 u16 vlan_tci; 366 u8 sc; 367 368 if (!__vlan_get_tag(skb, &vlan_tci)) { 369 u8 pcp = OPA_VNIC_VLAN_PCP(vlan_tci); 370 371 if (is_multicast_ether_addr(mac_hdr->h_dest)) 372 sc = info->vport.pcp_to_sc_mc[pcp]; 373 else 374 sc = info->vport.pcp_to_sc_uc[pcp]; 375 } else { 376 if (is_multicast_ether_addr(mac_hdr->h_dest)) 377 sc = info->vport.non_vlan_sc_mc; 378 else 379 sc = info->vport.non_vlan_sc_uc; 380 } 381 382 return sc; 383 } 384 385 u8 opa_vnic_get_vl(struct opa_vnic_adapter *adapter, struct sk_buff *skb) 386 { 387 struct ethhdr *mac_hdr = (struct ethhdr *)skb_mac_header(skb); 388 struct __opa_veswport_info *info = &adapter->info; 389 u8 vl; 390 391 if (skb_vlan_tag_present(skb)) { 392 u8 pcp = skb_vlan_tag_get(skb) >> VLAN_PRIO_SHIFT; 393 394 if (is_multicast_ether_addr(mac_hdr->h_dest)) 395 vl = info->vport.pcp_to_vl_mc[pcp]; 396 else 397 vl = info->vport.pcp_to_vl_uc[pcp]; 398 } else { 399 if (is_multicast_ether_addr(mac_hdr->h_dest)) 400 vl = info->vport.non_vlan_vl_mc; 401 else 402 vl = info->vport.non_vlan_vl_uc; 403 } 404 405 return vl; 406 } 407 408 /* opa_vnic_calc_entropy - calculate the packet entropy */ 409 u8 opa_vnic_calc_entropy(struct opa_vnic_adapter *adapter, struct sk_buff *skb) 410 { 411 u16 hash16; 412 413 /* 414 * Get flow based 16-bit hash and then XOR the upper and lower bytes 415 * to get the entropy. 416 * __skb_tx_hash limits qcount to 16 bits. Hence, get 15-bit hash. 417 */ 418 hash16 = __skb_tx_hash(adapter->netdev, skb, BIT(15)); 419 return (u8)((hash16 >> 8) ^ (hash16 & 0xff)); 420 } 421 422 /* opa_vnic_get_def_port - get default port based on entropy */ 423 static inline u8 opa_vnic_get_def_port(struct opa_vnic_adapter *adapter, 424 u8 entropy) 425 { 426 u8 flow_id; 427 428 /* Add the upper and lower 4-bits of entropy to get the flow id */ 429 flow_id = ((entropy & 0xf) + (entropy >> 4)); 430 return adapter->flow_tbl[flow_id & (OPA_VNIC_FLOW_TBL_SIZE - 1)]; 431 } 432 433 /* Calculate packet length including OPA header, crc and padding */ 434 static inline int opa_vnic_wire_length(struct sk_buff *skb) 435 { 436 u32 pad_len; 437 438 /* padding for 8 bytes size alignment */ 439 pad_len = -(skb->len + OPA_VNIC_ICRC_TAIL_LEN) & 0x7; 440 pad_len += OPA_VNIC_ICRC_TAIL_LEN; 441 442 return (skb->len + pad_len) >> 3; 443 } 444 445 /* opa_vnic_encap_skb - encapsulate skb packet with OPA header and meta data */ 446 void opa_vnic_encap_skb(struct opa_vnic_adapter *adapter, struct sk_buff *skb) 447 { 448 struct __opa_veswport_info *info = &adapter->info; 449 struct opa_vnic_skb_mdata *mdata; 450 u8 def_port, sc, entropy, *hdr; 451 u16 len, l4_hdr; 452 u32 dlid; 453 454 hdr = skb_push(skb, OPA_VNIC_HDR_LEN); 455 456 entropy = opa_vnic_calc_entropy(adapter, skb); 457 def_port = opa_vnic_get_def_port(adapter, entropy); 458 len = opa_vnic_wire_length(skb); 459 dlid = opa_vnic_get_dlid(adapter, skb, def_port); 460 sc = opa_vnic_get_sc(info, skb); 461 l4_hdr = info->vesw.vesw_id; 462 463 mdata = skb_push(skb, sizeof(*mdata)); 464 mdata->vl = opa_vnic_get_vl(adapter, skb); 465 mdata->entropy = entropy; 466 mdata->flags = 0; 467 if (unlikely(!dlid)) { 468 mdata->flags = OPA_VNIC_SKB_MDATA_ENCAP_ERR; 469 return; 470 } 471 472 opa_vnic_make_header(hdr, info->vport.encap_slid, dlid, len, 473 info->vesw.pkey, entropy, sc, 0, 474 OPA_VNIC_L4_ETHR, l4_hdr); 475 } 476