1 /* 2 * Copyright (c) 2017 Mellanox Technologies. All rights reserved. 3 * 4 * This software is available to you under a choice of one of two 5 * licenses. You may choose to be licensed under the terms of the GNU 6 * General Public License (GPL) Version 2, available from the file 7 * COPYING in the main directory of this source tree, or the 8 * OpenIB.org BSD license below: 9 * 10 * Redistribution and use in source and binary forms, with or 11 * without modification, are permitted provided that the following 12 * conditions are met: 13 * 14 * - Redistributions of source code must retain the above 15 * copyright notice, this list of conditions and the following 16 * disclaimer. 17 * 18 * - Redistributions in binary form must reproduce the above 19 * copyright notice, this list of conditions and the following 20 * disclaimer in the documentation and/or other materials 21 * provided with the distribution. 22 * 23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 * SOFTWARE. 31 * 32 */ 33 34 #include <crypto/internal/geniv.h> 35 #include <crypto/aead.h> 36 #include <linux/inetdevice.h> 37 #include <linux/netdevice.h> 38 #include <net/netevent.h> 39 40 #include "en.h" 41 #include "ipsec.h" 42 #include "ipsec_rxtx.h" 43 44 #define MLX5_IPSEC_RESCHED msecs_to_jiffies(1000) 45 #define MLX5E_IPSEC_TUNNEL_SA XA_MARK_1 46 47 static struct mlx5e_ipsec_sa_entry *to_ipsec_sa_entry(struct xfrm_state *x) 48 { 49 return (struct mlx5e_ipsec_sa_entry *)x->xso.offload_handle; 50 } 51 52 static struct mlx5e_ipsec_pol_entry *to_ipsec_pol_entry(struct xfrm_policy *x) 53 { 54 return (struct mlx5e_ipsec_pol_entry *)x->xdo.offload_handle; 55 } 56 57 static void mlx5e_ipsec_handle_tx_limit(struct work_struct *_work) 58 { 59 struct mlx5e_ipsec_dwork *dwork = 60 container_of(_work, struct mlx5e_ipsec_dwork, dwork.work); 61 struct mlx5e_ipsec_sa_entry *sa_entry = dwork->sa_entry; 62 struct xfrm_state *x = sa_entry->x; 63 64 if (sa_entry->attrs.drop) 65 return; 66 67 spin_lock_bh(&x->lock); 68 xfrm_state_check_expire(x); 69 if (x->km.state == XFRM_STATE_EXPIRED) { 70 sa_entry->attrs.drop = true; 71 spin_unlock_bh(&x->lock); 72 73 mlx5e_accel_ipsec_fs_modify(sa_entry); 74 return; 75 } 76 spin_unlock_bh(&x->lock); 77 78 queue_delayed_work(sa_entry->ipsec->wq, &dwork->dwork, 79 MLX5_IPSEC_RESCHED); 80 } 81 82 static bool mlx5e_ipsec_update_esn_state(struct mlx5e_ipsec_sa_entry *sa_entry) 83 { 84 struct xfrm_state *x = sa_entry->x; 85 u32 seq_bottom = 0; 86 u32 esn, esn_msb; 87 u8 overlap; 88 89 switch (x->xso.type) { 90 case XFRM_DEV_OFFLOAD_PACKET: 91 switch (x->xso.dir) { 92 case XFRM_DEV_OFFLOAD_IN: 93 esn = x->replay_esn->seq; 94 esn_msb = x->replay_esn->seq_hi; 95 break; 96 case XFRM_DEV_OFFLOAD_OUT: 97 esn = x->replay_esn->oseq; 98 esn_msb = x->replay_esn->oseq_hi; 99 break; 100 default: 101 WARN_ON(true); 102 return false; 103 } 104 break; 105 case XFRM_DEV_OFFLOAD_CRYPTO: 106 /* Already parsed by XFRM core */ 107 esn = x->replay_esn->seq; 108 break; 109 default: 110 WARN_ON(true); 111 return false; 112 } 113 114 overlap = sa_entry->esn_state.overlap; 115 116 if (esn >= x->replay_esn->replay_window) 117 seq_bottom = esn - x->replay_esn->replay_window + 1; 118 119 if (x->xso.type == XFRM_DEV_OFFLOAD_CRYPTO) 120 esn_msb = xfrm_replay_seqhi(x, htonl(seq_bottom)); 121 122 sa_entry->esn_state.esn = esn; 123 sa_entry->esn_state.esn_msb = esn_msb; 124 125 if (unlikely(overlap && seq_bottom < MLX5E_IPSEC_ESN_SCOPE_MID)) { 126 sa_entry->esn_state.overlap = 0; 127 return true; 128 } else if (unlikely(!overlap && 129 (seq_bottom >= MLX5E_IPSEC_ESN_SCOPE_MID))) { 130 sa_entry->esn_state.overlap = 1; 131 return true; 132 } 133 134 return false; 135 } 136 137 static void mlx5e_ipsec_init_limits(struct mlx5e_ipsec_sa_entry *sa_entry, 138 struct mlx5_accel_esp_xfrm_attrs *attrs) 139 { 140 struct xfrm_state *x = sa_entry->x; 141 s64 start_value, n; 142 143 attrs->lft.hard_packet_limit = x->lft.hard_packet_limit; 144 attrs->lft.soft_packet_limit = x->lft.soft_packet_limit; 145 if (x->lft.soft_packet_limit == XFRM_INF) 146 return; 147 148 /* Compute hard limit initial value and number of rounds. 149 * 150 * The counting pattern of hardware counter goes: 151 * value -> 2^31-1 152 * 2^31 | (2^31-1) -> 2^31-1 153 * 2^31 | (2^31-1) -> 2^31-1 154 * [..] 155 * 2^31 | (2^31-1) -> 0 156 * 157 * The pattern is created by using an ASO operation to atomically set 158 * bit 31 after the down counter clears bit 31. This is effectively an 159 * atomic addition of 2**31 to the counter. 160 * 161 * We wish to configure the counter, within the above pattern, so that 162 * when it reaches 0, it has hit the hard limit. This is defined by this 163 * system of equations: 164 * 165 * hard_limit == start_value + n * 2^31 166 * n >= 0 167 * start_value < 2^32, start_value >= 0 168 * 169 * These equations are not single-solution, there are often two choices: 170 * hard_limit == start_value + n * 2^31 171 * hard_limit == (start_value+2^31) + (n-1) * 2^31 172 * 173 * The algorithm selects the solution that keeps the counter value 174 * above 2^31 until the final iteration. 175 */ 176 177 /* Start by estimating n and compute start_value */ 178 n = attrs->lft.hard_packet_limit / BIT_ULL(31); 179 start_value = attrs->lft.hard_packet_limit - n * BIT_ULL(31); 180 181 /* Choose the best of the two solutions: */ 182 if (n >= 1) 183 n -= 1; 184 185 /* Computed values solve the system of equations: */ 186 start_value = attrs->lft.hard_packet_limit - n * BIT_ULL(31); 187 188 /* The best solution means: when there are multiple iterations we must 189 * start above 2^31 and count down to 2**31 to get the interrupt. 190 */ 191 attrs->lft.hard_packet_limit = lower_32_bits(start_value); 192 attrs->lft.numb_rounds_hard = (u64)n; 193 194 /* Compute soft limit initial value and number of rounds. 195 * 196 * The soft_limit is achieved by adjusting the counter's 197 * interrupt_value. This is embedded in the counting pattern created by 198 * hard packet calculations above. 199 * 200 * We wish to compute the interrupt_value for the soft_limit. This is 201 * defined by this system of equations: 202 * 203 * soft_limit == start_value - soft_value + n * 2^31 204 * n >= 0 205 * soft_value < 2^32, soft_value >= 0 206 * for n == 0 start_value > soft_value 207 * 208 * As with compute_hard_n_value() the equations are not single-solution. 209 * The algorithm selects the solution that has: 210 * 2^30 <= soft_limit < 2^31 + 2^30 211 * for the interior iterations, which guarantees a large guard band 212 * around the counter hard limit and next interrupt. 213 */ 214 215 /* Start by estimating n and compute soft_value */ 216 n = (x->lft.soft_packet_limit - attrs->lft.hard_packet_limit) / BIT_ULL(31); 217 start_value = attrs->lft.hard_packet_limit + n * BIT_ULL(31) - 218 x->lft.soft_packet_limit; 219 220 /* Compare against constraints and adjust n */ 221 if (n < 0) 222 n = 0; 223 else if (start_value >= BIT_ULL(32)) 224 n -= 1; 225 else if (start_value < 0) 226 n += 1; 227 228 /* Choose the best of the two solutions: */ 229 start_value = attrs->lft.hard_packet_limit + n * BIT_ULL(31) - start_value; 230 if (n != attrs->lft.numb_rounds_hard && start_value < BIT_ULL(30)) 231 n += 1; 232 233 /* Note that the upper limit of soft_value happens naturally because we 234 * always select the lowest soft_value. 235 */ 236 237 /* Computed values solve the system of equations: */ 238 start_value = attrs->lft.hard_packet_limit + n * BIT_ULL(31) - start_value; 239 240 /* The best solution means: when there are multiple iterations we must 241 * not fall below 2^30 as that would get too close to the false 242 * hard_limit and when we reach an interior iteration for soft_limit it 243 * has to be far away from 2**32-1 which is the counter reset point 244 * after the +2^31 to accommodate latency. 245 */ 246 attrs->lft.soft_packet_limit = lower_32_bits(start_value); 247 attrs->lft.numb_rounds_soft = (u64)n; 248 } 249 250 static void mlx5e_ipsec_init_macs(struct mlx5e_ipsec_sa_entry *sa_entry, 251 struct mlx5_accel_esp_xfrm_attrs *attrs) 252 { 253 struct mlx5_core_dev *mdev = mlx5e_ipsec_sa2dev(sa_entry); 254 struct xfrm_state *x = sa_entry->x; 255 struct net_device *netdev; 256 struct neighbour *n; 257 u8 addr[ETH_ALEN]; 258 const void *pkey; 259 u8 *dst, *src; 260 261 if (attrs->mode != XFRM_MODE_TUNNEL || 262 attrs->type != XFRM_DEV_OFFLOAD_PACKET) 263 return; 264 265 netdev = x->xso.real_dev; 266 267 mlx5_query_mac_address(mdev, addr); 268 switch (attrs->dir) { 269 case XFRM_DEV_OFFLOAD_IN: 270 src = attrs->dmac; 271 dst = attrs->smac; 272 pkey = &attrs->saddr.a4; 273 break; 274 case XFRM_DEV_OFFLOAD_OUT: 275 src = attrs->smac; 276 dst = attrs->dmac; 277 pkey = &attrs->daddr.a4; 278 break; 279 default: 280 return; 281 } 282 283 ether_addr_copy(src, addr); 284 n = neigh_lookup(&arp_tbl, pkey, netdev); 285 if (!n) { 286 n = neigh_create(&arp_tbl, pkey, netdev); 287 if (IS_ERR(n)) 288 return; 289 neigh_event_send(n, NULL); 290 attrs->drop = true; 291 } else { 292 neigh_ha_snapshot(addr, n, netdev); 293 ether_addr_copy(dst, addr); 294 } 295 neigh_release(n); 296 } 297 298 void mlx5e_ipsec_build_accel_xfrm_attrs(struct mlx5e_ipsec_sa_entry *sa_entry, 299 struct mlx5_accel_esp_xfrm_attrs *attrs) 300 { 301 struct xfrm_state *x = sa_entry->x; 302 struct aes_gcm_keymat *aes_gcm = &attrs->aes_gcm; 303 struct aead_geniv_ctx *geniv_ctx; 304 struct crypto_aead *aead; 305 unsigned int crypto_data_len, key_len; 306 int ivsize; 307 308 memset(attrs, 0, sizeof(*attrs)); 309 310 /* key */ 311 crypto_data_len = (x->aead->alg_key_len + 7) / 8; 312 key_len = crypto_data_len - 4; /* 4 bytes salt at end */ 313 314 memcpy(aes_gcm->aes_key, x->aead->alg_key, key_len); 315 aes_gcm->key_len = key_len * 8; 316 317 /* salt and seq_iv */ 318 aead = x->data; 319 geniv_ctx = crypto_aead_ctx(aead); 320 ivsize = crypto_aead_ivsize(aead); 321 memcpy(&aes_gcm->seq_iv, &geniv_ctx->salt, ivsize); 322 memcpy(&aes_gcm->salt, x->aead->alg_key + key_len, 323 sizeof(aes_gcm->salt)); 324 325 attrs->authsize = crypto_aead_authsize(aead) / 4; /* in dwords */ 326 327 /* iv len */ 328 aes_gcm->icv_len = x->aead->alg_icv_len; 329 330 /* esn */ 331 if (x->props.flags & XFRM_STATE_ESN) { 332 attrs->replay_esn.trigger = true; 333 attrs->replay_esn.esn = sa_entry->esn_state.esn; 334 attrs->replay_esn.esn_msb = sa_entry->esn_state.esn_msb; 335 attrs->replay_esn.overlap = sa_entry->esn_state.overlap; 336 } 337 338 attrs->dir = x->xso.dir; 339 /* spi */ 340 attrs->spi = be32_to_cpu(x->id.spi); 341 342 /* source , destination ips */ 343 memcpy(&attrs->saddr, x->props.saddr.a6, sizeof(attrs->saddr)); 344 memcpy(&attrs->daddr, x->id.daddr.a6, sizeof(attrs->daddr)); 345 attrs->family = x->props.family; 346 attrs->type = x->xso.type; 347 attrs->reqid = x->props.reqid; 348 attrs->upspec.dport = ntohs(x->sel.dport); 349 attrs->upspec.dport_mask = ntohs(x->sel.dport_mask); 350 attrs->upspec.sport = ntohs(x->sel.sport); 351 attrs->upspec.sport_mask = ntohs(x->sel.sport_mask); 352 attrs->upspec.proto = x->sel.proto; 353 attrs->mode = x->props.mode; 354 355 mlx5e_ipsec_init_limits(sa_entry, attrs); 356 mlx5e_ipsec_init_macs(sa_entry, attrs); 357 } 358 359 static int mlx5e_xfrm_validate_state(struct mlx5_core_dev *mdev, 360 struct xfrm_state *x, 361 struct netlink_ext_ack *extack) 362 { 363 if (x->props.aalgo != SADB_AALG_NONE) { 364 NL_SET_ERR_MSG_MOD(extack, "Cannot offload authenticated xfrm states"); 365 return -EINVAL; 366 } 367 if (x->props.ealgo != SADB_X_EALG_AES_GCM_ICV16) { 368 NL_SET_ERR_MSG_MOD(extack, "Only AES-GCM-ICV16 xfrm state may be offloaded"); 369 return -EINVAL; 370 } 371 if (x->props.calgo != SADB_X_CALG_NONE) { 372 NL_SET_ERR_MSG_MOD(extack, "Cannot offload compressed xfrm states"); 373 return -EINVAL; 374 } 375 if (x->props.flags & XFRM_STATE_ESN && 376 !(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_ESN)) { 377 NL_SET_ERR_MSG_MOD(extack, "Cannot offload ESN xfrm states"); 378 return -EINVAL; 379 } 380 if (x->props.family != AF_INET && 381 x->props.family != AF_INET6) { 382 NL_SET_ERR_MSG_MOD(extack, "Only IPv4/6 xfrm states may be offloaded"); 383 return -EINVAL; 384 } 385 if (x->id.proto != IPPROTO_ESP) { 386 NL_SET_ERR_MSG_MOD(extack, "Only ESP xfrm state may be offloaded"); 387 return -EINVAL; 388 } 389 if (x->encap) { 390 NL_SET_ERR_MSG_MOD(extack, "Encapsulated xfrm state may not be offloaded"); 391 return -EINVAL; 392 } 393 if (!x->aead) { 394 NL_SET_ERR_MSG_MOD(extack, "Cannot offload xfrm states without aead"); 395 return -EINVAL; 396 } 397 if (x->aead->alg_icv_len != 128) { 398 NL_SET_ERR_MSG_MOD(extack, "Cannot offload xfrm states with AEAD ICV length other than 128bit"); 399 return -EINVAL; 400 } 401 if ((x->aead->alg_key_len != 128 + 32) && 402 (x->aead->alg_key_len != 256 + 32)) { 403 NL_SET_ERR_MSG_MOD(extack, "Cannot offload xfrm states with AEAD key length other than 128/256 bit"); 404 return -EINVAL; 405 } 406 if (x->tfcpad) { 407 NL_SET_ERR_MSG_MOD(extack, "Cannot offload xfrm states with tfc padding"); 408 return -EINVAL; 409 } 410 if (!x->geniv) { 411 NL_SET_ERR_MSG_MOD(extack, "Cannot offload xfrm states without geniv"); 412 return -EINVAL; 413 } 414 if (strcmp(x->geniv, "seqiv")) { 415 NL_SET_ERR_MSG_MOD(extack, "Cannot offload xfrm states with geniv other than seqiv"); 416 return -EINVAL; 417 } 418 419 if (x->sel.proto != IPPROTO_IP && 420 (x->sel.proto != IPPROTO_UDP || x->xso.dir != XFRM_DEV_OFFLOAD_OUT)) { 421 NL_SET_ERR_MSG_MOD(extack, "Device does not support upper protocol other than UDP, and only Tx direction"); 422 return -EINVAL; 423 } 424 425 if (x->props.mode != XFRM_MODE_TRANSPORT && x->props.mode != XFRM_MODE_TUNNEL) { 426 NL_SET_ERR_MSG_MOD(extack, "Only transport and tunnel xfrm states may be offloaded"); 427 return -EINVAL; 428 } 429 430 switch (x->xso.type) { 431 case XFRM_DEV_OFFLOAD_CRYPTO: 432 if (!(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_CRYPTO)) { 433 NL_SET_ERR_MSG_MOD(extack, "Crypto offload is not supported"); 434 return -EINVAL; 435 } 436 437 break; 438 case XFRM_DEV_OFFLOAD_PACKET: 439 if (!(mlx5_ipsec_device_caps(mdev) & 440 MLX5_IPSEC_CAP_PACKET_OFFLOAD)) { 441 NL_SET_ERR_MSG_MOD(extack, "Packet offload is not supported"); 442 return -EINVAL; 443 } 444 445 if (x->props.mode == XFRM_MODE_TUNNEL && 446 !(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_TUNNEL)) { 447 NL_SET_ERR_MSG_MOD(extack, "Packet offload is not supported for tunnel mode"); 448 return -EINVAL; 449 } 450 451 if (x->replay_esn && x->replay_esn->replay_window != 32 && 452 x->replay_esn->replay_window != 64 && 453 x->replay_esn->replay_window != 128 && 454 x->replay_esn->replay_window != 256) { 455 NL_SET_ERR_MSG_MOD(extack, "Unsupported replay window size"); 456 return -EINVAL; 457 } 458 459 if (!x->props.reqid) { 460 NL_SET_ERR_MSG_MOD(extack, "Cannot offload without reqid"); 461 return -EINVAL; 462 } 463 464 if (x->lft.hard_byte_limit != XFRM_INF || 465 x->lft.soft_byte_limit != XFRM_INF) { 466 NL_SET_ERR_MSG_MOD(extack, "Device doesn't support limits in bytes"); 467 return -EINVAL; 468 } 469 470 if (x->lft.soft_packet_limit >= x->lft.hard_packet_limit && 471 x->lft.hard_packet_limit != XFRM_INF) { 472 /* XFRM stack doesn't prevent such configuration :(. */ 473 NL_SET_ERR_MSG_MOD(extack, "Hard packet limit must be greater than soft one"); 474 return -EINVAL; 475 } 476 477 if (!x->lft.soft_packet_limit || !x->lft.hard_packet_limit) { 478 NL_SET_ERR_MSG_MOD(extack, "Soft/hard packet limits can't be 0"); 479 return -EINVAL; 480 } 481 break; 482 default: 483 NL_SET_ERR_MSG_MOD(extack, "Unsupported xfrm offload type"); 484 return -EINVAL; 485 } 486 return 0; 487 } 488 489 static void mlx5e_ipsec_modify_state(struct work_struct *_work) 490 { 491 struct mlx5e_ipsec_work *work = 492 container_of(_work, struct mlx5e_ipsec_work, work); 493 struct mlx5e_ipsec_sa_entry *sa_entry = work->sa_entry; 494 struct mlx5_accel_esp_xfrm_attrs *attrs; 495 496 attrs = &((struct mlx5e_ipsec_sa_entry *)work->data)->attrs; 497 498 mlx5_accel_esp_modify_xfrm(sa_entry, attrs); 499 } 500 501 static void mlx5e_ipsec_set_esn_ops(struct mlx5e_ipsec_sa_entry *sa_entry) 502 { 503 struct xfrm_state *x = sa_entry->x; 504 505 if (x->xso.type != XFRM_DEV_OFFLOAD_CRYPTO || 506 x->xso.dir != XFRM_DEV_OFFLOAD_OUT) 507 return; 508 509 if (x->props.flags & XFRM_STATE_ESN) { 510 sa_entry->set_iv_op = mlx5e_ipsec_set_iv_esn; 511 return; 512 } 513 514 sa_entry->set_iv_op = mlx5e_ipsec_set_iv; 515 } 516 517 static void mlx5e_ipsec_handle_netdev_event(struct work_struct *_work) 518 { 519 struct mlx5e_ipsec_work *work = 520 container_of(_work, struct mlx5e_ipsec_work, work); 521 struct mlx5e_ipsec_sa_entry *sa_entry = work->sa_entry; 522 struct mlx5e_ipsec_netevent_data *data = work->data; 523 struct mlx5_accel_esp_xfrm_attrs *attrs; 524 525 attrs = &sa_entry->attrs; 526 527 switch (attrs->dir) { 528 case XFRM_DEV_OFFLOAD_IN: 529 ether_addr_copy(attrs->smac, data->addr); 530 break; 531 case XFRM_DEV_OFFLOAD_OUT: 532 ether_addr_copy(attrs->dmac, data->addr); 533 break; 534 default: 535 WARN_ON_ONCE(true); 536 } 537 attrs->drop = false; 538 mlx5e_accel_ipsec_fs_modify(sa_entry); 539 } 540 541 static int mlx5_ipsec_create_work(struct mlx5e_ipsec_sa_entry *sa_entry) 542 { 543 struct xfrm_state *x = sa_entry->x; 544 struct mlx5e_ipsec_work *work; 545 void *data = NULL; 546 547 switch (x->xso.type) { 548 case XFRM_DEV_OFFLOAD_CRYPTO: 549 if (!(x->props.flags & XFRM_STATE_ESN)) 550 return 0; 551 break; 552 case XFRM_DEV_OFFLOAD_PACKET: 553 if (x->props.mode != XFRM_MODE_TUNNEL) 554 return 0; 555 break; 556 default: 557 break; 558 } 559 560 work = kzalloc(sizeof(*work), GFP_KERNEL); 561 if (!work) 562 return -ENOMEM; 563 564 switch (x->xso.type) { 565 case XFRM_DEV_OFFLOAD_CRYPTO: 566 data = kzalloc(sizeof(*sa_entry), GFP_KERNEL); 567 if (!data) 568 goto free_work; 569 570 INIT_WORK(&work->work, mlx5e_ipsec_modify_state); 571 break; 572 case XFRM_DEV_OFFLOAD_PACKET: 573 data = kzalloc(sizeof(struct mlx5e_ipsec_netevent_data), 574 GFP_KERNEL); 575 if (!data) 576 goto free_work; 577 578 INIT_WORK(&work->work, mlx5e_ipsec_handle_netdev_event); 579 break; 580 default: 581 break; 582 } 583 584 work->data = data; 585 work->sa_entry = sa_entry; 586 sa_entry->work = work; 587 return 0; 588 589 free_work: 590 kfree(work); 591 return -ENOMEM; 592 } 593 594 static int mlx5e_ipsec_create_dwork(struct mlx5e_ipsec_sa_entry *sa_entry) 595 { 596 struct xfrm_state *x = sa_entry->x; 597 struct mlx5e_ipsec_dwork *dwork; 598 599 if (x->xso.type != XFRM_DEV_OFFLOAD_PACKET) 600 return 0; 601 602 if (x->xso.dir != XFRM_DEV_OFFLOAD_OUT) 603 return 0; 604 605 if (x->lft.soft_packet_limit == XFRM_INF && 606 x->lft.hard_packet_limit == XFRM_INF) 607 return 0; 608 609 dwork = kzalloc(sizeof(*dwork), GFP_KERNEL); 610 if (!dwork) 611 return -ENOMEM; 612 613 dwork->sa_entry = sa_entry; 614 INIT_DELAYED_WORK(&dwork->dwork, mlx5e_ipsec_handle_tx_limit); 615 sa_entry->dwork = dwork; 616 return 0; 617 } 618 619 static int mlx5e_xfrm_add_state(struct xfrm_state *x, 620 struct netlink_ext_ack *extack) 621 { 622 struct mlx5e_ipsec_sa_entry *sa_entry = NULL; 623 struct net_device *netdev = x->xso.real_dev; 624 struct mlx5e_ipsec *ipsec; 625 struct mlx5e_priv *priv; 626 gfp_t gfp; 627 int err; 628 629 priv = netdev_priv(netdev); 630 if (!priv->ipsec) 631 return -EOPNOTSUPP; 632 633 ipsec = priv->ipsec; 634 gfp = (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ) ? GFP_ATOMIC : GFP_KERNEL; 635 sa_entry = kzalloc(sizeof(*sa_entry), gfp); 636 if (!sa_entry) 637 return -ENOMEM; 638 639 sa_entry->x = x; 640 sa_entry->ipsec = ipsec; 641 /* Check if this SA is originated from acquire flow temporary SA */ 642 if (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ) 643 goto out; 644 645 err = mlx5e_xfrm_validate_state(priv->mdev, x, extack); 646 if (err) 647 goto err_xfrm; 648 649 /* check esn */ 650 if (x->props.flags & XFRM_STATE_ESN) 651 mlx5e_ipsec_update_esn_state(sa_entry); 652 653 mlx5e_ipsec_build_accel_xfrm_attrs(sa_entry, &sa_entry->attrs); 654 655 err = mlx5_ipsec_create_work(sa_entry); 656 if (err) 657 goto err_xfrm; 658 659 err = mlx5e_ipsec_create_dwork(sa_entry); 660 if (err) 661 goto release_work; 662 663 /* create hw context */ 664 err = mlx5_ipsec_create_sa_ctx(sa_entry); 665 if (err) 666 goto release_dwork; 667 668 err = mlx5e_accel_ipsec_fs_add_rule(sa_entry); 669 if (err) 670 goto err_hw_ctx; 671 672 if (x->props.mode == XFRM_MODE_TUNNEL && 673 x->xso.type == XFRM_DEV_OFFLOAD_PACKET && 674 !mlx5e_ipsec_fs_tunnel_enabled(sa_entry)) { 675 NL_SET_ERR_MSG_MOD(extack, "Packet offload tunnel mode is disabled due to encap settings"); 676 err = -EINVAL; 677 goto err_add_rule; 678 } 679 680 /* We use *_bh() variant because xfrm_timer_handler(), which runs 681 * in softirq context, can reach our state delete logic and we need 682 * xa_erase_bh() there. 683 */ 684 err = xa_insert_bh(&ipsec->sadb, sa_entry->ipsec_obj_id, sa_entry, 685 GFP_KERNEL); 686 if (err) 687 goto err_add_rule; 688 689 mlx5e_ipsec_set_esn_ops(sa_entry); 690 691 if (sa_entry->dwork) 692 queue_delayed_work(ipsec->wq, &sa_entry->dwork->dwork, 693 MLX5_IPSEC_RESCHED); 694 695 if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET && 696 x->props.mode == XFRM_MODE_TUNNEL) 697 xa_set_mark(&ipsec->sadb, sa_entry->ipsec_obj_id, 698 MLX5E_IPSEC_TUNNEL_SA); 699 700 out: 701 x->xso.offload_handle = (unsigned long)sa_entry; 702 return 0; 703 704 err_add_rule: 705 mlx5e_accel_ipsec_fs_del_rule(sa_entry); 706 err_hw_ctx: 707 mlx5_ipsec_free_sa_ctx(sa_entry); 708 release_dwork: 709 kfree(sa_entry->dwork); 710 release_work: 711 if (sa_entry->work) 712 kfree(sa_entry->work->data); 713 kfree(sa_entry->work); 714 err_xfrm: 715 kfree(sa_entry); 716 NL_SET_ERR_MSG_WEAK_MOD(extack, "Device failed to offload this state"); 717 return err; 718 } 719 720 static void mlx5e_xfrm_del_state(struct xfrm_state *x) 721 { 722 struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x); 723 struct mlx5_accel_esp_xfrm_attrs *attrs = &sa_entry->attrs; 724 struct mlx5e_ipsec *ipsec = sa_entry->ipsec; 725 struct mlx5e_ipsec_sa_entry *old; 726 727 if (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ) 728 return; 729 730 old = xa_erase_bh(&ipsec->sadb, sa_entry->ipsec_obj_id); 731 WARN_ON(old != sa_entry); 732 733 if (attrs->mode == XFRM_MODE_TUNNEL && 734 attrs->type == XFRM_DEV_OFFLOAD_PACKET) 735 /* Make sure that no ARP requests are running in parallel */ 736 flush_workqueue(ipsec->wq); 737 738 } 739 740 static void mlx5e_xfrm_free_state(struct xfrm_state *x) 741 { 742 struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x); 743 744 if (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ) 745 goto sa_entry_free; 746 747 if (sa_entry->work) 748 cancel_work_sync(&sa_entry->work->work); 749 750 if (sa_entry->dwork) 751 cancel_delayed_work_sync(&sa_entry->dwork->dwork); 752 753 mlx5e_accel_ipsec_fs_del_rule(sa_entry); 754 mlx5_ipsec_free_sa_ctx(sa_entry); 755 kfree(sa_entry->dwork); 756 if (sa_entry->work) 757 kfree(sa_entry->work->data); 758 kfree(sa_entry->work); 759 sa_entry_free: 760 kfree(sa_entry); 761 } 762 763 static int mlx5e_ipsec_netevent_event(struct notifier_block *nb, 764 unsigned long event, void *ptr) 765 { 766 struct mlx5_accel_esp_xfrm_attrs *attrs; 767 struct mlx5e_ipsec_netevent_data *data; 768 struct mlx5e_ipsec_sa_entry *sa_entry; 769 struct mlx5e_ipsec *ipsec; 770 struct neighbour *n = ptr; 771 struct net_device *netdev; 772 struct xfrm_state *x; 773 unsigned long idx; 774 775 if (event != NETEVENT_NEIGH_UPDATE || !(n->nud_state & NUD_VALID)) 776 return NOTIFY_DONE; 777 778 ipsec = container_of(nb, struct mlx5e_ipsec, netevent_nb); 779 xa_for_each_marked(&ipsec->sadb, idx, sa_entry, MLX5E_IPSEC_TUNNEL_SA) { 780 attrs = &sa_entry->attrs; 781 782 if (attrs->family == AF_INET) { 783 if (!neigh_key_eq32(n, &attrs->saddr.a4) && 784 !neigh_key_eq32(n, &attrs->daddr.a4)) 785 continue; 786 } else { 787 if (!neigh_key_eq128(n, &attrs->saddr.a4) && 788 !neigh_key_eq128(n, &attrs->daddr.a4)) 789 continue; 790 } 791 792 x = sa_entry->x; 793 netdev = x->xso.real_dev; 794 data = sa_entry->work->data; 795 796 neigh_ha_snapshot(data->addr, n, netdev); 797 queue_work(ipsec->wq, &sa_entry->work->work); 798 } 799 800 return NOTIFY_DONE; 801 } 802 803 void mlx5e_ipsec_init(struct mlx5e_priv *priv) 804 { 805 struct mlx5e_ipsec *ipsec; 806 int ret = -ENOMEM; 807 808 if (!mlx5_ipsec_device_caps(priv->mdev)) { 809 netdev_dbg(priv->netdev, "Not an IPSec offload device\n"); 810 return; 811 } 812 813 ipsec = kzalloc(sizeof(*ipsec), GFP_KERNEL); 814 if (!ipsec) 815 return; 816 817 xa_init_flags(&ipsec->sadb, XA_FLAGS_ALLOC); 818 ipsec->mdev = priv->mdev; 819 ipsec->wq = alloc_workqueue("mlx5e_ipsec: %s", WQ_UNBOUND, 0, 820 priv->netdev->name); 821 if (!ipsec->wq) 822 goto err_wq; 823 824 if (mlx5_ipsec_device_caps(priv->mdev) & 825 MLX5_IPSEC_CAP_PACKET_OFFLOAD) { 826 ret = mlx5e_ipsec_aso_init(ipsec); 827 if (ret) 828 goto err_aso; 829 } 830 831 if (mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_TUNNEL) { 832 ipsec->netevent_nb.notifier_call = mlx5e_ipsec_netevent_event; 833 ret = register_netevent_notifier(&ipsec->netevent_nb); 834 if (ret) 835 goto clear_aso; 836 } 837 838 ret = mlx5e_accel_ipsec_fs_init(ipsec); 839 if (ret) 840 goto err_fs_init; 841 842 ipsec->fs = priv->fs; 843 priv->ipsec = ipsec; 844 netdev_dbg(priv->netdev, "IPSec attached to netdevice\n"); 845 return; 846 847 err_fs_init: 848 if (mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_TUNNEL) 849 unregister_netevent_notifier(&ipsec->netevent_nb); 850 clear_aso: 851 if (mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_PACKET_OFFLOAD) 852 mlx5e_ipsec_aso_cleanup(ipsec); 853 err_aso: 854 destroy_workqueue(ipsec->wq); 855 err_wq: 856 kfree(ipsec); 857 mlx5_core_err(priv->mdev, "IPSec initialization failed, %d\n", ret); 858 return; 859 } 860 861 void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv) 862 { 863 struct mlx5e_ipsec *ipsec = priv->ipsec; 864 865 if (!ipsec) 866 return; 867 868 mlx5e_accel_ipsec_fs_cleanup(ipsec); 869 if (mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_TUNNEL) 870 unregister_netevent_notifier(&ipsec->netevent_nb); 871 if (mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_PACKET_OFFLOAD) 872 mlx5e_ipsec_aso_cleanup(ipsec); 873 destroy_workqueue(ipsec->wq); 874 kfree(ipsec); 875 priv->ipsec = NULL; 876 } 877 878 static bool mlx5e_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *x) 879 { 880 if (x->props.family == AF_INET) { 881 /* Offload with IPv4 options is not supported yet */ 882 if (ip_hdr(skb)->ihl > 5) 883 return false; 884 } else { 885 /* Offload with IPv6 extension headers is not support yet */ 886 if (ipv6_ext_hdr(ipv6_hdr(skb)->nexthdr)) 887 return false; 888 } 889 890 return true; 891 } 892 893 static void mlx5e_xfrm_advance_esn_state(struct xfrm_state *x) 894 { 895 struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x); 896 struct mlx5e_ipsec_work *work = sa_entry->work; 897 struct mlx5e_ipsec_sa_entry *sa_entry_shadow; 898 bool need_update; 899 900 need_update = mlx5e_ipsec_update_esn_state(sa_entry); 901 if (!need_update) 902 return; 903 904 sa_entry_shadow = work->data; 905 memset(sa_entry_shadow, 0x00, sizeof(*sa_entry_shadow)); 906 mlx5e_ipsec_build_accel_xfrm_attrs(sa_entry, &sa_entry_shadow->attrs); 907 queue_work(sa_entry->ipsec->wq, &work->work); 908 } 909 910 static void mlx5e_xfrm_update_curlft(struct xfrm_state *x) 911 { 912 struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x); 913 struct mlx5e_ipsec_rule *ipsec_rule = &sa_entry->ipsec_rule; 914 u64 packets, bytes, lastuse; 915 916 lockdep_assert(lockdep_is_held(&x->lock) || 917 lockdep_is_held(&dev_net(x->xso.real_dev)->xfrm.xfrm_cfg_mutex)); 918 919 if (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ) 920 return; 921 922 mlx5_fc_query_cached(ipsec_rule->fc, &bytes, &packets, &lastuse); 923 x->curlft.packets += packets; 924 x->curlft.bytes += bytes; 925 } 926 927 static int mlx5e_xfrm_validate_policy(struct mlx5_core_dev *mdev, 928 struct xfrm_policy *x, 929 struct netlink_ext_ack *extack) 930 { 931 struct xfrm_selector *sel = &x->selector; 932 933 if (x->type != XFRM_POLICY_TYPE_MAIN) { 934 NL_SET_ERR_MSG_MOD(extack, "Cannot offload non-main policy types"); 935 return -EINVAL; 936 } 937 938 /* Please pay attention that we support only one template */ 939 if (x->xfrm_nr > 1) { 940 NL_SET_ERR_MSG_MOD(extack, "Cannot offload more than one template"); 941 return -EINVAL; 942 } 943 944 if (x->xdo.dir != XFRM_DEV_OFFLOAD_IN && 945 x->xdo.dir != XFRM_DEV_OFFLOAD_OUT) { 946 NL_SET_ERR_MSG_MOD(extack, "Cannot offload forward policy"); 947 return -EINVAL; 948 } 949 950 if (!x->xfrm_vec[0].reqid && sel->proto == IPPROTO_IP && 951 addr6_all_zero(sel->saddr.a6) && addr6_all_zero(sel->daddr.a6)) { 952 NL_SET_ERR_MSG_MOD(extack, "Unsupported policy with reqid 0 without at least one of upper protocol or ip addr(s) different than 0"); 953 return -EINVAL; 954 } 955 956 if (x->xdo.type != XFRM_DEV_OFFLOAD_PACKET) { 957 NL_SET_ERR_MSG_MOD(extack, "Unsupported xfrm offload type"); 958 return -EINVAL; 959 } 960 961 if (sel->proto != IPPROTO_IP && 962 (sel->proto != IPPROTO_UDP || x->xdo.dir != XFRM_DEV_OFFLOAD_OUT)) { 963 NL_SET_ERR_MSG_MOD(extack, "Device does not support upper protocol other than UDP, and only Tx direction"); 964 return -EINVAL; 965 } 966 967 if (x->priority) { 968 if (!(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_PRIO)) { 969 NL_SET_ERR_MSG_MOD(extack, "Device does not support policy priority"); 970 return -EINVAL; 971 } 972 973 if (x->priority == U32_MAX) { 974 NL_SET_ERR_MSG_MOD(extack, "Device does not support requested policy priority"); 975 return -EINVAL; 976 } 977 } 978 979 return 0; 980 } 981 982 static void 983 mlx5e_ipsec_build_accel_pol_attrs(struct mlx5e_ipsec_pol_entry *pol_entry, 984 struct mlx5_accel_pol_xfrm_attrs *attrs) 985 { 986 struct xfrm_policy *x = pol_entry->x; 987 struct xfrm_selector *sel; 988 989 sel = &x->selector; 990 memset(attrs, 0, sizeof(*attrs)); 991 992 memcpy(&attrs->saddr, sel->saddr.a6, sizeof(attrs->saddr)); 993 memcpy(&attrs->daddr, sel->daddr.a6, sizeof(attrs->daddr)); 994 attrs->family = sel->family; 995 attrs->dir = x->xdo.dir; 996 attrs->action = x->action; 997 attrs->type = XFRM_DEV_OFFLOAD_PACKET; 998 attrs->reqid = x->xfrm_vec[0].reqid; 999 attrs->upspec.dport = ntohs(sel->dport); 1000 attrs->upspec.dport_mask = ntohs(sel->dport_mask); 1001 attrs->upspec.sport = ntohs(sel->sport); 1002 attrs->upspec.sport_mask = ntohs(sel->sport_mask); 1003 attrs->upspec.proto = sel->proto; 1004 attrs->prio = x->priority; 1005 } 1006 1007 static int mlx5e_xfrm_add_policy(struct xfrm_policy *x, 1008 struct netlink_ext_ack *extack) 1009 { 1010 struct net_device *netdev = x->xdo.real_dev; 1011 struct mlx5e_ipsec_pol_entry *pol_entry; 1012 struct mlx5e_priv *priv; 1013 int err; 1014 1015 priv = netdev_priv(netdev); 1016 if (!priv->ipsec) { 1017 NL_SET_ERR_MSG_MOD(extack, "Device doesn't support IPsec packet offload"); 1018 return -EOPNOTSUPP; 1019 } 1020 1021 err = mlx5e_xfrm_validate_policy(priv->mdev, x, extack); 1022 if (err) 1023 return err; 1024 1025 pol_entry = kzalloc(sizeof(*pol_entry), GFP_KERNEL); 1026 if (!pol_entry) 1027 return -ENOMEM; 1028 1029 pol_entry->x = x; 1030 pol_entry->ipsec = priv->ipsec; 1031 1032 mlx5e_ipsec_build_accel_pol_attrs(pol_entry, &pol_entry->attrs); 1033 err = mlx5e_accel_ipsec_fs_add_pol(pol_entry); 1034 if (err) 1035 goto err_fs; 1036 1037 x->xdo.offload_handle = (unsigned long)pol_entry; 1038 return 0; 1039 1040 err_fs: 1041 kfree(pol_entry); 1042 NL_SET_ERR_MSG_MOD(extack, "Device failed to offload this policy"); 1043 return err; 1044 } 1045 1046 static void mlx5e_xfrm_del_policy(struct xfrm_policy *x) 1047 { 1048 struct mlx5e_ipsec_pol_entry *pol_entry = to_ipsec_pol_entry(x); 1049 1050 mlx5e_accel_ipsec_fs_del_pol(pol_entry); 1051 } 1052 1053 static void mlx5e_xfrm_free_policy(struct xfrm_policy *x) 1054 { 1055 struct mlx5e_ipsec_pol_entry *pol_entry = to_ipsec_pol_entry(x); 1056 1057 kfree(pol_entry); 1058 } 1059 1060 static const struct xfrmdev_ops mlx5e_ipsec_xfrmdev_ops = { 1061 .xdo_dev_state_add = mlx5e_xfrm_add_state, 1062 .xdo_dev_state_delete = mlx5e_xfrm_del_state, 1063 .xdo_dev_state_free = mlx5e_xfrm_free_state, 1064 .xdo_dev_offload_ok = mlx5e_ipsec_offload_ok, 1065 .xdo_dev_state_advance_esn = mlx5e_xfrm_advance_esn_state, 1066 }; 1067 1068 static const struct xfrmdev_ops mlx5e_ipsec_packet_xfrmdev_ops = { 1069 .xdo_dev_state_add = mlx5e_xfrm_add_state, 1070 .xdo_dev_state_delete = mlx5e_xfrm_del_state, 1071 .xdo_dev_state_free = mlx5e_xfrm_free_state, 1072 .xdo_dev_offload_ok = mlx5e_ipsec_offload_ok, 1073 .xdo_dev_state_advance_esn = mlx5e_xfrm_advance_esn_state, 1074 1075 .xdo_dev_state_update_curlft = mlx5e_xfrm_update_curlft, 1076 .xdo_dev_policy_add = mlx5e_xfrm_add_policy, 1077 .xdo_dev_policy_delete = mlx5e_xfrm_del_policy, 1078 .xdo_dev_policy_free = mlx5e_xfrm_free_policy, 1079 }; 1080 1081 void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv) 1082 { 1083 struct mlx5_core_dev *mdev = priv->mdev; 1084 struct net_device *netdev = priv->netdev; 1085 1086 if (!mlx5_ipsec_device_caps(mdev)) 1087 return; 1088 1089 mlx5_core_info(mdev, "mlx5e: IPSec ESP acceleration enabled\n"); 1090 1091 if (mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_PACKET_OFFLOAD) 1092 netdev->xfrmdev_ops = &mlx5e_ipsec_packet_xfrmdev_ops; 1093 else 1094 netdev->xfrmdev_ops = &mlx5e_ipsec_xfrmdev_ops; 1095 1096 netdev->features |= NETIF_F_HW_ESP; 1097 netdev->hw_enc_features |= NETIF_F_HW_ESP; 1098 1099 if (!MLX5_CAP_ETH(mdev, swp_csum)) { 1100 mlx5_core_dbg(mdev, "mlx5e: SWP checksum not supported\n"); 1101 return; 1102 } 1103 1104 netdev->features |= NETIF_F_HW_ESP_TX_CSUM; 1105 netdev->hw_enc_features |= NETIF_F_HW_ESP_TX_CSUM; 1106 1107 if (!MLX5_CAP_ETH(mdev, swp_lso)) { 1108 mlx5_core_dbg(mdev, "mlx5e: ESP LSO not supported\n"); 1109 return; 1110 } 1111 1112 netdev->gso_partial_features |= NETIF_F_GSO_ESP; 1113 mlx5_core_dbg(mdev, "mlx5e: ESP GSO capability turned on\n"); 1114 netdev->features |= NETIF_F_GSO_ESP; 1115 netdev->hw_features |= NETIF_F_GSO_ESP; 1116 netdev->hw_enc_features |= NETIF_F_GSO_ESP; 1117 } 1118