1 /* 2 * Copyright (c) 2017 Mellanox Technologies. All rights reserved. 3 * 4 * This software is available to you under a choice of one of two 5 * licenses. You may choose to be licensed under the terms of the GNU 6 * General Public License (GPL) Version 2, available from the file 7 * COPYING in the main directory of this source tree, or the 8 * OpenIB.org BSD license below: 9 * 10 * Redistribution and use in source and binary forms, with or 11 * without modification, are permitted provided that the following 12 * conditions are met: 13 * 14 * - Redistributions of source code must retain the above 15 * copyright notice, this list of conditions and the following 16 * disclaimer. 17 * 18 * - Redistributions in binary form must reproduce the above 19 * copyright notice, this list of conditions and the following 20 * disclaimer in the documentation and/or other materials 21 * provided with the distribution. 22 * 23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 * SOFTWARE. 31 * 32 */ 33 34 #include <crypto/internal/geniv.h> 35 #include <crypto/aead.h> 36 #include <linux/inetdevice.h> 37 #include <linux/netdevice.h> 38 #include <net/netevent.h> 39 40 #include "en.h" 41 #include "ipsec.h" 42 #include "ipsec_rxtx.h" 43 44 #define MLX5_IPSEC_RESCHED msecs_to_jiffies(1000) 45 #define MLX5E_IPSEC_TUNNEL_SA XA_MARK_1 46 47 static struct mlx5e_ipsec_sa_entry *to_ipsec_sa_entry(struct xfrm_state *x) 48 { 49 return (struct mlx5e_ipsec_sa_entry *)x->xso.offload_handle; 50 } 51 52 static struct mlx5e_ipsec_pol_entry *to_ipsec_pol_entry(struct xfrm_policy *x) 53 { 54 return (struct mlx5e_ipsec_pol_entry *)x->xdo.offload_handle; 55 } 56 57 static void mlx5e_ipsec_handle_tx_limit(struct work_struct *_work) 58 { 59 struct mlx5e_ipsec_dwork *dwork = 60 container_of(_work, struct mlx5e_ipsec_dwork, dwork.work); 61 struct mlx5e_ipsec_sa_entry *sa_entry = dwork->sa_entry; 62 struct xfrm_state *x = sa_entry->x; 63 64 spin_lock(&x->lock); 65 xfrm_state_check_expire(x); 66 if (x->km.state == XFRM_STATE_EXPIRED) { 67 sa_entry->attrs.drop = true; 68 mlx5e_accel_ipsec_fs_modify(sa_entry); 69 } 70 spin_unlock(&x->lock); 71 72 if (sa_entry->attrs.drop) 73 return; 74 75 queue_delayed_work(sa_entry->ipsec->wq, &dwork->dwork, 76 MLX5_IPSEC_RESCHED); 77 } 78 79 static bool mlx5e_ipsec_update_esn_state(struct mlx5e_ipsec_sa_entry *sa_entry) 80 { 81 struct xfrm_state *x = sa_entry->x; 82 u32 seq_bottom = 0; 83 u32 esn, esn_msb; 84 u8 overlap; 85 86 switch (x->xso.type) { 87 case XFRM_DEV_OFFLOAD_PACKET: 88 switch (x->xso.dir) { 89 case XFRM_DEV_OFFLOAD_IN: 90 esn = x->replay_esn->seq; 91 esn_msb = x->replay_esn->seq_hi; 92 break; 93 case XFRM_DEV_OFFLOAD_OUT: 94 esn = x->replay_esn->oseq; 95 esn_msb = x->replay_esn->oseq_hi; 96 break; 97 default: 98 WARN_ON(true); 99 return false; 100 } 101 break; 102 case XFRM_DEV_OFFLOAD_CRYPTO: 103 /* Already parsed by XFRM core */ 104 esn = x->replay_esn->seq; 105 break; 106 default: 107 WARN_ON(true); 108 return false; 109 } 110 111 overlap = sa_entry->esn_state.overlap; 112 113 if (esn >= x->replay_esn->replay_window) 114 seq_bottom = esn - x->replay_esn->replay_window + 1; 115 116 if (x->xso.type == XFRM_DEV_OFFLOAD_CRYPTO) 117 esn_msb = xfrm_replay_seqhi(x, htonl(seq_bottom)); 118 119 sa_entry->esn_state.esn = esn; 120 sa_entry->esn_state.esn_msb = esn_msb; 121 122 if (unlikely(overlap && seq_bottom < MLX5E_IPSEC_ESN_SCOPE_MID)) { 123 sa_entry->esn_state.overlap = 0; 124 return true; 125 } else if (unlikely(!overlap && 126 (seq_bottom >= MLX5E_IPSEC_ESN_SCOPE_MID))) { 127 sa_entry->esn_state.overlap = 1; 128 return true; 129 } 130 131 return false; 132 } 133 134 static void mlx5e_ipsec_init_limits(struct mlx5e_ipsec_sa_entry *sa_entry, 135 struct mlx5_accel_esp_xfrm_attrs *attrs) 136 { 137 struct xfrm_state *x = sa_entry->x; 138 s64 start_value, n; 139 140 attrs->lft.hard_packet_limit = x->lft.hard_packet_limit; 141 attrs->lft.soft_packet_limit = x->lft.soft_packet_limit; 142 if (x->lft.soft_packet_limit == XFRM_INF) 143 return; 144 145 /* Compute hard limit initial value and number of rounds. 146 * 147 * The counting pattern of hardware counter goes: 148 * value -> 2^31-1 149 * 2^31 | (2^31-1) -> 2^31-1 150 * 2^31 | (2^31-1) -> 2^31-1 151 * [..] 152 * 2^31 | (2^31-1) -> 0 153 * 154 * The pattern is created by using an ASO operation to atomically set 155 * bit 31 after the down counter clears bit 31. This is effectively an 156 * atomic addition of 2**31 to the counter. 157 * 158 * We wish to configure the counter, within the above pattern, so that 159 * when it reaches 0, it has hit the hard limit. This is defined by this 160 * system of equations: 161 * 162 * hard_limit == start_value + n * 2^31 163 * n >= 0 164 * start_value < 2^32, start_value >= 0 165 * 166 * These equations are not single-solution, there are often two choices: 167 * hard_limit == start_value + n * 2^31 168 * hard_limit == (start_value+2^31) + (n-1) * 2^31 169 * 170 * The algorithm selects the solution that keeps the counter value 171 * above 2^31 until the final iteration. 172 */ 173 174 /* Start by estimating n and compute start_value */ 175 n = attrs->lft.hard_packet_limit / BIT_ULL(31); 176 start_value = attrs->lft.hard_packet_limit - n * BIT_ULL(31); 177 178 /* Choose the best of the two solutions: */ 179 if (n >= 1) 180 n -= 1; 181 182 /* Computed values solve the system of equations: */ 183 start_value = attrs->lft.hard_packet_limit - n * BIT_ULL(31); 184 185 /* The best solution means: when there are multiple iterations we must 186 * start above 2^31 and count down to 2**31 to get the interrupt. 187 */ 188 attrs->lft.hard_packet_limit = lower_32_bits(start_value); 189 attrs->lft.numb_rounds_hard = (u64)n; 190 191 /* Compute soft limit initial value and number of rounds. 192 * 193 * The soft_limit is achieved by adjusting the counter's 194 * interrupt_value. This is embedded in the counting pattern created by 195 * hard packet calculations above. 196 * 197 * We wish to compute the interrupt_value for the soft_limit. This is 198 * defined by this system of equations: 199 * 200 * soft_limit == start_value - soft_value + n * 2^31 201 * n >= 0 202 * soft_value < 2^32, soft_value >= 0 203 * for n == 0 start_value > soft_value 204 * 205 * As with compute_hard_n_value() the equations are not single-solution. 206 * The algorithm selects the solution that has: 207 * 2^30 <= soft_limit < 2^31 + 2^30 208 * for the interior iterations, which guarantees a large guard band 209 * around the counter hard limit and next interrupt. 210 */ 211 212 /* Start by estimating n and compute soft_value */ 213 n = (x->lft.soft_packet_limit - attrs->lft.hard_packet_limit) / BIT_ULL(31); 214 start_value = attrs->lft.hard_packet_limit + n * BIT_ULL(31) - 215 x->lft.soft_packet_limit; 216 217 /* Compare against constraints and adjust n */ 218 if (n < 0) 219 n = 0; 220 else if (start_value >= BIT_ULL(32)) 221 n -= 1; 222 else if (start_value < 0) 223 n += 1; 224 225 /* Choose the best of the two solutions: */ 226 start_value = attrs->lft.hard_packet_limit + n * BIT_ULL(31) - start_value; 227 if (n != attrs->lft.numb_rounds_hard && start_value < BIT_ULL(30)) 228 n += 1; 229 230 /* Note that the upper limit of soft_value happens naturally because we 231 * always select the lowest soft_value. 232 */ 233 234 /* Computed values solve the system of equations: */ 235 start_value = attrs->lft.hard_packet_limit + n * BIT_ULL(31) - start_value; 236 237 /* The best solution means: when there are multiple iterations we must 238 * not fall below 2^30 as that would get too close to the false 239 * hard_limit and when we reach an interior iteration for soft_limit it 240 * has to be far away from 2**32-1 which is the counter reset point 241 * after the +2^31 to accommodate latency. 242 */ 243 attrs->lft.soft_packet_limit = lower_32_bits(start_value); 244 attrs->lft.numb_rounds_soft = (u64)n; 245 } 246 247 static void mlx5e_ipsec_init_macs(struct mlx5e_ipsec_sa_entry *sa_entry, 248 struct mlx5_accel_esp_xfrm_attrs *attrs) 249 { 250 struct mlx5_core_dev *mdev = mlx5e_ipsec_sa2dev(sa_entry); 251 struct xfrm_state *x = sa_entry->x; 252 struct net_device *netdev; 253 struct neighbour *n; 254 u8 addr[ETH_ALEN]; 255 const void *pkey; 256 u8 *dst, *src; 257 258 if (attrs->mode != XFRM_MODE_TUNNEL || 259 attrs->type != XFRM_DEV_OFFLOAD_PACKET) 260 return; 261 262 netdev = x->xso.real_dev; 263 264 mlx5_query_mac_address(mdev, addr); 265 switch (attrs->dir) { 266 case XFRM_DEV_OFFLOAD_IN: 267 src = attrs->dmac; 268 dst = attrs->smac; 269 pkey = &attrs->saddr.a4; 270 break; 271 case XFRM_DEV_OFFLOAD_OUT: 272 src = attrs->smac; 273 dst = attrs->dmac; 274 pkey = &attrs->daddr.a4; 275 break; 276 default: 277 return; 278 } 279 280 ether_addr_copy(src, addr); 281 n = neigh_lookup(&arp_tbl, pkey, netdev); 282 if (!n) { 283 n = neigh_create(&arp_tbl, pkey, netdev); 284 if (IS_ERR(n)) 285 return; 286 neigh_event_send(n, NULL); 287 attrs->drop = true; 288 } else { 289 neigh_ha_snapshot(addr, n, netdev); 290 ether_addr_copy(dst, addr); 291 } 292 neigh_release(n); 293 } 294 295 void mlx5e_ipsec_build_accel_xfrm_attrs(struct mlx5e_ipsec_sa_entry *sa_entry, 296 struct mlx5_accel_esp_xfrm_attrs *attrs) 297 { 298 struct xfrm_state *x = sa_entry->x; 299 struct aes_gcm_keymat *aes_gcm = &attrs->aes_gcm; 300 struct aead_geniv_ctx *geniv_ctx; 301 struct crypto_aead *aead; 302 unsigned int crypto_data_len, key_len; 303 int ivsize; 304 305 memset(attrs, 0, sizeof(*attrs)); 306 307 /* key */ 308 crypto_data_len = (x->aead->alg_key_len + 7) / 8; 309 key_len = crypto_data_len - 4; /* 4 bytes salt at end */ 310 311 memcpy(aes_gcm->aes_key, x->aead->alg_key, key_len); 312 aes_gcm->key_len = key_len * 8; 313 314 /* salt and seq_iv */ 315 aead = x->data; 316 geniv_ctx = crypto_aead_ctx(aead); 317 ivsize = crypto_aead_ivsize(aead); 318 memcpy(&aes_gcm->seq_iv, &geniv_ctx->salt, ivsize); 319 memcpy(&aes_gcm->salt, x->aead->alg_key + key_len, 320 sizeof(aes_gcm->salt)); 321 322 attrs->authsize = crypto_aead_authsize(aead) / 4; /* in dwords */ 323 324 /* iv len */ 325 aes_gcm->icv_len = x->aead->alg_icv_len; 326 327 /* esn */ 328 if (x->props.flags & XFRM_STATE_ESN) { 329 attrs->replay_esn.trigger = true; 330 attrs->replay_esn.esn = sa_entry->esn_state.esn; 331 attrs->replay_esn.esn_msb = sa_entry->esn_state.esn_msb; 332 attrs->replay_esn.overlap = sa_entry->esn_state.overlap; 333 } 334 335 attrs->dir = x->xso.dir; 336 /* spi */ 337 attrs->spi = be32_to_cpu(x->id.spi); 338 339 /* source , destination ips */ 340 memcpy(&attrs->saddr, x->props.saddr.a6, sizeof(attrs->saddr)); 341 memcpy(&attrs->daddr, x->id.daddr.a6, sizeof(attrs->daddr)); 342 attrs->family = x->props.family; 343 attrs->type = x->xso.type; 344 attrs->reqid = x->props.reqid; 345 attrs->upspec.dport = ntohs(x->sel.dport); 346 attrs->upspec.dport_mask = ntohs(x->sel.dport_mask); 347 attrs->upspec.sport = ntohs(x->sel.sport); 348 attrs->upspec.sport_mask = ntohs(x->sel.sport_mask); 349 attrs->upspec.proto = x->sel.proto; 350 attrs->mode = x->props.mode; 351 352 mlx5e_ipsec_init_limits(sa_entry, attrs); 353 mlx5e_ipsec_init_macs(sa_entry, attrs); 354 } 355 356 static int mlx5e_xfrm_validate_state(struct mlx5_core_dev *mdev, 357 struct xfrm_state *x, 358 struct netlink_ext_ack *extack) 359 { 360 if (x->props.aalgo != SADB_AALG_NONE) { 361 NL_SET_ERR_MSG_MOD(extack, "Cannot offload authenticated xfrm states"); 362 return -EINVAL; 363 } 364 if (x->props.ealgo != SADB_X_EALG_AES_GCM_ICV16) { 365 NL_SET_ERR_MSG_MOD(extack, "Only AES-GCM-ICV16 xfrm state may be offloaded"); 366 return -EINVAL; 367 } 368 if (x->props.calgo != SADB_X_CALG_NONE) { 369 NL_SET_ERR_MSG_MOD(extack, "Cannot offload compressed xfrm states"); 370 return -EINVAL; 371 } 372 if (x->props.flags & XFRM_STATE_ESN && 373 !(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_ESN)) { 374 NL_SET_ERR_MSG_MOD(extack, "Cannot offload ESN xfrm states"); 375 return -EINVAL; 376 } 377 if (x->props.family != AF_INET && 378 x->props.family != AF_INET6) { 379 NL_SET_ERR_MSG_MOD(extack, "Only IPv4/6 xfrm states may be offloaded"); 380 return -EINVAL; 381 } 382 if (x->id.proto != IPPROTO_ESP) { 383 NL_SET_ERR_MSG_MOD(extack, "Only ESP xfrm state may be offloaded"); 384 return -EINVAL; 385 } 386 if (x->encap) { 387 NL_SET_ERR_MSG_MOD(extack, "Encapsulated xfrm state may not be offloaded"); 388 return -EINVAL; 389 } 390 if (!x->aead) { 391 NL_SET_ERR_MSG_MOD(extack, "Cannot offload xfrm states without aead"); 392 return -EINVAL; 393 } 394 if (x->aead->alg_icv_len != 128) { 395 NL_SET_ERR_MSG_MOD(extack, "Cannot offload xfrm states with AEAD ICV length other than 128bit"); 396 return -EINVAL; 397 } 398 if ((x->aead->alg_key_len != 128 + 32) && 399 (x->aead->alg_key_len != 256 + 32)) { 400 NL_SET_ERR_MSG_MOD(extack, "Cannot offload xfrm states with AEAD key length other than 128/256 bit"); 401 return -EINVAL; 402 } 403 if (x->tfcpad) { 404 NL_SET_ERR_MSG_MOD(extack, "Cannot offload xfrm states with tfc padding"); 405 return -EINVAL; 406 } 407 if (!x->geniv) { 408 NL_SET_ERR_MSG_MOD(extack, "Cannot offload xfrm states without geniv"); 409 return -EINVAL; 410 } 411 if (strcmp(x->geniv, "seqiv")) { 412 NL_SET_ERR_MSG_MOD(extack, "Cannot offload xfrm states with geniv other than seqiv"); 413 return -EINVAL; 414 } 415 416 if (x->sel.proto != IPPROTO_IP && 417 (x->sel.proto != IPPROTO_UDP || x->xso.dir != XFRM_DEV_OFFLOAD_OUT)) { 418 NL_SET_ERR_MSG_MOD(extack, "Device does not support upper protocol other than UDP, and only Tx direction"); 419 return -EINVAL; 420 } 421 422 if (x->props.mode != XFRM_MODE_TRANSPORT && x->props.mode != XFRM_MODE_TUNNEL) { 423 NL_SET_ERR_MSG_MOD(extack, "Only transport and tunnel xfrm states may be offloaded"); 424 return -EINVAL; 425 } 426 427 switch (x->xso.type) { 428 case XFRM_DEV_OFFLOAD_CRYPTO: 429 if (!(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_CRYPTO)) { 430 NL_SET_ERR_MSG_MOD(extack, "Crypto offload is not supported"); 431 return -EINVAL; 432 } 433 434 break; 435 case XFRM_DEV_OFFLOAD_PACKET: 436 if (!(mlx5_ipsec_device_caps(mdev) & 437 MLX5_IPSEC_CAP_PACKET_OFFLOAD)) { 438 NL_SET_ERR_MSG_MOD(extack, "Packet offload is not supported"); 439 return -EINVAL; 440 } 441 442 if (x->props.mode == XFRM_MODE_TUNNEL && 443 !(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_TUNNEL)) { 444 NL_SET_ERR_MSG_MOD(extack, "Packet offload is not supported for tunnel mode"); 445 return -EINVAL; 446 } 447 448 if (x->replay_esn && x->replay_esn->replay_window != 32 && 449 x->replay_esn->replay_window != 64 && 450 x->replay_esn->replay_window != 128 && 451 x->replay_esn->replay_window != 256) { 452 NL_SET_ERR_MSG_MOD(extack, "Unsupported replay window size"); 453 return -EINVAL; 454 } 455 456 if (!x->props.reqid) { 457 NL_SET_ERR_MSG_MOD(extack, "Cannot offload without reqid"); 458 return -EINVAL; 459 } 460 461 if (x->lft.hard_byte_limit != XFRM_INF || 462 x->lft.soft_byte_limit != XFRM_INF) { 463 NL_SET_ERR_MSG_MOD(extack, "Device doesn't support limits in bytes"); 464 return -EINVAL; 465 } 466 467 if (x->lft.soft_packet_limit >= x->lft.hard_packet_limit && 468 x->lft.hard_packet_limit != XFRM_INF) { 469 /* XFRM stack doesn't prevent such configuration :(. */ 470 NL_SET_ERR_MSG_MOD(extack, "Hard packet limit must be greater than soft one"); 471 return -EINVAL; 472 } 473 474 if (!x->lft.soft_packet_limit || !x->lft.hard_packet_limit) { 475 NL_SET_ERR_MSG_MOD(extack, "Soft/hard packet limits can't be 0"); 476 return -EINVAL; 477 } 478 break; 479 default: 480 NL_SET_ERR_MSG_MOD(extack, "Unsupported xfrm offload type"); 481 return -EINVAL; 482 } 483 return 0; 484 } 485 486 static void mlx5e_ipsec_modify_state(struct work_struct *_work) 487 { 488 struct mlx5e_ipsec_work *work = 489 container_of(_work, struct mlx5e_ipsec_work, work); 490 struct mlx5e_ipsec_sa_entry *sa_entry = work->sa_entry; 491 struct mlx5_accel_esp_xfrm_attrs *attrs; 492 493 attrs = &((struct mlx5e_ipsec_sa_entry *)work->data)->attrs; 494 495 mlx5_accel_esp_modify_xfrm(sa_entry, attrs); 496 } 497 498 static void mlx5e_ipsec_set_esn_ops(struct mlx5e_ipsec_sa_entry *sa_entry) 499 { 500 struct xfrm_state *x = sa_entry->x; 501 502 if (x->xso.type != XFRM_DEV_OFFLOAD_CRYPTO || 503 x->xso.dir != XFRM_DEV_OFFLOAD_OUT) 504 return; 505 506 if (x->props.flags & XFRM_STATE_ESN) { 507 sa_entry->set_iv_op = mlx5e_ipsec_set_iv_esn; 508 return; 509 } 510 511 sa_entry->set_iv_op = mlx5e_ipsec_set_iv; 512 } 513 514 static void mlx5e_ipsec_handle_netdev_event(struct work_struct *_work) 515 { 516 struct mlx5e_ipsec_work *work = 517 container_of(_work, struct mlx5e_ipsec_work, work); 518 struct mlx5e_ipsec_sa_entry *sa_entry = work->sa_entry; 519 struct mlx5e_ipsec_netevent_data *data = work->data; 520 struct mlx5_accel_esp_xfrm_attrs *attrs; 521 522 attrs = &sa_entry->attrs; 523 524 switch (attrs->dir) { 525 case XFRM_DEV_OFFLOAD_IN: 526 ether_addr_copy(attrs->smac, data->addr); 527 break; 528 case XFRM_DEV_OFFLOAD_OUT: 529 ether_addr_copy(attrs->dmac, data->addr); 530 break; 531 default: 532 WARN_ON_ONCE(true); 533 } 534 attrs->drop = false; 535 mlx5e_accel_ipsec_fs_modify(sa_entry); 536 } 537 538 static int mlx5_ipsec_create_work(struct mlx5e_ipsec_sa_entry *sa_entry) 539 { 540 struct xfrm_state *x = sa_entry->x; 541 struct mlx5e_ipsec_work *work; 542 void *data = NULL; 543 544 switch (x->xso.type) { 545 case XFRM_DEV_OFFLOAD_CRYPTO: 546 if (!(x->props.flags & XFRM_STATE_ESN)) 547 return 0; 548 break; 549 case XFRM_DEV_OFFLOAD_PACKET: 550 if (x->props.mode != XFRM_MODE_TUNNEL) 551 return 0; 552 break; 553 default: 554 break; 555 } 556 557 work = kzalloc(sizeof(*work), GFP_KERNEL); 558 if (!work) 559 return -ENOMEM; 560 561 switch (x->xso.type) { 562 case XFRM_DEV_OFFLOAD_CRYPTO: 563 data = kzalloc(sizeof(*sa_entry), GFP_KERNEL); 564 if (!data) 565 goto free_work; 566 567 INIT_WORK(&work->work, mlx5e_ipsec_modify_state); 568 break; 569 case XFRM_DEV_OFFLOAD_PACKET: 570 data = kzalloc(sizeof(struct mlx5e_ipsec_netevent_data), 571 GFP_KERNEL); 572 if (!data) 573 goto free_work; 574 575 INIT_WORK(&work->work, mlx5e_ipsec_handle_netdev_event); 576 break; 577 default: 578 break; 579 } 580 581 work->data = data; 582 work->sa_entry = sa_entry; 583 sa_entry->work = work; 584 return 0; 585 586 free_work: 587 kfree(work); 588 return -ENOMEM; 589 } 590 591 static int mlx5e_ipsec_create_dwork(struct mlx5e_ipsec_sa_entry *sa_entry) 592 { 593 struct xfrm_state *x = sa_entry->x; 594 struct mlx5e_ipsec_dwork *dwork; 595 596 if (x->xso.type != XFRM_DEV_OFFLOAD_PACKET) 597 return 0; 598 599 if (x->xso.dir != XFRM_DEV_OFFLOAD_OUT) 600 return 0; 601 602 if (x->lft.soft_packet_limit == XFRM_INF && 603 x->lft.hard_packet_limit == XFRM_INF) 604 return 0; 605 606 dwork = kzalloc(sizeof(*dwork), GFP_KERNEL); 607 if (!dwork) 608 return -ENOMEM; 609 610 dwork->sa_entry = sa_entry; 611 INIT_DELAYED_WORK(&dwork->dwork, mlx5e_ipsec_handle_tx_limit); 612 sa_entry->dwork = dwork; 613 return 0; 614 } 615 616 static int mlx5e_xfrm_add_state(struct xfrm_state *x, 617 struct netlink_ext_ack *extack) 618 { 619 struct mlx5e_ipsec_sa_entry *sa_entry = NULL; 620 struct net_device *netdev = x->xso.real_dev; 621 struct mlx5e_ipsec *ipsec; 622 struct mlx5e_priv *priv; 623 gfp_t gfp; 624 int err; 625 626 priv = netdev_priv(netdev); 627 if (!priv->ipsec) 628 return -EOPNOTSUPP; 629 630 ipsec = priv->ipsec; 631 gfp = (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ) ? GFP_ATOMIC : GFP_KERNEL; 632 sa_entry = kzalloc(sizeof(*sa_entry), gfp); 633 if (!sa_entry) 634 return -ENOMEM; 635 636 sa_entry->x = x; 637 sa_entry->ipsec = ipsec; 638 /* Check if this SA is originated from acquire flow temporary SA */ 639 if (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ) 640 goto out; 641 642 err = mlx5e_xfrm_validate_state(priv->mdev, x, extack); 643 if (err) 644 goto err_xfrm; 645 646 /* check esn */ 647 if (x->props.flags & XFRM_STATE_ESN) 648 mlx5e_ipsec_update_esn_state(sa_entry); 649 650 mlx5e_ipsec_build_accel_xfrm_attrs(sa_entry, &sa_entry->attrs); 651 652 err = mlx5_ipsec_create_work(sa_entry); 653 if (err) 654 goto err_xfrm; 655 656 err = mlx5e_ipsec_create_dwork(sa_entry); 657 if (err) 658 goto release_work; 659 660 /* create hw context */ 661 err = mlx5_ipsec_create_sa_ctx(sa_entry); 662 if (err) 663 goto release_dwork; 664 665 err = mlx5e_accel_ipsec_fs_add_rule(sa_entry); 666 if (err) 667 goto err_hw_ctx; 668 669 if (x->props.mode == XFRM_MODE_TUNNEL && 670 x->xso.type == XFRM_DEV_OFFLOAD_PACKET && 671 !mlx5e_ipsec_fs_tunnel_enabled(sa_entry)) { 672 NL_SET_ERR_MSG_MOD(extack, "Packet offload tunnel mode is disabled due to encap settings"); 673 err = -EINVAL; 674 goto err_add_rule; 675 } 676 677 /* We use *_bh() variant because xfrm_timer_handler(), which runs 678 * in softirq context, can reach our state delete logic and we need 679 * xa_erase_bh() there. 680 */ 681 err = xa_insert_bh(&ipsec->sadb, sa_entry->ipsec_obj_id, sa_entry, 682 GFP_KERNEL); 683 if (err) 684 goto err_add_rule; 685 686 mlx5e_ipsec_set_esn_ops(sa_entry); 687 688 if (sa_entry->dwork) 689 queue_delayed_work(ipsec->wq, &sa_entry->dwork->dwork, 690 MLX5_IPSEC_RESCHED); 691 692 if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET && 693 x->props.mode == XFRM_MODE_TUNNEL) 694 xa_set_mark(&ipsec->sadb, sa_entry->ipsec_obj_id, 695 MLX5E_IPSEC_TUNNEL_SA); 696 697 out: 698 x->xso.offload_handle = (unsigned long)sa_entry; 699 return 0; 700 701 err_add_rule: 702 mlx5e_accel_ipsec_fs_del_rule(sa_entry); 703 err_hw_ctx: 704 mlx5_ipsec_free_sa_ctx(sa_entry); 705 release_dwork: 706 kfree(sa_entry->dwork); 707 release_work: 708 if (sa_entry->work) 709 kfree(sa_entry->work->data); 710 kfree(sa_entry->work); 711 err_xfrm: 712 kfree(sa_entry); 713 NL_SET_ERR_MSG_WEAK_MOD(extack, "Device failed to offload this state"); 714 return err; 715 } 716 717 static void mlx5e_xfrm_del_state(struct xfrm_state *x) 718 { 719 struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x); 720 struct mlx5_accel_esp_xfrm_attrs *attrs = &sa_entry->attrs; 721 struct mlx5e_ipsec *ipsec = sa_entry->ipsec; 722 struct mlx5e_ipsec_sa_entry *old; 723 724 if (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ) 725 return; 726 727 old = xa_erase_bh(&ipsec->sadb, sa_entry->ipsec_obj_id); 728 WARN_ON(old != sa_entry); 729 730 if (attrs->mode == XFRM_MODE_TUNNEL && 731 attrs->type == XFRM_DEV_OFFLOAD_PACKET) 732 /* Make sure that no ARP requests are running in parallel */ 733 flush_workqueue(ipsec->wq); 734 735 } 736 737 static void mlx5e_xfrm_free_state(struct xfrm_state *x) 738 { 739 struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x); 740 741 if (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ) 742 goto sa_entry_free; 743 744 if (sa_entry->work) 745 cancel_work_sync(&sa_entry->work->work); 746 747 if (sa_entry->dwork) 748 cancel_delayed_work_sync(&sa_entry->dwork->dwork); 749 750 mlx5e_accel_ipsec_fs_del_rule(sa_entry); 751 mlx5_ipsec_free_sa_ctx(sa_entry); 752 kfree(sa_entry->dwork); 753 if (sa_entry->work) 754 kfree(sa_entry->work->data); 755 kfree(sa_entry->work); 756 sa_entry_free: 757 kfree(sa_entry); 758 } 759 760 static int mlx5e_ipsec_netevent_event(struct notifier_block *nb, 761 unsigned long event, void *ptr) 762 { 763 struct mlx5_accel_esp_xfrm_attrs *attrs; 764 struct mlx5e_ipsec_netevent_data *data; 765 struct mlx5e_ipsec_sa_entry *sa_entry; 766 struct mlx5e_ipsec *ipsec; 767 struct neighbour *n = ptr; 768 struct net_device *netdev; 769 struct xfrm_state *x; 770 unsigned long idx; 771 772 if (event != NETEVENT_NEIGH_UPDATE || !(n->nud_state & NUD_VALID)) 773 return NOTIFY_DONE; 774 775 ipsec = container_of(nb, struct mlx5e_ipsec, netevent_nb); 776 xa_for_each_marked(&ipsec->sadb, idx, sa_entry, MLX5E_IPSEC_TUNNEL_SA) { 777 attrs = &sa_entry->attrs; 778 779 if (attrs->family == AF_INET) { 780 if (!neigh_key_eq32(n, &attrs->saddr.a4) && 781 !neigh_key_eq32(n, &attrs->daddr.a4)) 782 continue; 783 } else { 784 if (!neigh_key_eq128(n, &attrs->saddr.a4) && 785 !neigh_key_eq128(n, &attrs->daddr.a4)) 786 continue; 787 } 788 789 x = sa_entry->x; 790 netdev = x->xso.real_dev; 791 data = sa_entry->work->data; 792 793 neigh_ha_snapshot(data->addr, n, netdev); 794 queue_work(ipsec->wq, &sa_entry->work->work); 795 } 796 797 return NOTIFY_DONE; 798 } 799 800 void mlx5e_ipsec_init(struct mlx5e_priv *priv) 801 { 802 struct mlx5e_ipsec *ipsec; 803 int ret = -ENOMEM; 804 805 if (!mlx5_ipsec_device_caps(priv->mdev)) { 806 netdev_dbg(priv->netdev, "Not an IPSec offload device\n"); 807 return; 808 } 809 810 ipsec = kzalloc(sizeof(*ipsec), GFP_KERNEL); 811 if (!ipsec) 812 return; 813 814 xa_init_flags(&ipsec->sadb, XA_FLAGS_ALLOC); 815 ipsec->mdev = priv->mdev; 816 ipsec->wq = alloc_workqueue("mlx5e_ipsec: %s", WQ_UNBOUND, 0, 817 priv->netdev->name); 818 if (!ipsec->wq) 819 goto err_wq; 820 821 if (mlx5_ipsec_device_caps(priv->mdev) & 822 MLX5_IPSEC_CAP_PACKET_OFFLOAD) { 823 ret = mlx5e_ipsec_aso_init(ipsec); 824 if (ret) 825 goto err_aso; 826 } 827 828 if (mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_TUNNEL) { 829 ipsec->netevent_nb.notifier_call = mlx5e_ipsec_netevent_event; 830 ret = register_netevent_notifier(&ipsec->netevent_nb); 831 if (ret) 832 goto clear_aso; 833 } 834 835 ret = mlx5e_accel_ipsec_fs_init(ipsec); 836 if (ret) 837 goto err_fs_init; 838 839 ipsec->fs = priv->fs; 840 priv->ipsec = ipsec; 841 netdev_dbg(priv->netdev, "IPSec attached to netdevice\n"); 842 return; 843 844 err_fs_init: 845 if (mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_TUNNEL) 846 unregister_netevent_notifier(&ipsec->netevent_nb); 847 clear_aso: 848 if (mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_PACKET_OFFLOAD) 849 mlx5e_ipsec_aso_cleanup(ipsec); 850 err_aso: 851 destroy_workqueue(ipsec->wq); 852 err_wq: 853 kfree(ipsec); 854 mlx5_core_err(priv->mdev, "IPSec initialization failed, %d\n", ret); 855 return; 856 } 857 858 void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv) 859 { 860 struct mlx5e_ipsec *ipsec = priv->ipsec; 861 862 if (!ipsec) 863 return; 864 865 mlx5e_accel_ipsec_fs_cleanup(ipsec); 866 if (mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_TUNNEL) 867 unregister_netevent_notifier(&ipsec->netevent_nb); 868 if (mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_PACKET_OFFLOAD) 869 mlx5e_ipsec_aso_cleanup(ipsec); 870 destroy_workqueue(ipsec->wq); 871 kfree(ipsec); 872 priv->ipsec = NULL; 873 } 874 875 static bool mlx5e_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *x) 876 { 877 if (x->props.family == AF_INET) { 878 /* Offload with IPv4 options is not supported yet */ 879 if (ip_hdr(skb)->ihl > 5) 880 return false; 881 } else { 882 /* Offload with IPv6 extension headers is not support yet */ 883 if (ipv6_ext_hdr(ipv6_hdr(skb)->nexthdr)) 884 return false; 885 } 886 887 return true; 888 } 889 890 static void mlx5e_xfrm_advance_esn_state(struct xfrm_state *x) 891 { 892 struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x); 893 struct mlx5e_ipsec_work *work = sa_entry->work; 894 struct mlx5e_ipsec_sa_entry *sa_entry_shadow; 895 bool need_update; 896 897 need_update = mlx5e_ipsec_update_esn_state(sa_entry); 898 if (!need_update) 899 return; 900 901 sa_entry_shadow = work->data; 902 memset(sa_entry_shadow, 0x00, sizeof(*sa_entry_shadow)); 903 mlx5e_ipsec_build_accel_xfrm_attrs(sa_entry, &sa_entry_shadow->attrs); 904 queue_work(sa_entry->ipsec->wq, &work->work); 905 } 906 907 static void mlx5e_xfrm_update_curlft(struct xfrm_state *x) 908 { 909 struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x); 910 struct mlx5e_ipsec_rule *ipsec_rule = &sa_entry->ipsec_rule; 911 u64 packets, bytes, lastuse; 912 913 lockdep_assert(lockdep_is_held(&x->lock) || 914 lockdep_is_held(&dev_net(x->xso.real_dev)->xfrm.xfrm_cfg_mutex)); 915 916 if (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ) 917 return; 918 919 mlx5_fc_query_cached(ipsec_rule->fc, &bytes, &packets, &lastuse); 920 x->curlft.packets += packets; 921 x->curlft.bytes += bytes; 922 } 923 924 static int mlx5e_xfrm_validate_policy(struct mlx5_core_dev *mdev, 925 struct xfrm_policy *x, 926 struct netlink_ext_ack *extack) 927 { 928 struct xfrm_selector *sel = &x->selector; 929 930 if (x->type != XFRM_POLICY_TYPE_MAIN) { 931 NL_SET_ERR_MSG_MOD(extack, "Cannot offload non-main policy types"); 932 return -EINVAL; 933 } 934 935 /* Please pay attention that we support only one template */ 936 if (x->xfrm_nr > 1) { 937 NL_SET_ERR_MSG_MOD(extack, "Cannot offload more than one template"); 938 return -EINVAL; 939 } 940 941 if (x->xdo.dir != XFRM_DEV_OFFLOAD_IN && 942 x->xdo.dir != XFRM_DEV_OFFLOAD_OUT) { 943 NL_SET_ERR_MSG_MOD(extack, "Cannot offload forward policy"); 944 return -EINVAL; 945 } 946 947 if (!x->xfrm_vec[0].reqid && sel->proto == IPPROTO_IP && 948 addr6_all_zero(sel->saddr.a6) && addr6_all_zero(sel->daddr.a6)) { 949 NL_SET_ERR_MSG_MOD(extack, "Unsupported policy with reqid 0 without at least one of upper protocol or ip addr(s) different than 0"); 950 return -EINVAL; 951 } 952 953 if (x->xdo.type != XFRM_DEV_OFFLOAD_PACKET) { 954 NL_SET_ERR_MSG_MOD(extack, "Unsupported xfrm offload type"); 955 return -EINVAL; 956 } 957 958 if (sel->proto != IPPROTO_IP && 959 (sel->proto != IPPROTO_UDP || x->xdo.dir != XFRM_DEV_OFFLOAD_OUT)) { 960 NL_SET_ERR_MSG_MOD(extack, "Device does not support upper protocol other than UDP, and only Tx direction"); 961 return -EINVAL; 962 } 963 964 if (x->priority) { 965 if (!(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_PRIO)) { 966 NL_SET_ERR_MSG_MOD(extack, "Device does not support policy priority"); 967 return -EINVAL; 968 } 969 970 if (x->priority == U32_MAX) { 971 NL_SET_ERR_MSG_MOD(extack, "Device does not support requested policy priority"); 972 return -EINVAL; 973 } 974 } 975 976 return 0; 977 } 978 979 static void 980 mlx5e_ipsec_build_accel_pol_attrs(struct mlx5e_ipsec_pol_entry *pol_entry, 981 struct mlx5_accel_pol_xfrm_attrs *attrs) 982 { 983 struct xfrm_policy *x = pol_entry->x; 984 struct xfrm_selector *sel; 985 986 sel = &x->selector; 987 memset(attrs, 0, sizeof(*attrs)); 988 989 memcpy(&attrs->saddr, sel->saddr.a6, sizeof(attrs->saddr)); 990 memcpy(&attrs->daddr, sel->daddr.a6, sizeof(attrs->daddr)); 991 attrs->family = sel->family; 992 attrs->dir = x->xdo.dir; 993 attrs->action = x->action; 994 attrs->type = XFRM_DEV_OFFLOAD_PACKET; 995 attrs->reqid = x->xfrm_vec[0].reqid; 996 attrs->upspec.dport = ntohs(sel->dport); 997 attrs->upspec.dport_mask = ntohs(sel->dport_mask); 998 attrs->upspec.sport = ntohs(sel->sport); 999 attrs->upspec.sport_mask = ntohs(sel->sport_mask); 1000 attrs->upspec.proto = sel->proto; 1001 attrs->prio = x->priority; 1002 } 1003 1004 static int mlx5e_xfrm_add_policy(struct xfrm_policy *x, 1005 struct netlink_ext_ack *extack) 1006 { 1007 struct net_device *netdev = x->xdo.real_dev; 1008 struct mlx5e_ipsec_pol_entry *pol_entry; 1009 struct mlx5e_priv *priv; 1010 int err; 1011 1012 priv = netdev_priv(netdev); 1013 if (!priv->ipsec) { 1014 NL_SET_ERR_MSG_MOD(extack, "Device doesn't support IPsec packet offload"); 1015 return -EOPNOTSUPP; 1016 } 1017 1018 err = mlx5e_xfrm_validate_policy(priv->mdev, x, extack); 1019 if (err) 1020 return err; 1021 1022 pol_entry = kzalloc(sizeof(*pol_entry), GFP_KERNEL); 1023 if (!pol_entry) 1024 return -ENOMEM; 1025 1026 pol_entry->x = x; 1027 pol_entry->ipsec = priv->ipsec; 1028 1029 mlx5e_ipsec_build_accel_pol_attrs(pol_entry, &pol_entry->attrs); 1030 err = mlx5e_accel_ipsec_fs_add_pol(pol_entry); 1031 if (err) 1032 goto err_fs; 1033 1034 x->xdo.offload_handle = (unsigned long)pol_entry; 1035 return 0; 1036 1037 err_fs: 1038 kfree(pol_entry); 1039 NL_SET_ERR_MSG_MOD(extack, "Device failed to offload this policy"); 1040 return err; 1041 } 1042 1043 static void mlx5e_xfrm_free_policy(struct xfrm_policy *x) 1044 { 1045 struct mlx5e_ipsec_pol_entry *pol_entry = to_ipsec_pol_entry(x); 1046 1047 mlx5e_accel_ipsec_fs_del_pol(pol_entry); 1048 kfree(pol_entry); 1049 } 1050 1051 static const struct xfrmdev_ops mlx5e_ipsec_xfrmdev_ops = { 1052 .xdo_dev_state_add = mlx5e_xfrm_add_state, 1053 .xdo_dev_state_delete = mlx5e_xfrm_del_state, 1054 .xdo_dev_state_free = mlx5e_xfrm_free_state, 1055 .xdo_dev_offload_ok = mlx5e_ipsec_offload_ok, 1056 .xdo_dev_state_advance_esn = mlx5e_xfrm_advance_esn_state, 1057 }; 1058 1059 static const struct xfrmdev_ops mlx5e_ipsec_packet_xfrmdev_ops = { 1060 .xdo_dev_state_add = mlx5e_xfrm_add_state, 1061 .xdo_dev_state_delete = mlx5e_xfrm_del_state, 1062 .xdo_dev_state_free = mlx5e_xfrm_free_state, 1063 .xdo_dev_offload_ok = mlx5e_ipsec_offload_ok, 1064 .xdo_dev_state_advance_esn = mlx5e_xfrm_advance_esn_state, 1065 1066 .xdo_dev_state_update_curlft = mlx5e_xfrm_update_curlft, 1067 .xdo_dev_policy_add = mlx5e_xfrm_add_policy, 1068 .xdo_dev_policy_free = mlx5e_xfrm_free_policy, 1069 }; 1070 1071 void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv) 1072 { 1073 struct mlx5_core_dev *mdev = priv->mdev; 1074 struct net_device *netdev = priv->netdev; 1075 1076 if (!mlx5_ipsec_device_caps(mdev)) 1077 return; 1078 1079 mlx5_core_info(mdev, "mlx5e: IPSec ESP acceleration enabled\n"); 1080 1081 if (mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_PACKET_OFFLOAD) 1082 netdev->xfrmdev_ops = &mlx5e_ipsec_packet_xfrmdev_ops; 1083 else 1084 netdev->xfrmdev_ops = &mlx5e_ipsec_xfrmdev_ops; 1085 1086 netdev->features |= NETIF_F_HW_ESP; 1087 netdev->hw_enc_features |= NETIF_F_HW_ESP; 1088 1089 if (!MLX5_CAP_ETH(mdev, swp_csum)) { 1090 mlx5_core_dbg(mdev, "mlx5e: SWP checksum not supported\n"); 1091 return; 1092 } 1093 1094 netdev->features |= NETIF_F_HW_ESP_TX_CSUM; 1095 netdev->hw_enc_features |= NETIF_F_HW_ESP_TX_CSUM; 1096 1097 if (!MLX5_CAP_ETH(mdev, swp_lso)) { 1098 mlx5_core_dbg(mdev, "mlx5e: ESP LSO not supported\n"); 1099 return; 1100 } 1101 1102 netdev->gso_partial_features |= NETIF_F_GSO_ESP; 1103 mlx5_core_dbg(mdev, "mlx5e: ESP GSO capability turned on\n"); 1104 netdev->features |= NETIF_F_GSO_ESP; 1105 netdev->hw_features |= NETIF_F_GSO_ESP; 1106 netdev->hw_enc_features |= NETIF_F_GSO_ESP; 1107 } 1108