1 /********************************************************************** 2 * Author: Cavium Networks 3 * 4 * Contact: support@caviumnetworks.com 5 * This file is part of the OCTEON SDK 6 * 7 * Copyright (c) 2003-2010 Cavium Networks 8 * 9 * This file is free software; you can redistribute it and/or modify 10 * it under the terms of the GNU General Public License, Version 2, as 11 * published by the Free Software Foundation. 12 * 13 * This file is distributed in the hope that it will be useful, but 14 * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty 15 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or 16 * NONINFRINGEMENT. See the GNU General Public License for more 17 * details. 18 * 19 * You should have received a copy of the GNU General Public License 20 * along with this file; if not, write to the Free Software 21 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 22 * or visit http://www.gnu.org/licenses/. 23 * 24 * This file may also be available under a different license from Cavium. 25 * Contact Cavium Networks for more information 26 **********************************************************************/ 27 #include <linux/module.h> 28 #include <linux/kernel.h> 29 #include <linux/cache.h> 30 #include <linux/cpumask.h> 31 #include <linux/netdevice.h> 32 #include <linux/init.h> 33 #include <linux/etherdevice.h> 34 #include <linux/ip.h> 35 #include <linux/string.h> 36 #include <linux/prefetch.h> 37 #include <linux/ratelimit.h> 38 #include <linux/smp.h> 39 #include <linux/interrupt.h> 40 #include <net/dst.h> 41 #ifdef CONFIG_XFRM 42 #include <linux/xfrm.h> 43 #include <net/xfrm.h> 44 #endif /* CONFIG_XFRM */ 45 46 #include <linux/atomic.h> 47 48 #include <asm/octeon/octeon.h> 49 50 #include "ethernet-defines.h" 51 #include "ethernet-mem.h" 52 #include "ethernet-rx.h" 53 #include "octeon-ethernet.h" 54 #include "ethernet-util.h" 55 56 #include <asm/octeon/cvmx-helper.h> 57 #include <asm/octeon/cvmx-wqe.h> 58 #include <asm/octeon/cvmx-fau.h> 59 #include <asm/octeon/cvmx-pow.h> 60 #include <asm/octeon/cvmx-pip.h> 61 #include <asm/octeon/cvmx-scratch.h> 62 63 #include <asm/octeon/cvmx-gmxx-defs.h> 64 65 struct cvm_napi_wrapper { 66 struct napi_struct napi; 67 } ____cacheline_aligned_in_smp; 68 69 static struct cvm_napi_wrapper cvm_oct_napi[NR_CPUS] __cacheline_aligned_in_smp; 70 71 struct cvm_oct_core_state { 72 int baseline_cores; 73 /* 74 * The number of additional cores that could be processing 75 * input packtes. 76 */ 77 atomic_t available_cores; 78 cpumask_t cpu_state; 79 } ____cacheline_aligned_in_smp; 80 81 static struct cvm_oct_core_state core_state __cacheline_aligned_in_smp; 82 83 static void cvm_oct_enable_napi(void *_) 84 { 85 int cpu = smp_processor_id(); 86 napi_schedule(&cvm_oct_napi[cpu].napi); 87 } 88 89 static void cvm_oct_enable_one_cpu(void) 90 { 91 int v; 92 int cpu; 93 94 /* Check to see if more CPUs are available for receive processing... */ 95 v = atomic_sub_if_positive(1, &core_state.available_cores); 96 if (v < 0) 97 return; 98 99 /* ... if a CPU is available, Turn on NAPI polling for that CPU. */ 100 for_each_online_cpu(cpu) { 101 if (!cpu_test_and_set(cpu, core_state.cpu_state)) { 102 v = smp_call_function_single(cpu, cvm_oct_enable_napi, 103 NULL, 0); 104 if (v) 105 panic("Can't enable NAPI."); 106 break; 107 } 108 } 109 } 110 111 static void cvm_oct_no_more_work(void) 112 { 113 int cpu = smp_processor_id(); 114 115 /* 116 * CPU zero is special. It always has the irq enabled when 117 * waiting for incoming packets. 118 */ 119 if (cpu == 0) { 120 enable_irq(OCTEON_IRQ_WORKQ0 + pow_receive_group); 121 return; 122 } 123 124 cpu_clear(cpu, core_state.cpu_state); 125 atomic_add(1, &core_state.available_cores); 126 } 127 128 /** 129 * cvm_oct_do_interrupt - interrupt handler. 130 * 131 * The interrupt occurs whenever the POW has packets in our group. 132 * 133 */ 134 static irqreturn_t cvm_oct_do_interrupt(int cpl, void *dev_id) 135 { 136 /* Disable the IRQ and start napi_poll. */ 137 disable_irq_nosync(OCTEON_IRQ_WORKQ0 + pow_receive_group); 138 cvm_oct_enable_napi(NULL); 139 140 return IRQ_HANDLED; 141 } 142 143 /** 144 * cvm_oct_check_rcv_error - process receive errors 145 * @work: Work queue entry pointing to the packet. 146 * 147 * Returns Non-zero if the packet can be dropped, zero otherwise. 148 */ 149 static inline int cvm_oct_check_rcv_error(cvmx_wqe_t *work) 150 { 151 if ((work->word2.snoip.err_code == 10) && (work->len <= 64)) { 152 /* 153 * Ignore length errors on min size packets. Some 154 * equipment incorrectly pads packets to 64+4FCS 155 * instead of 60+4FCS. Note these packets still get 156 * counted as frame errors. 157 */ 158 } else 159 if (USE_10MBPS_PREAMBLE_WORKAROUND 160 && ((work->word2.snoip.err_code == 5) 161 || (work->word2.snoip.err_code == 7))) { 162 163 /* 164 * We received a packet with either an alignment error 165 * or a FCS error. This may be signalling that we are 166 * running 10Mbps with GMXX_RXX_FRM_CTL[PRE_CHK] 167 * off. If this is the case we need to parse the 168 * packet to determine if we can remove a non spec 169 * preamble and generate a correct packet. 170 */ 171 int interface = cvmx_helper_get_interface_num(work->ipprt); 172 int index = cvmx_helper_get_interface_index_num(work->ipprt); 173 union cvmx_gmxx_rxx_frm_ctl gmxx_rxx_frm_ctl; 174 gmxx_rxx_frm_ctl.u64 = 175 cvmx_read_csr(CVMX_GMXX_RXX_FRM_CTL(index, interface)); 176 if (gmxx_rxx_frm_ctl.s.pre_chk == 0) { 177 178 uint8_t *ptr = 179 cvmx_phys_to_ptr(work->packet_ptr.s.addr); 180 int i = 0; 181 182 while (i < work->len - 1) { 183 if (*ptr != 0x55) 184 break; 185 ptr++; 186 i++; 187 } 188 189 if (*ptr == 0xd5) { 190 /* 191 printk_ratelimited("Port %d received 0xd5 preamble\n", work->ipprt); 192 */ 193 work->packet_ptr.s.addr += i + 1; 194 work->len -= i + 5; 195 } else if ((*ptr & 0xf) == 0xd) { 196 /* 197 printk_ratelimited("Port %d received 0x?d preamble\n", work->ipprt); 198 */ 199 work->packet_ptr.s.addr += i; 200 work->len -= i + 4; 201 for (i = 0; i < work->len; i++) { 202 *ptr = 203 ((*ptr & 0xf0) >> 4) | 204 ((*(ptr + 1) & 0xf) << 4); 205 ptr++; 206 } 207 } else { 208 printk_ratelimited("Port %d unknown preamble, packet " 209 "dropped\n", 210 work->ipprt); 211 /* 212 cvmx_helper_dump_packet(work); 213 */ 214 cvm_oct_free_work(work); 215 return 1; 216 } 217 } 218 } else { 219 printk_ratelimited("Port %d receive error code %d, packet dropped\n", 220 work->ipprt, work->word2.snoip.err_code); 221 cvm_oct_free_work(work); 222 return 1; 223 } 224 225 return 0; 226 } 227 228 /** 229 * cvm_oct_napi_poll - the NAPI poll function. 230 * @napi: The NAPI instance, or null if called from cvm_oct_poll_controller 231 * @budget: Maximum number of packets to receive. 232 * 233 * Returns the number of packets processed. 234 */ 235 static int cvm_oct_napi_poll(struct napi_struct *napi, int budget) 236 { 237 const int coreid = cvmx_get_core_num(); 238 uint64_t old_group_mask; 239 uint64_t old_scratch; 240 int rx_count = 0; 241 int did_work_request = 0; 242 int packet_not_copied; 243 244 /* Prefetch cvm_oct_device since we know we need it soon */ 245 prefetch(cvm_oct_device); 246 247 if (USE_ASYNC_IOBDMA) { 248 /* Save scratch in case userspace is using it */ 249 CVMX_SYNCIOBDMA; 250 old_scratch = cvmx_scratch_read64(CVMX_SCR_SCRATCH); 251 } 252 253 /* Only allow work for our group (and preserve priorities) */ 254 old_group_mask = cvmx_read_csr(CVMX_POW_PP_GRP_MSKX(coreid)); 255 cvmx_write_csr(CVMX_POW_PP_GRP_MSKX(coreid), 256 (old_group_mask & ~0xFFFFull) | 1 << pow_receive_group); 257 258 if (USE_ASYNC_IOBDMA) { 259 cvmx_pow_work_request_async(CVMX_SCR_SCRATCH, CVMX_POW_NO_WAIT); 260 did_work_request = 1; 261 } 262 263 while (rx_count < budget) { 264 struct sk_buff *skb = NULL; 265 struct sk_buff **pskb = NULL; 266 int skb_in_hw; 267 cvmx_wqe_t *work; 268 269 if (USE_ASYNC_IOBDMA && did_work_request) 270 work = cvmx_pow_work_response_async(CVMX_SCR_SCRATCH); 271 else 272 work = cvmx_pow_work_request_sync(CVMX_POW_NO_WAIT); 273 274 prefetch(work); 275 did_work_request = 0; 276 if (work == NULL) { 277 union cvmx_pow_wq_int wq_int; 278 wq_int.u64 = 0; 279 wq_int.s.iq_dis = 1 << pow_receive_group; 280 wq_int.s.wq_int = 1 << pow_receive_group; 281 cvmx_write_csr(CVMX_POW_WQ_INT, wq_int.u64); 282 break; 283 } 284 pskb = (struct sk_buff **)(cvm_oct_get_buffer_ptr(work->packet_ptr) - sizeof(void *)); 285 prefetch(pskb); 286 287 if (USE_ASYNC_IOBDMA && rx_count < (budget - 1)) { 288 cvmx_pow_work_request_async_nocheck(CVMX_SCR_SCRATCH, CVMX_POW_NO_WAIT); 289 did_work_request = 1; 290 } 291 292 if (rx_count == 0) { 293 /* 294 * First time through, see if there is enough 295 * work waiting to merit waking another 296 * CPU. 297 */ 298 union cvmx_pow_wq_int_cntx counts; 299 int backlog; 300 int cores_in_use = core_state.baseline_cores - atomic_read(&core_state.available_cores); 301 counts.u64 = cvmx_read_csr(CVMX_POW_WQ_INT_CNTX(pow_receive_group)); 302 backlog = counts.s.iq_cnt + counts.s.ds_cnt; 303 if (backlog > budget * cores_in_use && napi != NULL) 304 cvm_oct_enable_one_cpu(); 305 } 306 307 skb_in_hw = USE_SKBUFFS_IN_HW && work->word2.s.bufs == 1; 308 if (likely(skb_in_hw)) { 309 skb = *pskb; 310 prefetch(&skb->head); 311 prefetch(&skb->len); 312 } 313 prefetch(cvm_oct_device[work->ipprt]); 314 315 /* Immediately throw away all packets with receive errors */ 316 if (unlikely(work->word2.snoip.rcv_error)) { 317 if (cvm_oct_check_rcv_error(work)) 318 continue; 319 } 320 321 /* 322 * We can only use the zero copy path if skbuffs are 323 * in the FPA pool and the packet fits in a single 324 * buffer. 325 */ 326 if (likely(skb_in_hw)) { 327 skb->data = skb->head + work->packet_ptr.s.addr - cvmx_ptr_to_phys(skb->head); 328 prefetch(skb->data); 329 skb->len = work->len; 330 skb_set_tail_pointer(skb, skb->len); 331 packet_not_copied = 1; 332 } else { 333 /* 334 * We have to copy the packet. First allocate 335 * an skbuff for it. 336 */ 337 skb = dev_alloc_skb(work->len); 338 if (!skb) { 339 printk_ratelimited("Port %d failed to allocate " 340 "skbuff, packet dropped\n", 341 work->ipprt); 342 cvm_oct_free_work(work); 343 continue; 344 } 345 346 /* 347 * Check if we've received a packet that was 348 * entirely stored in the work entry. 349 */ 350 if (unlikely(work->word2.s.bufs == 0)) { 351 uint8_t *ptr = work->packet_data; 352 353 if (likely(!work->word2.s.not_IP)) { 354 /* 355 * The beginning of the packet 356 * moves for IP packets. 357 */ 358 if (work->word2.s.is_v6) 359 ptr += 2; 360 else 361 ptr += 6; 362 } 363 memcpy(skb_put(skb, work->len), ptr, work->len); 364 /* No packet buffers to free */ 365 } else { 366 int segments = work->word2.s.bufs; 367 union cvmx_buf_ptr segment_ptr = work->packet_ptr; 368 int len = work->len; 369 370 while (segments--) { 371 union cvmx_buf_ptr next_ptr = 372 *(union cvmx_buf_ptr *)cvmx_phys_to_ptr(segment_ptr.s.addr - 8); 373 374 /* 375 * Octeon Errata PKI-100: The segment size is 376 * wrong. Until it is fixed, calculate the 377 * segment size based on the packet pool 378 * buffer size. When it is fixed, the 379 * following line should be replaced with this 380 * one: int segment_size = 381 * segment_ptr.s.size; 382 */ 383 int segment_size = CVMX_FPA_PACKET_POOL_SIZE - 384 (segment_ptr.s.addr - (((segment_ptr.s.addr >> 7) - segment_ptr.s.back) << 7)); 385 /* 386 * Don't copy more than what 387 * is left in the packet. 388 */ 389 if (segment_size > len) 390 segment_size = len; 391 /* Copy the data into the packet */ 392 memcpy(skb_put(skb, segment_size), 393 cvmx_phys_to_ptr(segment_ptr.s.addr), 394 segment_size); 395 len -= segment_size; 396 segment_ptr = next_ptr; 397 } 398 } 399 packet_not_copied = 0; 400 } 401 402 if (likely((work->ipprt < TOTAL_NUMBER_OF_PORTS) && 403 cvm_oct_device[work->ipprt])) { 404 struct net_device *dev = cvm_oct_device[work->ipprt]; 405 struct octeon_ethernet *priv = netdev_priv(dev); 406 407 /* 408 * Only accept packets for devices that are 409 * currently up. 410 */ 411 if (likely(dev->flags & IFF_UP)) { 412 skb->protocol = eth_type_trans(skb, dev); 413 skb->dev = dev; 414 415 if (unlikely(work->word2.s.not_IP || work->word2.s.IP_exc || 416 work->word2.s.L4_error || !work->word2.s.tcp_or_udp)) 417 skb->ip_summed = CHECKSUM_NONE; 418 else 419 skb->ip_summed = CHECKSUM_UNNECESSARY; 420 421 /* Increment RX stats for virtual ports */ 422 if (work->ipprt >= CVMX_PIP_NUM_INPUT_PORTS) { 423 #ifdef CONFIG_64BIT 424 atomic64_add(1, (atomic64_t *)&priv->stats.rx_packets); 425 atomic64_add(skb->len, (atomic64_t *)&priv->stats.rx_bytes); 426 #else 427 atomic_add(1, (atomic_t *)&priv->stats.rx_packets); 428 atomic_add(skb->len, (atomic_t *)&priv->stats.rx_bytes); 429 #endif 430 } 431 netif_receive_skb(skb); 432 rx_count++; 433 } else { 434 /* Drop any packet received for a device that isn't up */ 435 /* 436 printk_ratelimited("%s: Device not up, packet dropped\n", 437 dev->name); 438 */ 439 #ifdef CONFIG_64BIT 440 atomic64_add(1, (atomic64_t *)&priv->stats.rx_dropped); 441 #else 442 atomic_add(1, (atomic_t *)&priv->stats.rx_dropped); 443 #endif 444 dev_kfree_skb_irq(skb); 445 } 446 } else { 447 /* 448 * Drop any packet received for a device that 449 * doesn't exist. 450 */ 451 printk_ratelimited("Port %d not controlled by Linux, packet dropped\n", 452 work->ipprt); 453 dev_kfree_skb_irq(skb); 454 } 455 /* 456 * Check to see if the skbuff and work share the same 457 * packet buffer. 458 */ 459 if (USE_SKBUFFS_IN_HW && likely(packet_not_copied)) { 460 /* 461 * This buffer needs to be replaced, increment 462 * the number of buffers we need to free by 463 * one. 464 */ 465 cvmx_fau_atomic_add32(FAU_NUM_PACKET_BUFFERS_TO_FREE, 466 1); 467 468 cvmx_fpa_free(work, CVMX_FPA_WQE_POOL, 469 DONT_WRITEBACK(1)); 470 } else { 471 cvm_oct_free_work(work); 472 } 473 } 474 /* Restore the original POW group mask */ 475 cvmx_write_csr(CVMX_POW_PP_GRP_MSKX(coreid), old_group_mask); 476 if (USE_ASYNC_IOBDMA) { 477 /* Restore the scratch area */ 478 cvmx_scratch_write64(CVMX_SCR_SCRATCH, old_scratch); 479 } 480 cvm_oct_rx_refill_pool(0); 481 482 if (rx_count < budget && napi != NULL) { 483 /* No more work */ 484 napi_complete(napi); 485 cvm_oct_no_more_work(); 486 } 487 return rx_count; 488 } 489 490 #ifdef CONFIG_NET_POLL_CONTROLLER 491 /** 492 * cvm_oct_poll_controller - poll for receive packets 493 * device. 494 * 495 * @dev: Device to poll. Unused 496 */ 497 void cvm_oct_poll_controller(struct net_device *dev) 498 { 499 cvm_oct_napi_poll(NULL, 16); 500 } 501 #endif 502 503 void cvm_oct_rx_initialize(void) 504 { 505 int i; 506 struct net_device *dev_for_napi = NULL; 507 union cvmx_pow_wq_int_thrx int_thr; 508 union cvmx_pow_wq_int_pc int_pc; 509 510 for (i = 0; i < TOTAL_NUMBER_OF_PORTS; i++) { 511 if (cvm_oct_device[i]) { 512 dev_for_napi = cvm_oct_device[i]; 513 break; 514 } 515 } 516 517 if (NULL == dev_for_napi) 518 panic("No net_devices were allocated."); 519 520 if (max_rx_cpus > 1 && max_rx_cpus < num_online_cpus()) 521 atomic_set(&core_state.available_cores, max_rx_cpus); 522 else 523 atomic_set(&core_state.available_cores, num_online_cpus()); 524 core_state.baseline_cores = atomic_read(&core_state.available_cores); 525 526 core_state.cpu_state = CPU_MASK_NONE; 527 for_each_possible_cpu(i) { 528 netif_napi_add(dev_for_napi, &cvm_oct_napi[i].napi, 529 cvm_oct_napi_poll, rx_napi_weight); 530 napi_enable(&cvm_oct_napi[i].napi); 531 } 532 /* Register an IRQ hander for to receive POW interrupts */ 533 i = request_irq(OCTEON_IRQ_WORKQ0 + pow_receive_group, 534 cvm_oct_do_interrupt, 0, "Ethernet", cvm_oct_device); 535 536 if (i) 537 panic("Could not acquire Ethernet IRQ %d\n", 538 OCTEON_IRQ_WORKQ0 + pow_receive_group); 539 540 disable_irq_nosync(OCTEON_IRQ_WORKQ0 + pow_receive_group); 541 542 int_thr.u64 = 0; 543 int_thr.s.tc_en = 1; 544 int_thr.s.tc_thr = 1; 545 /* Enable POW interrupt when our port has at least one packet */ 546 cvmx_write_csr(CVMX_POW_WQ_INT_THRX(pow_receive_group), int_thr.u64); 547 548 int_pc.u64 = 0; 549 int_pc.s.pc_thr = 5; 550 cvmx_write_csr(CVMX_POW_WQ_INT_PC, int_pc.u64); 551 552 553 /* Scheduld NAPI now. This will indirectly enable interrupts. */ 554 cvm_oct_enable_one_cpu(); 555 } 556 557 void cvm_oct_rx_shutdown(void) 558 { 559 int i; 560 /* Shutdown all of the NAPIs */ 561 for_each_possible_cpu(i) 562 netif_napi_del(&cvm_oct_napi[i].napi); 563 } 564