1 /***********************license start*************** 2 * Author: Cavium Networks 3 * 4 * Contact: support@caviumnetworks.com 5 * This file is part of the OCTEON SDK 6 * 7 * Copyright (c) 2003-2008 Cavium Networks 8 * 9 * This file is free software; you can redistribute it and/or modify 10 * it under the terms of the GNU General Public License, Version 2, as 11 * published by the Free Software Foundation. 12 * 13 * This file is distributed in the hope that it will be useful, but 14 * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty 15 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or 16 * NONINFRINGEMENT. See the GNU General Public License for more 17 * details. 18 * 19 * You should have received a copy of the GNU General Public License 20 * along with this file; if not, write to the Free Software 21 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 22 * or visit http://www.gnu.org/licenses/. 23 * 24 * This file may also be available under a different license from Cavium. 25 * Contact Cavium Networks for more information 26 ***********************license end**************************************/ 27 28 /* 29 * 30 * Support functions for managing command queues used for 31 * various hardware blocks. 32 * 33 * The common command queue infrastructure abstracts out the 34 * software necessary for adding to Octeon's chained queue 35 * structures. These structures are used for commands to the 36 * PKO, ZIP, DFA, RAID, and DMA engine blocks. Although each 37 * hardware unit takes commands and CSRs of different types, 38 * they all use basic linked command buffers to store the 39 * pending request. In general, users of the CVMX API don't 40 * call cvmx-cmd-queue functions directly. Instead the hardware 41 * unit specific wrapper should be used. The wrappers perform 42 * unit specific validation and CSR writes to submit the 43 * commands. 
44 * 45 * Even though most software will never directly interact with 46 * cvmx-cmd-queue, knowledge of its internal workings can help 47 * in diagnosing performance problems and help with debugging. 48 * 49 * Command queue pointers are stored in a global named block 50 * called "cvmx_cmd_queues". Except for the PKO queues, each 51 * hardware queue is stored in its own cache line to reduce SMP 52 * contention on spin locks. The PKO queues are stored such that 53 * every 16th queue is next to each other in memory. This scheme 54 * allows for queues being in separate cache lines when there 55 * are a low number of queues per port. With 16 queues per port, 56 * the first queue for each port is in the same cache area. The 57 * second queues for each port are in another area, etc. This 58 * allows software to implement very efficient lockless PKO with 59 * 16 queues per port using a minimum of cache lines per core. 60 * All queues for a given core will be isolated in the same 61 * cache area. 62 * 63 * In addition to the memory pointer layout, cvmx-cmd-queue 64 * provides an optimized fair ll/sc locking mechanism for the 65 * queues. The lock uses a "ticket / now serving" model to 66 * maintain fair order on contended locks. In addition, it uses 67 * predicted locking time to limit cache contention. When a core 68 * knows it must wait in line for a lock, it spins on the 69 * internal cycle counter to completely eliminate any causes of 70 * bus traffic. 71 * 72 */ 73 74 #ifndef __CVMX_CMD_QUEUE_H__ 75 #define __CVMX_CMD_QUEUE_H__ 76 77 #include <linux/prefetch.h> 78 79 #include <asm/compiler.h> 80 81 #include <asm/octeon/cvmx-fpa.h> 82 /** 83 * By default we disable the max depth support. Most programs 84 * don't use it and it slows down the command queue processing 85 * significantly.
86 */ 87 #ifndef CVMX_CMD_QUEUE_ENABLE_MAX_DEPTH 88 #define CVMX_CMD_QUEUE_ENABLE_MAX_DEPTH 0 89 #endif 90 91 /** 92 * Enumeration representing all hardware blocks that use command 93 * queues. Each hardware block has up to 65536 sub identifiers for 94 * multiple command queues. Not all chips support all hardware 95 * units. 96 */ 97 typedef enum { 98 CVMX_CMD_QUEUE_PKO_BASE = 0x00000, 99 100 #define CVMX_CMD_QUEUE_PKO(queue) \ 101 ((cvmx_cmd_queue_id_t)(CVMX_CMD_QUEUE_PKO_BASE + (0xffff&(queue)))) 102 103 CVMX_CMD_QUEUE_ZIP = 0x10000, 104 CVMX_CMD_QUEUE_DFA = 0x20000, 105 CVMX_CMD_QUEUE_RAID = 0x30000, 106 CVMX_CMD_QUEUE_DMA_BASE = 0x40000, 107 108 #define CVMX_CMD_QUEUE_DMA(queue) \ 109 ((cvmx_cmd_queue_id_t)(CVMX_CMD_QUEUE_DMA_BASE + (0xffff&(queue)))) 110 111 CVMX_CMD_QUEUE_END = 0x50000, 112 } cvmx_cmd_queue_id_t; 113 114 /** 115 * Command write operations can fail if the command queue needs 116 * a new buffer and the associated FPA pool is empty. It can also 117 * fail if the number of queued command words reaches the maximum 118 * set at initialization. 119 */ 120 typedef enum { 121 CVMX_CMD_QUEUE_SUCCESS = 0, 122 CVMX_CMD_QUEUE_NO_MEMORY = -1, 123 CVMX_CMD_QUEUE_FULL = -2, 124 CVMX_CMD_QUEUE_INVALID_PARAM = -3, 125 CVMX_CMD_QUEUE_ALREADY_SETUP = -4, 126 } cvmx_cmd_queue_result_t; 127 128 typedef struct { 129 /* You have lock when this is your ticket */ 130 uint8_t now_serving; 131 uint64_t unused1:24; 132 /* Maximum outstanding command words */ 133 uint32_t max_depth; 134 /* FPA pool buffers come from */ 135 uint64_t fpa_pool:3; 136 /* Top of command buffer pointer shifted 7 */ 137 uint64_t base_ptr_div128:29; 138 uint64_t unused2:6; 139 /* FPA buffer size in 64bit words minus 1 */ 140 uint64_t pool_size_m1:13; 141 /* Number of commands already used in buffer */ 142 uint64_t index:13; 143 } __cvmx_cmd_queue_state_t; 144 145 /** 146 * This structure contains the global state of all command queues. 
147 * It is stored in a bootmem named block and shared by all 148 * applications running on Octeon. Tickets are stored in a differnet 149 * cache line that queue information to reduce the contention on the 150 * ll/sc used to get a ticket. If this is not the case, the update 151 * of queue state causes the ll/sc to fail quite often. 152 */ 153 typedef struct { 154 uint64_t ticket[(CVMX_CMD_QUEUE_END >> 16) * 256]; 155 __cvmx_cmd_queue_state_t state[(CVMX_CMD_QUEUE_END >> 16) * 256]; 156 } __cvmx_cmd_queue_all_state_t; 157 158 /** 159 * Initialize a command queue for use. The initial FPA buffer is 160 * allocated and the hardware unit is configured to point to the 161 * new command queue. 162 * 163 * @queue_id: Hardware command queue to initialize. 164 * @max_depth: Maximum outstanding commands that can be queued. 165 * @fpa_pool: FPA pool the command queues should come from. 166 * @pool_size: Size of each buffer in the FPA pool (bytes) 167 * 168 * Returns CVMX_CMD_QUEUE_SUCCESS or a failure code 169 */ 170 cvmx_cmd_queue_result_t cvmx_cmd_queue_initialize(cvmx_cmd_queue_id_t queue_id, 171 int max_depth, int fpa_pool, 172 int pool_size); 173 174 /** 175 * Shutdown a queue a free it's command buffers to the FPA. The 176 * hardware connected to the queue must be stopped before this 177 * function is called. 178 * 179 * @queue_id: Queue to shutdown 180 * 181 * Returns CVMX_CMD_QUEUE_SUCCESS or a failure code 182 */ 183 cvmx_cmd_queue_result_t cvmx_cmd_queue_shutdown(cvmx_cmd_queue_id_t queue_id); 184 185 /** 186 * Return the number of command words pending in the queue. This 187 * function may be relatively slow for some hardware units. 188 * 189 * @queue_id: Hardware command queue to query 190 * 191 * Returns Number of outstanding commands 192 */ 193 int cvmx_cmd_queue_length(cvmx_cmd_queue_id_t queue_id); 194 195 /** 196 * Return the command buffer to be written to. 
The purpose of this 197 * function is to allow CVMX routine access t othe low level buffer 198 * for initial hardware setup. User applications should not call this 199 * function directly. 200 * 201 * @queue_id: Command queue to query 202 * 203 * Returns Command buffer or NULL on failure 204 */ 205 void *cvmx_cmd_queue_buffer(cvmx_cmd_queue_id_t queue_id); 206 207 /** 208 * Get the index into the state arrays for the supplied queue id. 209 * 210 * @queue_id: Queue ID to get an index for 211 * 212 * Returns Index into the state arrays 213 */ 214 static inline int __cvmx_cmd_queue_get_index(cvmx_cmd_queue_id_t queue_id) 215 { 216 /* 217 * Warning: This code currently only works with devices that 218 * have 256 queues or less. Devices with more than 16 queues 219 * are laid out in memory to allow cores quick access to 220 * every 16th queue. This reduces cache thrashing when you are 221 * running 16 queues per port to support lockless operation. 222 */ 223 int unit = queue_id >> 16; 224 int q = (queue_id >> 4) & 0xf; 225 int core = queue_id & 0xf; 226 return unit * 256 + core * 16 + q; 227 } 228 229 /** 230 * Lock the supplied queue so nobody else is updating it at the same 231 * time as us. 
232 * 233 * @queue_id: Queue ID to lock 234 * @qptr: Pointer to the queue's global state 235 */ 236 static inline void __cvmx_cmd_queue_lock(cvmx_cmd_queue_id_t queue_id, 237 __cvmx_cmd_queue_state_t *qptr) 238 { 239 extern __cvmx_cmd_queue_all_state_t 240 *__cvmx_cmd_queue_state_ptr; 241 int tmp; 242 int my_ticket; 243 prefetch(qptr); 244 asm volatile ( 245 ".set push\n" 246 ".set noreorder\n" 247 "1:\n" 248 /* Atomic add one to ticket_ptr */ 249 "ll %[my_ticket], %[ticket_ptr]\n" 250 /* and store the original value */ 251 "li %[ticket], 1\n" 252 /* in my_ticket */ 253 "baddu %[ticket], %[my_ticket]\n" 254 "sc %[ticket], %[ticket_ptr]\n" 255 "beqz %[ticket], 1b\n" 256 " nop\n" 257 /* Load the current now_serving ticket */ 258 "lbu %[ticket], %[now_serving]\n" 259 "2:\n" 260 /* Jump out if now_serving == my_ticket */ 261 "beq %[ticket], %[my_ticket], 4f\n" 262 /* Find out how many tickets are in front of me */ 263 " subu %[ticket], %[my_ticket], %[ticket]\n" 264 /* Use tickets in front of me minus one to delay */ 265 "subu %[ticket], 1\n" 266 /* Delay will be ((tickets in front)-1)*32 loops */ 267 "cins %[ticket], %[ticket], 5, 7\n" 268 "3:\n" 269 /* Loop here until our ticket might be up */ 270 "bnez %[ticket], 3b\n" 271 " subu %[ticket], 1\n" 272 /* Jump back up to check out ticket again */ 273 "b 2b\n" 274 /* Load the current now_serving ticket */ 275 " lbu %[ticket], %[now_serving]\n" 276 "4:\n" 277 ".set pop\n" : 278 [ticket_ptr] "=" GCC_OFF_SMALL_ASM()(__cvmx_cmd_queue_state_ptr->ticket[__cvmx_cmd_queue_get_index(queue_id)]), 279 [now_serving] "=m"(qptr->now_serving), [ticket] "=r"(tmp), 280 [my_ticket] "=r"(my_ticket) 281 ); 282 } 283 284 /** 285 * Unlock the queue, flushing all writes. 
286 * 287 * @qptr: Queue to unlock 288 */ 289 static inline void __cvmx_cmd_queue_unlock(__cvmx_cmd_queue_state_t *qptr) 290 { 291 qptr->now_serving++; 292 CVMX_SYNCWS; 293 } 294 295 /** 296 * Get the queue state structure for the given queue id 297 * 298 * @queue_id: Queue id to get 299 * 300 * Returns Queue structure or NULL on failure 301 */ 302 static inline __cvmx_cmd_queue_state_t 303 *__cvmx_cmd_queue_get_state(cvmx_cmd_queue_id_t queue_id) 304 { 305 extern __cvmx_cmd_queue_all_state_t 306 *__cvmx_cmd_queue_state_ptr; 307 return &__cvmx_cmd_queue_state_ptr-> 308 state[__cvmx_cmd_queue_get_index(queue_id)]; 309 } 310 311 /** 312 * Write an arbitrary number of command words to a command queue. 313 * This is a generic function; the fixed number of command word 314 * functions yield higher performance. 315 * 316 * @queue_id: Hardware command queue to write to 317 * @use_locking: 318 * Use internal locking to ensure exclusive access for queue 319 * updates. If you don't use this locking you must ensure 320 * exclusivity some other way. Locking is strongly recommended. 321 * @cmd_count: Number of command words to write 322 * @cmds: Array of commands to write 323 * 324 * Returns CVMX_CMD_QUEUE_SUCCESS or a failure code 325 */ 326 static inline cvmx_cmd_queue_result_t cvmx_cmd_queue_write(cvmx_cmd_queue_id_t 327 queue_id, 328 int use_locking, 329 int cmd_count, 330 uint64_t *cmds) 331 { 332 __cvmx_cmd_queue_state_t *qptr = __cvmx_cmd_queue_get_state(queue_id); 333 334 /* Make sure nobody else is updating the same queue */ 335 if (likely(use_locking)) 336 __cvmx_cmd_queue_lock(queue_id, qptr); 337 338 /* 339 * If a max queue length was specified then make sure we don't 340 * exceed it. If any part of the command would be below the 341 * limit we allow it. 
342 */ 343 if (CVMX_CMD_QUEUE_ENABLE_MAX_DEPTH && unlikely(qptr->max_depth)) { 344 if (unlikely 345 (cvmx_cmd_queue_length(queue_id) > (int)qptr->max_depth)) { 346 if (likely(use_locking)) 347 __cvmx_cmd_queue_unlock(qptr); 348 return CVMX_CMD_QUEUE_FULL; 349 } 350 } 351 352 /* 353 * Normally there is plenty of room in the current buffer for 354 * the command. 355 */ 356 if (likely(qptr->index + cmd_count < qptr->pool_size_m1)) { 357 uint64_t *ptr = 358 (uint64_t *) cvmx_phys_to_ptr((uint64_t) qptr-> 359 base_ptr_div128 << 7); 360 ptr += qptr->index; 361 qptr->index += cmd_count; 362 while (cmd_count--) 363 *ptr++ = *cmds++; 364 } else { 365 uint64_t *ptr; 366 int count; 367 /* 368 * We need a new command buffer. Fail if there isn't 369 * one available. 370 */ 371 uint64_t *new_buffer = 372 (uint64_t *) cvmx_fpa_alloc(qptr->fpa_pool); 373 if (unlikely(new_buffer == NULL)) { 374 if (likely(use_locking)) 375 __cvmx_cmd_queue_unlock(qptr); 376 return CVMX_CMD_QUEUE_NO_MEMORY; 377 } 378 ptr = 379 (uint64_t *) cvmx_phys_to_ptr((uint64_t) qptr-> 380 base_ptr_div128 << 7); 381 /* 382 * Figure out how many command words will fit in this 383 * buffer. One location will be needed for the next 384 * buffer pointer. 385 */ 386 count = qptr->pool_size_m1 - qptr->index; 387 ptr += qptr->index; 388 cmd_count -= count; 389 while (count--) 390 *ptr++ = *cmds++; 391 *ptr = cvmx_ptr_to_phys(new_buffer); 392 /* 393 * The current buffer is full and has a link to the 394 * next buffer. Time to write the rest of the commands 395 * into the new buffer. 396 */ 397 qptr->base_ptr_div128 = *ptr >> 7; 398 qptr->index = cmd_count; 399 ptr = new_buffer; 400 while (cmd_count--) 401 *ptr++ = *cmds++; 402 } 403 404 /* All updates are complete. Release the lock and return */ 405 if (likely(use_locking)) 406 __cvmx_cmd_queue_unlock(qptr); 407 return CVMX_CMD_QUEUE_SUCCESS; 408 } 409 410 /** 411 * Simple function to write two command words to a command 412 * queue. 
413 * 414 * @queue_id: Hardware command queue to write to 415 * @use_locking: 416 * Use internal locking to ensure exclusive access for queue 417 * updates. If you don't use this locking you must ensure 418 * exclusivity some other way. Locking is strongly recommended. 419 * @cmd1: Command 420 * @cmd2: Command 421 * 422 * Returns CVMX_CMD_QUEUE_SUCCESS or a failure code 423 */ 424 static inline cvmx_cmd_queue_result_t cvmx_cmd_queue_write2(cvmx_cmd_queue_id_t 425 queue_id, 426 int use_locking, 427 uint64_t cmd1, 428 uint64_t cmd2) 429 { 430 __cvmx_cmd_queue_state_t *qptr = __cvmx_cmd_queue_get_state(queue_id); 431 432 /* Make sure nobody else is updating the same queue */ 433 if (likely(use_locking)) 434 __cvmx_cmd_queue_lock(queue_id, qptr); 435 436 /* 437 * If a max queue length was specified then make sure we don't 438 * exceed it. If any part of the command would be below the 439 * limit we allow it. 440 */ 441 if (CVMX_CMD_QUEUE_ENABLE_MAX_DEPTH && unlikely(qptr->max_depth)) { 442 if (unlikely 443 (cvmx_cmd_queue_length(queue_id) > (int)qptr->max_depth)) { 444 if (likely(use_locking)) 445 __cvmx_cmd_queue_unlock(qptr); 446 return CVMX_CMD_QUEUE_FULL; 447 } 448 } 449 450 /* 451 * Normally there is plenty of room in the current buffer for 452 * the command. 453 */ 454 if (likely(qptr->index + 2 < qptr->pool_size_m1)) { 455 uint64_t *ptr = 456 (uint64_t *) cvmx_phys_to_ptr((uint64_t) qptr-> 457 base_ptr_div128 << 7); 458 ptr += qptr->index; 459 qptr->index += 2; 460 ptr[0] = cmd1; 461 ptr[1] = cmd2; 462 } else { 463 uint64_t *ptr; 464 /* 465 * Figure out how many command words will fit in this 466 * buffer. One location will be needed for the next 467 * buffer pointer. 468 */ 469 int count = qptr->pool_size_m1 - qptr->index; 470 /* 471 * We need a new command buffer. Fail if there isn't 472 * one available. 
473 */ 474 uint64_t *new_buffer = 475 (uint64_t *) cvmx_fpa_alloc(qptr->fpa_pool); 476 if (unlikely(new_buffer == NULL)) { 477 if (likely(use_locking)) 478 __cvmx_cmd_queue_unlock(qptr); 479 return CVMX_CMD_QUEUE_NO_MEMORY; 480 } 481 count--; 482 ptr = 483 (uint64_t *) cvmx_phys_to_ptr((uint64_t) qptr-> 484 base_ptr_div128 << 7); 485 ptr += qptr->index; 486 *ptr++ = cmd1; 487 if (likely(count)) 488 *ptr++ = cmd2; 489 *ptr = cvmx_ptr_to_phys(new_buffer); 490 /* 491 * The current buffer is full and has a link to the 492 * next buffer. Time to write the rest of the commands 493 * into the new buffer. 494 */ 495 qptr->base_ptr_div128 = *ptr >> 7; 496 qptr->index = 0; 497 if (unlikely(count == 0)) { 498 qptr->index = 1; 499 new_buffer[0] = cmd2; 500 } 501 } 502 503 /* All updates are complete. Release the lock and return */ 504 if (likely(use_locking)) 505 __cvmx_cmd_queue_unlock(qptr); 506 return CVMX_CMD_QUEUE_SUCCESS; 507 } 508 509 /** 510 * Simple function to write three command words to a command 511 * queue. 512 * 513 * @queue_id: Hardware command queue to write to 514 * @use_locking: 515 * Use internal locking to ensure exclusive access for queue 516 * updates. If you don't use this locking you must ensure 517 * exclusivity some other way. Locking is strongly recommended. 518 * @cmd1: Command 519 * @cmd2: Command 520 * @cmd3: Command 521 * 522 * Returns CVMX_CMD_QUEUE_SUCCESS or a failure code 523 */ 524 static inline cvmx_cmd_queue_result_t cvmx_cmd_queue_write3(cvmx_cmd_queue_id_t 525 queue_id, 526 int use_locking, 527 uint64_t cmd1, 528 uint64_t cmd2, 529 uint64_t cmd3) 530 { 531 __cvmx_cmd_queue_state_t *qptr = __cvmx_cmd_queue_get_state(queue_id); 532 533 /* Make sure nobody else is updating the same queue */ 534 if (likely(use_locking)) 535 __cvmx_cmd_queue_lock(queue_id, qptr); 536 537 /* 538 * If a max queue length was specified then make sure we don't 539 * exceed it. If any part of the command would be below the 540 * limit we allow it. 
541 */ 542 if (CVMX_CMD_QUEUE_ENABLE_MAX_DEPTH && unlikely(qptr->max_depth)) { 543 if (unlikely 544 (cvmx_cmd_queue_length(queue_id) > (int)qptr->max_depth)) { 545 if (likely(use_locking)) 546 __cvmx_cmd_queue_unlock(qptr); 547 return CVMX_CMD_QUEUE_FULL; 548 } 549 } 550 551 /* 552 * Normally there is plenty of room in the current buffer for 553 * the command. 554 */ 555 if (likely(qptr->index + 3 < qptr->pool_size_m1)) { 556 uint64_t *ptr = 557 (uint64_t *) cvmx_phys_to_ptr((uint64_t) qptr-> 558 base_ptr_div128 << 7); 559 ptr += qptr->index; 560 qptr->index += 3; 561 ptr[0] = cmd1; 562 ptr[1] = cmd2; 563 ptr[2] = cmd3; 564 } else { 565 uint64_t *ptr; 566 /* 567 * Figure out how many command words will fit in this 568 * buffer. One location will be needed for the next 569 * buffer pointer 570 */ 571 int count = qptr->pool_size_m1 - qptr->index; 572 /* 573 * We need a new command buffer. Fail if there isn't 574 * one available 575 */ 576 uint64_t *new_buffer = 577 (uint64_t *) cvmx_fpa_alloc(qptr->fpa_pool); 578 if (unlikely(new_buffer == NULL)) { 579 if (likely(use_locking)) 580 __cvmx_cmd_queue_unlock(qptr); 581 return CVMX_CMD_QUEUE_NO_MEMORY; 582 } 583 count--; 584 ptr = 585 (uint64_t *) cvmx_phys_to_ptr((uint64_t) qptr-> 586 base_ptr_div128 << 7); 587 ptr += qptr->index; 588 *ptr++ = cmd1; 589 if (count) { 590 *ptr++ = cmd2; 591 if (count > 1) 592 *ptr++ = cmd3; 593 } 594 *ptr = cvmx_ptr_to_phys(new_buffer); 595 /* 596 * The current buffer is full and has a link to the 597 * next buffer. Time to write the rest of the commands 598 * into the new buffer. 599 */ 600 qptr->base_ptr_div128 = *ptr >> 7; 601 qptr->index = 0; 602 ptr = new_buffer; 603 if (count == 0) { 604 *ptr++ = cmd2; 605 qptr->index++; 606 } 607 if (count < 2) { 608 *ptr++ = cmd3; 609 qptr->index++; 610 } 611 } 612 613 /* All updates are complete. 
Release the lock and return */ 614 if (likely(use_locking)) 615 __cvmx_cmd_queue_unlock(qptr); 616 return CVMX_CMD_QUEUE_SUCCESS; 617 } 618 619 #endif /* __CVMX_CMD_QUEUE_H__ */ 620