/***********************license start***************
 * Author: Cavium Networks
 *
 * Contact: support@caviumnetworks.com
 * This file is part of the OCTEON SDK
 *
 * Copyright (c) 2003-2008 Cavium Networks
 *
 * This file is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, Version 2, as
 * published by the Free Software Foundation.
 *
 * This file is distributed in the hope that it will be useful, but
 * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
 * NONINFRINGEMENT. See the GNU General Public License for more
 * details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this file; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 * or visit http://www.gnu.org/licenses/.
 *
 * This file may also be available under a different license from Cavium.
 * Contact Cavium Networks for more information
 ***********************license end**************************************/

/*
 *
 * Support functions for managing command queues used for
 * various hardware blocks.
 *
 * The common command queue infrastructure abstracts out the
 * software necessary for adding to Octeon's chained queue
 * structures. These structures are used for commands to the
 * PKO, ZIP, DFA, RAID, and DMA engine blocks. Although each
 * hardware unit takes commands and CSRs of different types,
 * they all use basic linked command buffers to store the
 * pending request. In general, users of the CVMX API don't
 * call cvmx-cmd-queue functions directly. Instead the hardware
 * unit specific wrapper should be used. The wrappers perform
 * unit specific validation and CSR writes to submit the
 * commands.
 *
 * Even though most software will never directly interact with
 * cvmx-cmd-queue, knowledge of its internal workings can help
 * in diagnosing performance problems and help with debugging.
 *
 * Command queue pointers are stored in a global named block
 * called "cvmx_cmd_queues". Except for the PKO queues, each
 * hardware queue is stored in its own cache line to reduce SMP
 * contention on spin locks. The PKO queues are stored such that
 * every 16th queue is next to each other in memory. This scheme
 * allows for queues being in separate cache lines when there
 * is a low number of queues per port. With 16 queues per port,
 * the first queue for each port is in the same cache area. The
 * second queues for each port are in another area, etc. This
 * allows software to implement very efficient lockless PKO with
 * 16 queues per port using a minimum of cache lines per core.
 * All queues for a given core will be isolated in the same
 * cache area.
 *
 * In addition to the memory pointer layout, cvmx-cmd-queue
 * provides an optimized fair ll/sc locking mechanism for the
 * queues. The lock uses a "ticket / now serving" model to
 * maintain fair order on contended locks. In addition, it uses
 * predicted locking time to limit cache contention. When a core
 * knows it must wait in line for a lock, it spins on the
 * internal cycle counter to completely eliminate any causes of
 * bus traffic.
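 *
 * As a rough sketch only (the real implementation is the inline
 * assembly in __cvmx_cmd_queue_lock() below), the ticket model is
 * equivalent to:
 *
 *	my_ticket = atomic_fetch_add(&ticket, 1);
 *	while (now_serving != my_ticket) {
 *		delay roughly 32 cycles per ticket still ahead of us;
 *		re-read now_serving;
 *	}
 *	... critical section ...
 *	now_serving++;	(this is __cvmx_cmd_queue_unlock())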
 *
 */

#ifndef __CVMX_CMD_QUEUE_H__
#define __CVMX_CMD_QUEUE_H__

#include <linux/prefetch.h>

#include <asm/octeon/cvmx-fpa.h>

/**
 * By default we disable the max depth support. Most programs
 * don't use it and it slows down the command queue processing
 * significantly.
 */
#ifndef CVMX_CMD_QUEUE_ENABLE_MAX_DEPTH
#define CVMX_CMD_QUEUE_ENABLE_MAX_DEPTH 0
#endif

/**
 * Enumeration representing all hardware blocks that use command
 * queues. Each hardware block has up to 65536 sub identifiers for
 * multiple command queues. Not all chips support all hardware
 * units.
 */
typedef enum {
	CVMX_CMD_QUEUE_PKO_BASE = 0x00000,

#define CVMX_CMD_QUEUE_PKO(queue) \
	((cvmx_cmd_queue_id_t)(CVMX_CMD_QUEUE_PKO_BASE + (0xffff&(queue))))

	CVMX_CMD_QUEUE_ZIP = 0x10000,
	CVMX_CMD_QUEUE_DFA = 0x20000,
	CVMX_CMD_QUEUE_RAID = 0x30000,
	CVMX_CMD_QUEUE_DMA_BASE = 0x40000,

#define CVMX_CMD_QUEUE_DMA(queue) \
	((cvmx_cmd_queue_id_t)(CVMX_CMD_QUEUE_DMA_BASE + (0xffff&(queue))))

	CVMX_CMD_QUEUE_END = 0x50000,
} cvmx_cmd_queue_id_t;

/**
 * Command write operations can fail if the command queue needs
 * a new buffer and the associated FPA pool is empty. They can also
 * fail if the number of queued command words reaches the maximum
 * set at initialization.
 */
typedef enum {
	CVMX_CMD_QUEUE_SUCCESS = 0,
	CVMX_CMD_QUEUE_NO_MEMORY = -1,
	CVMX_CMD_QUEUE_FULL = -2,
	CVMX_CMD_QUEUE_INVALID_PARAM = -3,
	CVMX_CMD_QUEUE_ALREADY_SETUP = -4,
} cvmx_cmd_queue_result_t;

typedef struct {
	/* You have the lock when this is your ticket */
	uint8_t now_serving;
	uint64_t unused1:24;
	/* Maximum outstanding command words */
	uint32_t max_depth;
	/* FPA pool the buffers come from */
	uint64_t fpa_pool:3;
	/* Top of command buffer pointer shifted 7 */
	uint64_t base_ptr_div128:29;
	uint64_t unused2:6;
	/* FPA buffer size in 64bit words minus 1 */
	uint64_t pool_size_m1:13;
	/* Number of commands already used in buffer */
	uint64_t index:13;
} __cvmx_cmd_queue_state_t;

/**
 * This structure contains the global state of all command queues.
 * It is stored in a bootmem named block and shared by all
 * applications running on Octeon. Tickets are stored in a different
 * cache line than the queue information to reduce the contention on
 * the ll/sc used to get a ticket. If this were not the case, updates
 * of the queue state would cause the ll/sc to fail quite often.
 */
typedef struct {
	uint64_t ticket[(CVMX_CMD_QUEUE_END >> 16) * 256];
	__cvmx_cmd_queue_state_t state[(CVMX_CMD_QUEUE_END >> 16) * 256];
} __cvmx_cmd_queue_all_state_t;

/**
 * Initialize a command queue for use. The initial FPA buffer is
 * allocated and the hardware unit is configured to point to the
 * new command queue.
 *
 * @queue_id:  Hardware command queue to initialize.
 * @max_depth: Maximum outstanding commands that can be queued.
 * @fpa_pool:  FPA pool the command queues should come from.
 * @pool_size: Size of each buffer in the FPA pool (bytes)
 *
 * Returns CVMX_CMD_QUEUE_SUCCESS or a failure code
 */
cvmx_cmd_queue_result_t cvmx_cmd_queue_initialize(cvmx_cmd_queue_id_t queue_id,
						  int max_depth, int fpa_pool,
						  int pool_size);
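
/*
 * Example (illustrative sketch only; the FPA pool number and buffer
 * size below are hypothetical and depend on how the application has
 * configured its FPA pools):
 *
 *	cvmx_cmd_queue_result_t rv;
 *	rv = cvmx_cmd_queue_initialize(CVMX_CMD_QUEUE_DMA(0),
 *				       0,      - 0 disables max depth checks
 *				       3,      - hypothetical FPA pool number
 *				       1024);  - buffer size of that pool
 *	if (rv != CVMX_CMD_QUEUE_SUCCESS)
 *		... handle failure (pool empty, already setup, ...) ...
 */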

/**
 * Shutdown a queue and free its command buffers to the FPA. The
 * hardware connected to the queue must be stopped before this
 * function is called.
 *
 * @queue_id: Queue to shutdown
 *
 * Returns CVMX_CMD_QUEUE_SUCCESS or a failure code
 */
cvmx_cmd_queue_result_t cvmx_cmd_queue_shutdown(cvmx_cmd_queue_id_t queue_id);

/**
 * Return the number of command words pending in the queue. This
 * function may be relatively slow for some hardware units.
 *
 * @queue_id: Hardware command queue to query
 *
 * Returns Number of outstanding commands
 */
int cvmx_cmd_queue_length(cvmx_cmd_queue_id_t queue_id);

/**
 * Return the command buffer to be written to. The purpose of this
 * function is to allow CVMX routines access to the low level buffer
 * for initial hardware setup. User applications should not call this
 * function directly.
 *
 * @queue_id: Command queue to query
 *
 * Returns Command buffer or NULL on failure
 */
void *cvmx_cmd_queue_buffer(cvmx_cmd_queue_id_t queue_id);

/**
 * Get the index into the state arrays for the supplied queue id.
 *
 * @queue_id: Queue ID to get an index for
 *
 * Returns Index into the state arrays
 */
static inline int __cvmx_cmd_queue_get_index(cvmx_cmd_queue_id_t queue_id)
{
	/*
	 * Warning: This code currently only works with devices that
	 * have 256 queues or fewer. Devices with more than 16 queues
	 * are laid out in memory to allow cores quick access to
	 * every 16th queue. This reduces cache thrashing when you are
	 * running 16 queues per port to support lockless operation.
	 */
	int unit = queue_id >> 16;
	int q = (queue_id >> 4) & 0xf;
	int core = queue_id & 0xf;
	return unit * 256 + core * 16 + q;
}
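
/*
 * Worked example of the index math above, using the 16-queues-per-port
 * PKO layout described at the top of this file: port 2's queue 1 is
 * global PKO queue 33 (0x21), so unit = 0, q = 2, core = 1, giving
 * index 1*16 + 2 = 18. Port 3's queue 1 (global queue 49, 0x31) maps
 * to index 19, directly adjacent, which is how "queue N of every port"
 * ends up packed together in memory for lockless PKO.
 */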

/**
 * Lock the supplied queue so nobody else is updating it at the same
 * time as us.
 *
 * @queue_id: Queue ID to lock
 * @qptr:     Pointer to the queue's global state
 */
static inline void __cvmx_cmd_queue_lock(cvmx_cmd_queue_id_t queue_id,
					 __cvmx_cmd_queue_state_t *qptr)
{
	extern __cvmx_cmd_queue_all_state_t
	    *__cvmx_cmd_queue_state_ptr;
	int tmp;
	int my_ticket;
	prefetch(qptr);
	asm volatile (
		".set push\n"
		".set noreorder\n"
		"1:\n"
		/* Atomic add one to ticket_ptr */
		"ll %[my_ticket], %[ticket_ptr]\n"
		/* and store the original value */
		"li %[ticket], 1\n"
		/* in my_ticket */
		"baddu %[ticket], %[my_ticket]\n"
		"sc %[ticket], %[ticket_ptr]\n"
		"beqz %[ticket], 1b\n"
		" nop\n"
		/* Load the current now_serving ticket */
		"lbu %[ticket], %[now_serving]\n"
		"2:\n"
		/* Jump out if now_serving == my_ticket */
		"beq %[ticket], %[my_ticket], 4f\n"
		/* Find out how many tickets are in front of me */
		" subu %[ticket], %[my_ticket], %[ticket]\n"
		/* Use tickets in front of me minus one to delay */
		"subu %[ticket], 1\n"
		/* Delay will be ((tickets in front)-1)*32 loops */
		"cins %[ticket], %[ticket], 5, 7\n"
		"3:\n"
		/* Loop here until our ticket might be up */
		"bnez %[ticket], 3b\n"
		" subu %[ticket], 1\n"
		/* Jump back up to check our ticket again */
		"b 2b\n"
		/* Load the current now_serving ticket */
		" lbu %[ticket], %[now_serving]\n"
		"4:\n"
		".set pop\n" :
		[ticket_ptr] "=m"(__cvmx_cmd_queue_state_ptr->ticket[__cvmx_cmd_queue_get_index(queue_id)]),
		[now_serving] "=m"(qptr->now_serving), [ticket] "=r"(tmp),
		[my_ticket] "=r"(my_ticket)
	    );
}

/**
 * Unlock the queue, flushing all writes.
 *
 * @qptr: Queue to unlock
 */
static inline void __cvmx_cmd_queue_unlock(__cvmx_cmd_queue_state_t *qptr)
{
	qptr->now_serving++;
	CVMX_SYNCWS;
}

/**
 * Get the queue state structure for the given queue id
 *
 * @queue_id: Queue id to get
 *
 * Returns Queue structure or NULL on failure
 */
static inline __cvmx_cmd_queue_state_t
    *__cvmx_cmd_queue_get_state(cvmx_cmd_queue_id_t queue_id)
{
	extern __cvmx_cmd_queue_all_state_t
	    *__cvmx_cmd_queue_state_ptr;
	return &__cvmx_cmd_queue_state_ptr->
	    state[__cvmx_cmd_queue_get_index(queue_id)];
}
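
/*
 * Illustrative sketch of how the write functions below use these
 * helpers (applications should not call them directly):
 *
 *	__cvmx_cmd_queue_state_t *qptr =
 *		__cvmx_cmd_queue_get_state(queue_id);
 *	__cvmx_cmd_queue_lock(queue_id, qptr);
 *	... append command words, linking in a new FPA buffer
 *	    when the current one fills up ...
 *	__cvmx_cmd_queue_unlock(qptr);
 */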

/**
 * Write an arbitrary number of command words to a command queue.
 * This is a generic function; the fixed number of command word
 * functions yield higher performance.
 *
 * @queue_id:  Hardware command queue to write to
 * @use_locking:
 *	Use internal locking to ensure exclusive access for queue
 *	updates. If you don't use this locking you must ensure
 *	exclusivity some other way. Locking is strongly recommended.
 * @cmd_count: Number of command words to write
 * @cmds:      Array of commands to write
 *
 * Returns CVMX_CMD_QUEUE_SUCCESS or a failure code
 */
static inline cvmx_cmd_queue_result_t cvmx_cmd_queue_write(cvmx_cmd_queue_id_t
							   queue_id,
							   int use_locking,
							   int cmd_count,
							   uint64_t *cmds)
{
	__cvmx_cmd_queue_state_t *qptr = __cvmx_cmd_queue_get_state(queue_id);

	/* Make sure nobody else is updating the same queue */
	if (likely(use_locking))
		__cvmx_cmd_queue_lock(queue_id, qptr);

	/*
	 * If a max queue length was specified then make sure we don't
	 * exceed it. If any part of the command would be below the
	 * limit we allow it.
	 */
	if (CVMX_CMD_QUEUE_ENABLE_MAX_DEPTH && unlikely(qptr->max_depth)) {
		if (unlikely
		    (cvmx_cmd_queue_length(queue_id) > (int)qptr->max_depth)) {
			if (likely(use_locking))
				__cvmx_cmd_queue_unlock(qptr);
			return CVMX_CMD_QUEUE_FULL;
		}
	}

	/*
	 * Normally there is plenty of room in the current buffer for
	 * the command.
	 */
	if (likely(qptr->index + cmd_count < qptr->pool_size_m1)) {
		uint64_t *ptr =
		    (uint64_t *) cvmx_phys_to_ptr((uint64_t) qptr->
						  base_ptr_div128 << 7);
		ptr += qptr->index;
		qptr->index += cmd_count;
		while (cmd_count--)
			*ptr++ = *cmds++;
	} else {
		uint64_t *ptr;
		int count;
		/*
		 * We need a new command buffer. Fail if there isn't
		 * one available.
		 */
		uint64_t *new_buffer =
		    (uint64_t *) cvmx_fpa_alloc(qptr->fpa_pool);
		if (unlikely(new_buffer == NULL)) {
			if (likely(use_locking))
				__cvmx_cmd_queue_unlock(qptr);
			return CVMX_CMD_QUEUE_NO_MEMORY;
		}
		ptr =
		    (uint64_t *) cvmx_phys_to_ptr((uint64_t) qptr->
						  base_ptr_div128 << 7);
		/*
		 * Figure out how many command words will fit in this
		 * buffer. One location will be needed for the next
		 * buffer pointer.
		 */
		count = qptr->pool_size_m1 - qptr->index;
		ptr += qptr->index;
		cmd_count -= count;
		while (count--)
			*ptr++ = *cmds++;
		*ptr = cvmx_ptr_to_phys(new_buffer);
		/*
		 * The current buffer is full and has a link to the
		 * next buffer. Time to write the rest of the commands
		 * into the new buffer.
		 */
		qptr->base_ptr_div128 = *ptr >> 7;
		qptr->index = cmd_count;
		ptr = new_buffer;
		while (cmd_count--)
			*ptr++ = *cmds++;
	}

	/* All updates are complete. Release the lock and return */
	if (likely(use_locking))
		__cvmx_cmd_queue_unlock(qptr);
	return CVMX_CMD_QUEUE_SUCCESS;
}
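
/*
 * Example (sketch only; real command word layouts are defined by the
 * target hardware block and are shown here as opaque placeholders):
 *
 *	uint64_t cmds[3];
 *	cmds[0] = ...;	hardware specific command word
 *	cmds[1] = ...;
 *	cmds[2] = ...;
 *	if (cvmx_cmd_queue_write(CVMX_CMD_QUEUE_DMA(0), 1, 3, cmds) !=
 *	    CVMX_CMD_QUEUE_SUCCESS)
 *		... FPA pool empty or max_depth exceeded ...
 */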

/**
 * Simple function to write two command words to a command
 * queue.
 *
 * @queue_id: Hardware command queue to write to
 * @use_locking:
 *	Use internal locking to ensure exclusive access for queue
 *	updates. If you don't use this locking you must ensure
 *	exclusivity some other way. Locking is strongly recommended.
 * @cmd1:     Command
 * @cmd2:     Command
 *
 * Returns CVMX_CMD_QUEUE_SUCCESS or a failure code
 */
static inline cvmx_cmd_queue_result_t cvmx_cmd_queue_write2(cvmx_cmd_queue_id_t
							    queue_id,
							    int use_locking,
							    uint64_t cmd1,
							    uint64_t cmd2)
{
	__cvmx_cmd_queue_state_t *qptr = __cvmx_cmd_queue_get_state(queue_id);

	/* Make sure nobody else is updating the same queue */
	if (likely(use_locking))
		__cvmx_cmd_queue_lock(queue_id, qptr);

	/*
	 * If a max queue length was specified then make sure we don't
	 * exceed it. If any part of the command would be below the
	 * limit we allow it.
	 */
	if (CVMX_CMD_QUEUE_ENABLE_MAX_DEPTH && unlikely(qptr->max_depth)) {
		if (unlikely
		    (cvmx_cmd_queue_length(queue_id) > (int)qptr->max_depth)) {
			if (likely(use_locking))
				__cvmx_cmd_queue_unlock(qptr);
			return CVMX_CMD_QUEUE_FULL;
		}
	}

	/*
	 * Normally there is plenty of room in the current buffer for
	 * the command.
	 */
	if (likely(qptr->index + 2 < qptr->pool_size_m1)) {
		uint64_t *ptr =
		    (uint64_t *) cvmx_phys_to_ptr((uint64_t) qptr->
						  base_ptr_div128 << 7);
		ptr += qptr->index;
		qptr->index += 2;
		ptr[0] = cmd1;
		ptr[1] = cmd2;
	} else {
		uint64_t *ptr;
		/*
		 * Figure out how many command words will fit in this
		 * buffer. One location will be needed for the next
		 * buffer pointer.
		 */
		int count = qptr->pool_size_m1 - qptr->index;
		/*
		 * We need a new command buffer. Fail if there isn't
		 * one available.
		 */
		uint64_t *new_buffer =
		    (uint64_t *) cvmx_fpa_alloc(qptr->fpa_pool);
		if (unlikely(new_buffer == NULL)) {
			if (likely(use_locking))
				__cvmx_cmd_queue_unlock(qptr);
			return CVMX_CMD_QUEUE_NO_MEMORY;
		}
		count--;
		ptr =
		    (uint64_t *) cvmx_phys_to_ptr((uint64_t) qptr->
						  base_ptr_div128 << 7);
		ptr += qptr->index;
		*ptr++ = cmd1;
		if (likely(count))
			*ptr++ = cmd2;
		*ptr = cvmx_ptr_to_phys(new_buffer);
		/*
		 * The current buffer is full and has a link to the
		 * next buffer. Time to write the rest of the commands
		 * into the new buffer.
		 */
		qptr->base_ptr_div128 = *ptr >> 7;
		qptr->index = 0;
		if (unlikely(count == 0)) {
			qptr->index = 1;
			new_buffer[0] = cmd2;
		}
	}

	/* All updates are complete. Release the lock and return */
	if (likely(use_locking))
		__cvmx_cmd_queue_unlock(qptr);
	return CVMX_CMD_QUEUE_SUCCESS;
}
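
/*
 * Example (sketch only; cmd1 and cmd2 stand in for hardware specific
 * command words). Note the buffer-link edge case handled above: if the
 * old buffer only has room for cmd1 plus the link word, cmd2 is written
 * as the first word of the new buffer instead.
 *
 *	uint64_t cmd1 = ...;	e.g. a command header word
 *	uint64_t cmd2 = ...;	e.g. a physical buffer address
 *	cvmx_cmd_queue_write2(CVMX_CMD_QUEUE_RAID, 1, cmd1, cmd2);
 */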

/**
 * Simple function to write three command words to a command
 * queue.
 *
 * @queue_id: Hardware command queue to write to
 * @use_locking:
 *	Use internal locking to ensure exclusive access for queue
 *	updates. If you don't use this locking you must ensure
 *	exclusivity some other way. Locking is strongly recommended.
 * @cmd1:     Command
 * @cmd2:     Command
 * @cmd3:     Command
 *
 * Returns CVMX_CMD_QUEUE_SUCCESS or a failure code
 */
static inline cvmx_cmd_queue_result_t cvmx_cmd_queue_write3(cvmx_cmd_queue_id_t
							    queue_id,
							    int use_locking,
							    uint64_t cmd1,
							    uint64_t cmd2,
							    uint64_t cmd3)
{
	__cvmx_cmd_queue_state_t *qptr = __cvmx_cmd_queue_get_state(queue_id);

	/* Make sure nobody else is updating the same queue */
	if (likely(use_locking))
		__cvmx_cmd_queue_lock(queue_id, qptr);

	/*
	 * If a max queue length was specified then make sure we don't
	 * exceed it. If any part of the command would be below the
	 * limit we allow it.
	 */
	if (CVMX_CMD_QUEUE_ENABLE_MAX_DEPTH && unlikely(qptr->max_depth)) {
		if (unlikely
		    (cvmx_cmd_queue_length(queue_id) > (int)qptr->max_depth)) {
			if (likely(use_locking))
				__cvmx_cmd_queue_unlock(qptr);
			return CVMX_CMD_QUEUE_FULL;
		}
	}

	/*
	 * Normally there is plenty of room in the current buffer for
	 * the command.
	 */
	if (likely(qptr->index + 3 < qptr->pool_size_m1)) {
		uint64_t *ptr =
		    (uint64_t *) cvmx_phys_to_ptr((uint64_t) qptr->
						  base_ptr_div128 << 7);
		ptr += qptr->index;
		qptr->index += 3;
		ptr[0] = cmd1;
		ptr[1] = cmd2;
		ptr[2] = cmd3;
	} else {
		uint64_t *ptr;
		/*
		 * Figure out how many command words will fit in this
		 * buffer. One location will be needed for the next
		 * buffer pointer.
		 */
		int count = qptr->pool_size_m1 - qptr->index;
		/*
		 * We need a new command buffer. Fail if there isn't
		 * one available.
		 */
		uint64_t *new_buffer =
		    (uint64_t *) cvmx_fpa_alloc(qptr->fpa_pool);
		if (unlikely(new_buffer == NULL)) {
			if (likely(use_locking))
				__cvmx_cmd_queue_unlock(qptr);
			return CVMX_CMD_QUEUE_NO_MEMORY;
		}
		count--;
		ptr =
		    (uint64_t *) cvmx_phys_to_ptr((uint64_t) qptr->
						  base_ptr_div128 << 7);
		ptr += qptr->index;
		*ptr++ = cmd1;
		if (count) {
			*ptr++ = cmd2;
			if (count > 1)
				*ptr++ = cmd3;
		}
		*ptr = cvmx_ptr_to_phys(new_buffer);
		/*
		 * The current buffer is full and has a link to the
		 * next buffer. Time to write the rest of the commands
		 * into the new buffer.
		 */
		qptr->base_ptr_div128 = *ptr >> 7;
		qptr->index = 0;
		ptr = new_buffer;
		if (count == 0) {
			*ptr++ = cmd2;
			qptr->index++;
		}
		if (count < 2) {
			*ptr++ = cmd3;
			qptr->index++;
		}
	}

	/* All updates are complete. Release the lock and return */
	if (likely(use_locking))
		__cvmx_cmd_queue_unlock(qptr);
	return CVMX_CMD_QUEUE_SUCCESS;
}

#endif /* __CVMX_CMD_QUEUE_H__ */