/*
 * SN Platform GRU Driver
 *
 *              KERNEL SERVICES THAT USE THE GRU
 *
 *  Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
 */

#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/spinlock.h>
#include <linux/device.h>
#include <linux/miscdevice.h>
#include <linux/proc_fs.h>
#include <linux/interrupt.h>
#include <linux/uaccess.h>
#include <linux/delay.h>
#include "gru.h"
#include "grulib.h"
#include "grutables.h"
#include "grukservices.h"
#include "gru_instructions.h"
#include <asm/uv/uv_hub.h>

/*
 * Kernel GRU Usage
 *
 * The following is an interim algorithm for management of kernel GRU
 * resources. This will likely be replaced when we better understand the
 * kernel/user requirements.
 *
 * Blade percpu resources reserved for kernel use. These resources are
 * reserved whenever the kernel context for the blade is loaded. Note
 * that the kernel context is not guaranteed to be always available. It is
 * loaded on demand & can be stolen by a user if the user demand exceeds the
 * kernel demand. The kernel can always reload the kernel context, but
 * a SLEEP may be required!!!
 *
 * Async Overview:
 *
 *	Each blade has one "kernel context" that owns GRU kernel resources
 *	located on the blade. Kernel drivers use GRU resources in this context
 *	for sending messages, zeroing memory, etc.
 *
 *	The kernel context is dynamically loaded on demand. If it is not in
 *	use by the kernel, the kernel context can be unloaded & given to a user.
 *	The kernel context will be reloaded when needed. This may require that
 *	a context be stolen from a user.
 *		NOTE: frequent unloading/reloading of the kernel context is
 *		expensive. We are depending on batch schedulers, cpusets, sane
 *		drivers or some other mechanism to prevent the need for frequent
 *		stealing/reloading.
 *
 *	The kernel context consists of two parts:
 *		- 1 CB & a few DSRs that are reserved for each cpu on the blade.
 *		  Each cpu has its own private resources & does not share them
 *		  with other cpus. These resources are used serially, ie,
 *		  locked, used & unlocked on each call to a function in
 *		  grukservices.
 *			(Now that we have dynamic loading of kernel contexts, I
 *			 may rethink this & allow sharing between cpus....)
 *
 *		- Additional resources can be reserved long term & used directly
 *		  by UV drivers located in the kernel. Drivers using these GRU
 *		  resources can use asynchronous GRU instructions that send
 *		  interrupts on completion.
 *			- these resources must be explicitly locked/unlocked
 *			- locked resources prevent (obviously) the kernel
 *			  context from being unloaded.
 *			- drivers using these resources directly issue their own
 *			  GRU instruction and must wait/check completion.
 *
 *		  When these resources are reserved, the caller can optionally
 *		  associate a wait_queue with the resources and use asynchronous
 *		  GRU instructions. When an async GRU instruction completes, the
 *		  driver will do a wakeup on the event.
 */
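/*
 * Illustrative sketch of the async flow described above. This is not part
 * of the driver; the buffer sizes and error handling are hypothetical, and
 * mirror what quicktest2() below does. A UV driver reserves the resources
 * once, then locks them around each batch of asynchronous instructions:
 *
 *	static DECLARE_COMPLETION(my_drv_cmp);
 *	unsigned long han;
 *	void *cb, *dsr;
 *
 *	han = gru_reserve_async_resources(blade_id, 2, 256, &my_drv_cmp);
 *	if (!han)
 *		return -EBUSY;			(already reserved on this blade)
 *	gru_lock_async_resource(han, &cb, &dsr);
 *	... issue GRU instructions on cb using IMA_INTERRUPT ...
 *	gru_wait_async_cbr(han);		(completion is signalled from the
 *						 driver's interrupt/wakeup path)
 *	gru_unlock_async_resource(han);
 *	...
 *	gru_release_async_resources(han);
 */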
#define ASYNC_HAN_TO_BID(h)	((h) - 1)
#define ASYNC_BID_TO_HAN(b)	((b) + 1)
#define ASYNC_HAN_TO_BS(h)	gru_base[ASYNC_HAN_TO_BID(h)]
#define KCB_TO_GID(cb)		((cb - gru_start_vaddr) /		\
					(GRU_SIZE * GRU_CHIPLETS_PER_BLADE))
#define KCB_TO_BS(cb)		gru_base[KCB_TO_GID(cb)]

#define GRU_NUM_KERNEL_CBR	1
#define GRU_NUM_KERNEL_DSR_BYTES 256
#define GRU_NUM_KERNEL_DSR_CL	(GRU_NUM_KERNEL_DSR_BYTES /		\
					GRU_CACHE_LINE_BYTES)

/* GRU instruction attributes for all instructions */
#define IMA			IMA_CB_DELAY

/* GRU cacheline size is always 64 bytes - even on arches with 128 byte lines */
#define __gru_cacheline_aligned__				\
	__attribute__((__aligned__(GRU_CACHE_LINE_BYTES)))

#define MAGIC	0x1234567887654321UL

/* Default retry count for GRU errors on kernel instructions */
#define EXCEPTION_RETRY_LIMIT	3

/* Status of message queue sections */
#define MQS_EMPTY		0
#define MQS_FULL		1
#define MQS_NOOP		2

/*----------------- RESOURCE MANAGEMENT -------------------------------------*/
/* optimized for x86_64 */
struct message_queue {
	union gru_mesqhead	head __gru_cacheline_aligned__;	/* CL 0 */
	int			qlines;				/* DW 1 */
	long			hstatus[2];
	void			*next __gru_cacheline_aligned__;/* CL 1 */
	void			*limit;
	void			*start;
	void			*start2;
	char			data ____cacheline_aligned;	/* CL 2 */
};

/* First word in every message - used by mesq interface */
struct message_header {
	char	present;
	char	present2;
	char	lines;
	char	fill;
};

#define HSTATUS(mq, h)	((mq) + offsetof(struct message_queue, hstatus[h]))

/*
 * Reload the blade's kernel context into a GRU chiplet. Called holding
 * the bs_kgts_sema for READ. Will steal user contexts if necessary.
 */
static void gru_load_kernel_context(struct gru_blade_state *bs, int blade_id)
{
	struct gru_state *gru;
	struct gru_thread_state *kgts;
	void *vaddr;
	int ctxnum, ncpus;

	up_read(&bs->bs_kgts_sema);
	down_write(&bs->bs_kgts_sema);

	if (!bs->bs_kgts) {
		bs->bs_kgts = gru_alloc_gts(NULL, 0, 0, 0, 0);
		bs->bs_kgts->ts_user_blade_id = blade_id;
	}
	kgts = bs->bs_kgts;

	if (!kgts->ts_gru) {
		STAT(load_kernel_context);
		ncpus = uv_blade_nr_possible_cpus(blade_id);
		kgts->ts_cbr_au_count = GRU_CB_COUNT_TO_AU(
			GRU_NUM_KERNEL_CBR * ncpus + bs->bs_async_cbrs);
		kgts->ts_dsr_au_count = GRU_DS_BYTES_TO_AU(
			GRU_NUM_KERNEL_DSR_BYTES * ncpus +
				bs->bs_async_dsr_bytes);
		while (!gru_assign_gru_context(kgts)) {
			msleep(1);
			gru_steal_context(kgts);
		}
		gru_load_context(kgts);
		gru = bs->bs_kgts->ts_gru;
		vaddr = gru->gs_gru_base_vaddr;
		ctxnum = kgts->ts_ctxnum;
		bs->kernel_cb = get_gseg_base_address_cb(vaddr, ctxnum, 0);
		bs->kernel_dsr = get_gseg_base_address_ds(vaddr, ctxnum, 0);
	}
	downgrade_write(&bs->bs_kgts_sema);
}
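/*
 * Layout sketch (illustrative only) of the kernel context set up above,
 * derived from how gru_get_cpu_resources() and gru_lock_async_resource()
 * index into it. Each cpu gets one CBR and GRU_NUM_KERNEL_DSR_BYTES of DSR
 * space; any async resources follow the per-cpu ones:
 *
 *	CBRs:	kernel_cb  + 0 * GRU_HANDLE_STRIDE		cpu 0
 *		kernel_cb  + 1 * GRU_HANDLE_STRIDE		cpu 1
 *		...
 *		kernel_cb  + ncpus * GRU_HANDLE_STRIDE		first async CBR
 *
 *	DSRs:	kernel_dsr + 0 * GRU_NUM_KERNEL_DSR_BYTES	cpu 0
 *		...
 *		kernel_dsr + ncpus * GRU_NUM_KERNEL_DSR_BYTES	async DSR space
 */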
/*
 * Free all kernel contexts that are not currently in use.
 *   Returns 0 if all freed, else the number of contexts still in use.
 */
static int gru_free_kernel_contexts(void)
{
	struct gru_blade_state *bs;
	struct gru_thread_state *kgts;
	int bid, ret = 0;

	for (bid = 0; bid < GRU_MAX_BLADES; bid++) {
		bs = gru_base[bid];
		if (!bs)
			continue;

		/* Ignore busy contexts. Don't want to block here. */
		if (down_write_trylock(&bs->bs_kgts_sema)) {
			kgts = bs->bs_kgts;
			if (kgts && kgts->ts_gru)
				gru_unload_context(kgts, 0);
			bs->bs_kgts = NULL;
			up_write(&bs->bs_kgts_sema);
			kfree(kgts);
		} else {
			ret++;
		}
	}
	return ret;
}

/*
 * Lock & load the kernel context for the specified blade.
 */
static struct gru_blade_state *gru_lock_kernel_context(int blade_id)
{
	struct gru_blade_state *bs;

	STAT(lock_kernel_context);
	bs = gru_base[blade_id];

	down_read(&bs->bs_kgts_sema);
	if (!bs->bs_kgts || !bs->bs_kgts->ts_gru)
		gru_load_kernel_context(bs, blade_id);
	return bs;
}

/*
 * Unlock the kernel context for the specified blade. Context is not
 * unloaded but may be stolen before next use.
 */
static void gru_unlock_kernel_context(int blade_id)
{
	struct gru_blade_state *bs;

	bs = gru_base[blade_id];
	up_read(&bs->bs_kgts_sema);
	STAT(unlock_kernel_context);
}

/*
 * Reserve & get pointers to the DSR/CBRs reserved for the current cpu.
 *	- returns with preemption disabled
 */
static int gru_get_cpu_resources(int dsr_bytes, void **cb, void **dsr)
{
	struct gru_blade_state *bs;
	int lcpu;

	BUG_ON(dsr_bytes > GRU_NUM_KERNEL_DSR_BYTES);
	preempt_disable();
	bs = gru_lock_kernel_context(uv_numa_blade_id());
	lcpu = uv_blade_processor_id();
	*cb = bs->kernel_cb + lcpu * GRU_HANDLE_STRIDE;
	*dsr = bs->kernel_dsr + lcpu * GRU_NUM_KERNEL_DSR_BYTES;
	return 0;
}

/*
 * Free the current cpu's reserved DSR/CBR resources.
 */
static void gru_free_cpu_resources(void *cb, void *dsr)
{
	gru_unlock_kernel_context(uv_numa_blade_id());
	preempt_enable();
}

/*
 * Reserve GRU resources to be used asynchronously.
 *   Note: currently supports only 1 reservation per blade.
 *
 *	input:
 *		blade_id  - blade on which resources should be reserved
 *		cbrs	  - number of CBRs
 *		dsr_bytes - number of DSR bytes needed
 *	output:
 *		handle to identify resource
 *		(0 = async resources already reserved)
 */
unsigned long gru_reserve_async_resources(int blade_id, int cbrs, int dsr_bytes,
			struct completion *cmp)
{
	struct gru_blade_state *bs;
	struct gru_thread_state *kgts;
	int ret = 0;

	bs = gru_base[blade_id];

	down_write(&bs->bs_kgts_sema);

	/* Verify no resources already reserved */
	if (bs->bs_async_dsr_bytes + bs->bs_async_cbrs)
		goto done;
	bs->bs_async_dsr_bytes = dsr_bytes;
	bs->bs_async_cbrs = cbrs;
	bs->bs_async_wq = cmp;
	kgts = bs->bs_kgts;

	/* Resources changed. Unload context if already loaded */
	if (kgts && kgts->ts_gru)
		gru_unload_context(kgts, 0);
	ret = ASYNC_BID_TO_HAN(blade_id);

done:
	up_write(&bs->bs_kgts_sema);
	return ret;
}
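/*
 * Illustrative sketch (hypothetical caller, not part of the driver) of the
 * serial lock/use/unlock pattern that every grukservices function follows
 * with the per-cpu resources above:
 *
 *	void *cb, *dsr;
 *	int ret;
 *
 *	if (gru_get_cpu_resources(GRU_NUM_KERNEL_DSR_BYTES, &cb, &dsr))
 *		return MQE_BUG_NO_RESOURCES;
 *	... build operands in dsr, issue one GRU instruction on cb ...
 *	ret = gru_wait(cb);			(spin until idle or exception)
 *	gru_free_cpu_resources(cb, dsr);	(re-enables preemption)
 */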
/*
 * Release async resources previously reserved.
 *
 *	input:
 *		han - handle to identify resources
 */
void gru_release_async_resources(unsigned long han)
{
	struct gru_blade_state *bs = ASYNC_HAN_TO_BS(han);

	down_write(&bs->bs_kgts_sema);
	bs->bs_async_dsr_bytes = 0;
	bs->bs_async_cbrs = 0;
	bs->bs_async_wq = NULL;
	up_write(&bs->bs_kgts_sema);
}

/*
 * Wait for async GRU instructions to complete.
 *
 *	input:
 *		han - handle to identify resources
 */
void gru_wait_async_cbr(unsigned long han)
{
	struct gru_blade_state *bs = ASYNC_HAN_TO_BS(han);

	wait_for_completion(bs->bs_async_wq);
	mb();
}

/*
 * Lock previously reserved async GRU resources
 *
 *	input:
 *		han - handle to identify resources
 *	output:
 *		cb  - pointer to first CBR
 *		dsr - pointer to first DSR
 */
void gru_lock_async_resource(unsigned long han, void **cb, void **dsr)
{
	struct gru_blade_state *bs = ASYNC_HAN_TO_BS(han);
	int blade_id = ASYNC_HAN_TO_BID(han);
	int ncpus;

	gru_lock_kernel_context(blade_id);
	ncpus = uv_blade_nr_possible_cpus(blade_id);
	if (cb)
		*cb = bs->kernel_cb + ncpus * GRU_HANDLE_STRIDE;
	if (dsr)
		*dsr = bs->kernel_dsr + ncpus * GRU_NUM_KERNEL_DSR_BYTES;
}

/*
 * Unlock previously reserved async GRU resources
 *
 *	input:
 *		han - handle to identify resources
 */
void gru_unlock_async_resource(unsigned long han)
{
	int blade_id = ASYNC_HAN_TO_BID(han);

	gru_unlock_kernel_context(blade_id);
}

/*----------------------------------------------------------------------*/
int gru_get_cb_exception_detail(void *cb,
		struct control_block_extended_exc_detail *excdet)
{
	struct gru_control_block_extended *cbe;
	struct gru_blade_state *bs;
	int cbrnum;

	bs = KCB_TO_BS(cb);
	cbrnum = thread_cbr_number(bs->bs_kgts, get_cb_number(cb));
	cbe = get_cbe(GRUBASE(cb), cbrnum);
	gru_flush_cache(cbe);	/* CBE not coherent */
	excdet->opc = cbe->opccpy;
	excdet->exopc = cbe->exopccpy;
	excdet->ecause = cbe->ecause;
	excdet->exceptdet0 = cbe->idef1upd;
	excdet->exceptdet1 = cbe->idef3upd;
	gru_flush_cache(cbe);
	return 0;
}

char *gru_get_cb_exception_detail_str(int ret, void *cb,
				      char *buf, int size)
{
	struct gru_control_block_status *gen = (void *)cb;
	struct control_block_extended_exc_detail excdet;

	if (ret > 0 && gen->istatus == CBS_EXCEPTION) {
		gru_get_cb_exception_detail(cb, &excdet);
		snprintf(buf, size,
			"GRU exception: cb %p, opc %d, exopc %d, ecause 0x%x, "
			"excdet0 0x%lx, excdet1 0x%x",
			gen, excdet.opc, excdet.exopc, excdet.ecause,
			excdet.exceptdet0, excdet.exceptdet1);
	} else {
		snprintf(buf, size, "No exception");
	}
	return buf;
}
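/*
 * Illustrative sketch (hypothetical caller; gru_abort() below does the same
 * thing before panicking) of turning a failed CB into a readable string:
 *
 *	char buf[GRU_EXC_STR_SIZE];
 *	int ret;
 *
 *	ret = gru_wait(cb);
 *	if (ret != CBS_IDLE)
 *		printk(KERN_ERR "GRU error: %s\n",
 *		       gru_get_cb_exception_detail_str(ret, cb, buf,
 *						       sizeof(buf)));
 */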
static int gru_wait_idle_or_exception(struct gru_control_block_status *gen)
{
	while (gen->istatus >= CBS_ACTIVE) {
		cpu_relax();
		barrier();
	}
	return gen->istatus;
}

static int gru_retry_exception(void *cb)
{
	struct gru_control_block_status *gen = (void *)cb;
	struct control_block_extended_exc_detail excdet;
	int retry = EXCEPTION_RETRY_LIMIT;

	while (1) {
		if (gru_wait_idle_or_exception(gen) == CBS_IDLE)
			return CBS_IDLE;
		if (gru_get_cb_message_queue_substatus(cb))
			return CBS_EXCEPTION;
		gru_get_cb_exception_detail(cb, &excdet);
		if ((excdet.ecause & ~EXCEPTION_RETRY_BITS) ||
				(excdet.cbrexecstatus & CBR_EXS_ABORT_OCC))
			break;
		if (retry-- == 0)
			break;
		gen->icmd = 1;
		gru_flush_cache(gen);
	}
	return CBS_EXCEPTION;
}

int gru_check_status_proc(void *cb)
{
	struct gru_control_block_status *gen = (void *)cb;
	int ret;

	ret = gen->istatus;
	if (ret != CBS_EXCEPTION)
		return ret;
	return gru_retry_exception(cb);
}

int gru_wait_proc(void *cb)
{
	struct gru_control_block_status *gen = (void *)cb;
	int ret;

	ret = gru_wait_idle_or_exception(gen);
	if (ret == CBS_EXCEPTION)
		ret = gru_retry_exception(cb);

	return ret;
}

void gru_abort(int ret, void *cb, char *str)
{
	char buf[GRU_EXC_STR_SIZE];

	panic("GRU FATAL ERROR: %s - %s\n", str,
	      gru_get_cb_exception_detail_str(ret, cb, buf, sizeof(buf)));
}

void gru_wait_abort_proc(void *cb)
{
	int ret;

	ret = gru_wait_proc(cb);
	if (ret)
		gru_abort(ret, cb, "gru_wait_abort");
}


/*------------------------------ MESSAGE QUEUES -----------------------------*/

/* Internal status codes. These are NOT returned to the user. */
#define MQIE_AGAIN		-1	/* try again */


/*
 * Save/restore the "present" flag that is in the second line of 2-line
 * messages
 */
static inline int get_present2(void *p)
{
	struct message_header *mhdr = p + GRU_CACHE_LINE_BYTES;
	return mhdr->present;
}

static inline void restore_present2(void *p, int val)
{
	struct message_header *mhdr = p + GRU_CACHE_LINE_BYTES;
	mhdr->present = val;
}

/*
 * Create a message queue.
 *	qlines - message queue size in cache lines. Includes 2-line header.
 */
int gru_create_message_queue(struct gru_message_queue_desc *mqd,
		void *p, unsigned int bytes, int nasid, int vector, int apicid)
{
	struct message_queue *mq = p;
	unsigned int qlines;

	qlines = bytes / GRU_CACHE_LINE_BYTES - 2;
	memset(mq, 0, bytes);
	mq->start = &mq->data;
	mq->start2 = &mq->data + (qlines / 2 - 1) * GRU_CACHE_LINE_BYTES;
	mq->next = &mq->data;
	mq->limit = &mq->data + (qlines - 2) * GRU_CACHE_LINE_BYTES;
	mq->qlines = qlines;
	mq->hstatus[0] = 0;
	mq->hstatus[1] = 1;
	mq->head = gru_mesq_head(2, qlines / 2 + 1);
	mqd->mq = mq;
	mqd->mq_gpa = uv_gpa(mq);
	mqd->qlines = qlines;
	mqd->interrupt_pnode = UV_NASID_TO_PNODE(nasid);
	mqd->interrupt_vector = vector;
	mqd->interrupt_apicid = apicid;
	return 0;
}
EXPORT_SYMBOL_GPL(gru_create_message_queue);
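/*
 * Worked example (illustrative only) of the numbers set up above for the
 * 8-cache-line queue used by quicktest1(): qlines = 8 - 2 = 6, the initial
 * head is gru_mesq_head(2, qlines / 2 + 1) = gru_mesq_head(2, 4), and when
 * that half fills, send_message_queue_full() flips the head to
 * gru_mesq_head(qlines / 2 + 1, qlines) = gru_mesq_head(4, 6).
 *
 * Minimal creation call on the receive side (hypothetical; "buf" must be
 * large enough and cacheline aligned; no interrupt delivery requested):
 *
 *	struct gru_message_queue_desc mqd;
 *
 *	gru_create_message_queue(&mqd, buf, 8 * GRU_CACHE_LINE_BYTES, 0, 0, 0);
 */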
/*
 * Send a NOOP message to a message queue
 *	Returns:
 *		 0 - if queue is full after the send. This is the normal case
 *		     but various races can change this.
 *		-1 - if mesq sent successfully but queue not full
 *		>0 - unexpected error. MQE_xxx returned
 */
static int send_noop_message(void *cb, struct gru_message_queue_desc *mqd,
				void *mesg)
{
	const struct message_header noop_header = {
					.present = MQS_NOOP, .lines = 1};
	unsigned long m;
	int substatus, ret;
	struct message_header save_mhdr, *mhdr = mesg;

	STAT(mesq_noop);
	save_mhdr = *mhdr;
	*mhdr = noop_header;
	gru_mesq(cb, mqd->mq_gpa, gru_get_tri(mhdr), 1, IMA);
	ret = gru_wait(cb);

	if (ret) {
		substatus = gru_get_cb_message_queue_substatus(cb);
		switch (substatus) {
		case CBSS_NO_ERROR:
			STAT(mesq_noop_unexpected_error);
			ret = MQE_UNEXPECTED_CB_ERR;
			break;
		case CBSS_LB_OVERFLOWED:
			STAT(mesq_noop_lb_overflow);
			ret = MQE_CONGESTION;
			break;
		case CBSS_QLIMIT_REACHED:
			STAT(mesq_noop_qlimit_reached);
			ret = 0;
			break;
		case CBSS_AMO_NACKED:
			STAT(mesq_noop_amo_nacked);
			ret = MQE_CONGESTION;
			break;
		case CBSS_PUT_NACKED:
			STAT(mesq_noop_put_nacked);
			m = mqd->mq_gpa + (gru_get_amo_value_head(cb) << 6);
			gru_vstore(cb, m, gru_get_tri(mesg), XTYPE_CL, 1, 1,
						IMA);
			if (gru_wait(cb) == CBS_IDLE)
				ret = MQIE_AGAIN;
			else
				ret = MQE_UNEXPECTED_CB_ERR;
			break;
		case CBSS_PAGE_OVERFLOW:
		default:
			BUG();
		}
	}
	*mhdr = save_mhdr;
	return ret;
}

/*
 * Handle a gru_mesq full.
 */
static int send_message_queue_full(void *cb, struct gru_message_queue_desc *mqd,
				void *mesg, int lines)
{
	union gru_mesqhead mqh;
	unsigned int limit, head;
	unsigned long avalue;
	int half, qlines;

	/* Determine if switching to first/second half of q */
	avalue = gru_get_amo_value(cb);
	head = gru_get_amo_value_head(cb);
	limit = gru_get_amo_value_limit(cb);

	qlines = mqd->qlines;
	half = (limit != qlines);

	if (half)
		mqh = gru_mesq_head(qlines / 2 + 1, qlines);
	else
		mqh = gru_mesq_head(2, qlines / 2 + 1);

	/* Try to get lock for switching head pointer */
	gru_gamir(cb, EOP_IR_CLR, HSTATUS(mqd->mq_gpa, half), XTYPE_DW, IMA);
	if (gru_wait(cb) != CBS_IDLE)
		goto cberr;
	if (!gru_get_amo_value(cb)) {
		STAT(mesq_qf_locked);
		return MQE_QUEUE_FULL;
	}

	/* Got the lock. Send optional NOOP if queue not full. */
	if (head != limit) {
		if (send_noop_message(cb, mqd, mesg)) {
			gru_gamir(cb, EOP_IR_INC, HSTATUS(mqd->mq_gpa, half),
					XTYPE_DW, IMA);
			if (gru_wait(cb) != CBS_IDLE)
				goto cberr;
			STAT(mesq_qf_noop_not_full);
			return MQIE_AGAIN;
		}
		avalue++;
	}

	/* Then flip queuehead to other half of queue. */
	gru_gamer(cb, EOP_ERR_CSWAP, mqd->mq_gpa, XTYPE_DW, mqh.val, avalue,
							IMA);
	if (gru_wait(cb) != CBS_IDLE)
		goto cberr;

	/* If the queue head swap was not successful, clear the hstatus lock */
	if (gru_get_amo_value(cb) != avalue) {
		STAT(mesq_qf_switch_head_failed);
		gru_gamir(cb, EOP_IR_INC, HSTATUS(mqd->mq_gpa, half), XTYPE_DW,
							IMA);
		if (gru_wait(cb) != CBS_IDLE)
			goto cberr;
	}
	return MQIE_AGAIN;
cberr:
	STAT(mesq_qf_unexpected_error);
	return MQE_UNEXPECTED_CB_ERR;
}
/*
 * Send a cross-partition interrupt to the SSI that contains the target
 * message queue. Normally, the interrupt is automatically delivered by
 * hardware but some error conditions require explicit delivery.
 */
static void send_message_queue_interrupt(struct gru_message_queue_desc *mqd)
{
	if (mqd->interrupt_vector)
		uv_hub_send_ipi(mqd->interrupt_pnode, mqd->interrupt_apicid,
				mqd->interrupt_vector);
}

/*
 * Handle a PUT failure. Note: if message was a 2-line message, one of the
 * lines might have been successfully written. Before sending the
 * message, "present" must be cleared in BOTH lines to prevent the receiver
 * from prematurely seeing the full message.
 */
static int send_message_put_nacked(void *cb, struct gru_message_queue_desc *mqd,
				void *mesg, int lines)
{
	unsigned long m;

	m = mqd->mq_gpa + (gru_get_amo_value_head(cb) << 6);
	if (lines == 2) {
		gru_vset(cb, m, 0, XTYPE_CL, lines, 1, IMA);
		if (gru_wait(cb) != CBS_IDLE)
			return MQE_UNEXPECTED_CB_ERR;
	}
	gru_vstore(cb, m, gru_get_tri(mesg), XTYPE_CL, lines, 1, IMA);
	if (gru_wait(cb) != CBS_IDLE)
		return MQE_UNEXPECTED_CB_ERR;
	send_message_queue_interrupt(mqd);
	return MQE_OK;
}

/*
 * Handle a gru_mesq failure. Some of these failures are software recoverable
 * or retryable.
 */
static int send_message_failure(void *cb, struct gru_message_queue_desc *mqd,
				void *mesg, int lines)
{
	int substatus, ret = 0;

	substatus = gru_get_cb_message_queue_substatus(cb);
	switch (substatus) {
	case CBSS_NO_ERROR:
		STAT(mesq_send_unexpected_error);
		ret = MQE_UNEXPECTED_CB_ERR;
		break;
	case CBSS_LB_OVERFLOWED:
		STAT(mesq_send_lb_overflow);
		ret = MQE_CONGESTION;
		break;
	case CBSS_QLIMIT_REACHED:
		STAT(mesq_send_qlimit_reached);
		ret = send_message_queue_full(cb, mqd, mesg, lines);
		break;
	case CBSS_AMO_NACKED:
		STAT(mesq_send_amo_nacked);
		ret = MQE_CONGESTION;
		break;
	case CBSS_PUT_NACKED:
		STAT(mesq_send_put_nacked);
		ret = send_message_put_nacked(cb, mqd, mesg, lines);
		break;
	default:
		BUG();
	}
	return ret;
}

/*
 * Send a message to a message queue
 *	mqd	message queue descriptor
 *	mesg	message. Must be a vaddr within a GSEG
 *	bytes	message size (<= 2 CL)
 */
int gru_send_message_gpa(struct gru_message_queue_desc *mqd, void *mesg,
				unsigned int bytes)
{
	struct message_header *mhdr;
	void *cb;
	void *dsr;
	int istatus, clines, ret;

	STAT(mesq_send);
	BUG_ON(bytes < sizeof(int) || bytes > 2 * GRU_CACHE_LINE_BYTES);

	clines = DIV_ROUND_UP(bytes, GRU_CACHE_LINE_BYTES);
	if (gru_get_cpu_resources(bytes, &cb, &dsr))
		return MQE_BUG_NO_RESOURCES;
	memcpy(dsr, mesg, bytes);
	mhdr = dsr;
	mhdr->present = MQS_FULL;
	mhdr->lines = clines;
	if (clines == 2) {
		mhdr->present2 = get_present2(mhdr);
		restore_present2(mhdr, MQS_FULL);
	}

	do {
		ret = MQE_OK;
		gru_mesq(cb, mqd->mq_gpa, gru_get_tri(mhdr), clines, IMA);
		istatus = gru_wait(cb);
		if (istatus != CBS_IDLE)
			ret = send_message_failure(cb, mqd, dsr, clines);
	} while (ret == MQIE_AGAIN);
	gru_free_cpu_resources(cb, dsr);

	if (ret)
		STAT(mesq_send_failed);
	return ret;
}
EXPORT_SYMBOL_GPL(gru_send_message_gpa);
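/*
 * Illustrative send-side sketch (hypothetical caller; mirrors quicktest1()
 * below). MQE_CONGESTION is transient and is normally retried;
 * MQE_QUEUE_FULL means the receiver has not yet freed enough messages:
 *
 *	char mes[GRU_CACHE_LINE_BYTES];
 *	int ret;
 *
 *	do {
 *		ret = gru_send_message_gpa(&mqd, mes, sizeof(mes));
 *	} while (ret == MQE_CONGESTION);
 *	if (ret)
 *		... MQE_QUEUE_FULL or another MQE_xxx error ...
 */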
/*
 * Advance the receive pointer for the queue to the next message.
 */
void gru_free_message(struct gru_message_queue_desc *mqd, void *mesg)
{
	struct message_queue *mq = mqd->mq;
	struct message_header *mhdr = mq->next;
	void *next, *pnext;
	int half = -1;
	int lines = mhdr->lines;

	if (lines == 2)
		restore_present2(mhdr, MQS_EMPTY);
	mhdr->present = MQS_EMPTY;

	pnext = mq->next;
	next = pnext + GRU_CACHE_LINE_BYTES * lines;
	if (next == mq->limit) {
		next = mq->start;
		half = 1;
	} else if (pnext < mq->start2 && next >= mq->start2) {
		half = 0;
	}

	if (half >= 0)
		mq->hstatus[half] = 1;
	mq->next = next;
}
EXPORT_SYMBOL_GPL(gru_free_message);

/*
 * Get next message from message queue. Return NULL if no message
 * present. User must call gru_free_message() to move to the next message.
 *	mqd	message queue descriptor
 */
void *gru_get_next_message(struct gru_message_queue_desc *mqd)
{
	struct message_queue *mq = mqd->mq;
	struct message_header *mhdr = mq->next;
	int present = mhdr->present;

	/* skip NOOP messages */
	STAT(mesq_receive);
	while (present == MQS_NOOP) {
		gru_free_message(mqd, mhdr);
		mhdr = mq->next;
		present = mhdr->present;
	}

	/* Wait for both halves of 2 line messages */
	if (present == MQS_FULL && mhdr->lines == 2 &&
				get_present2(mhdr) == MQS_EMPTY)
		present = MQS_EMPTY;

	if (!present) {
		STAT(mesq_receive_none);
		return NULL;
	}

	if (mhdr->lines == 2)
		restore_present2(mhdr, mhdr->present2);

	return mhdr;
}
EXPORT_SYMBOL_GPL(gru_get_next_message);
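/*
 * Illustrative receive-side sketch (hypothetical caller). Messages are
 * consumed in order and each one must be freed to advance the queue:
 *
 *	void *m;
 *
 *	while ((m = gru_get_next_message(&mqd)) != NULL) {
 *		... process the message pointed to by m ...
 *		gru_free_message(&mqd, m);
 *	}
 */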
/* ---------------------- GRU DATA COPY FUNCTIONS ---------------------------*/

/*
 * Load a DW from a global GPA. The GPA can be a memory or MMR address.
 */
int gru_read_gpa(unsigned long *value, unsigned long gpa)
{
	void *cb;
	void *dsr;
	int ret, iaa;

	STAT(read_gpa);
	if (gru_get_cpu_resources(GRU_NUM_KERNEL_DSR_BYTES, &cb, &dsr))
		return MQE_BUG_NO_RESOURCES;
	iaa = gpa >> 62;
	gru_vload_phys(cb, gpa, gru_get_tri(dsr), iaa, IMA);
	ret = gru_wait(cb);
	if (ret == CBS_IDLE)
		*value = *(unsigned long *)dsr;
	gru_free_cpu_resources(cb, dsr);
	return ret;
}
EXPORT_SYMBOL_GPL(gru_read_gpa);


/*
 * Copy a block of data using the GRU resources
 */
int gru_copy_gpa(unsigned long dest_gpa, unsigned long src_gpa,
				unsigned int bytes)
{
	void *cb;
	void *dsr;
	int ret;

	STAT(copy_gpa);
	if (gru_get_cpu_resources(GRU_NUM_KERNEL_DSR_BYTES, &cb, &dsr))
		return MQE_BUG_NO_RESOURCES;
	gru_bcopy(cb, src_gpa, dest_gpa, gru_get_tri(dsr),
		  XTYPE_B, bytes, GRU_NUM_KERNEL_DSR_CL, IMA);
	ret = gru_wait(cb);
	gru_free_cpu_resources(cb, dsr);
	return ret;
}
EXPORT_SYMBOL_GPL(gru_copy_gpa);
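/*
 * Illustrative sketch (hypothetical caller) of reading one double word by
 * global physical address with gru_read_gpa() above. The return value is
 * the CB status, so CBS_IDLE (0) means success:
 *
 *	unsigned long val;
 *
 *	if (gru_read_gpa(&val, remote_gpa) != 0)
 *		... handle GRU error ...
 */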
/* ------------------- KERNEL QUICKTESTS RUN AT STARTUP ----------------*/
/* Temp - will delete after we gain confidence in the GRU */

static int quicktest0(unsigned long arg)
{
	unsigned long word0;
	unsigned long word1;
	void *cb;
	void *dsr;
	unsigned long *p;
	int ret = -EIO;

	if (gru_get_cpu_resources(GRU_CACHE_LINE_BYTES, &cb, &dsr))
		return MQE_BUG_NO_RESOURCES;
	p = dsr;
	word0 = MAGIC;
	word1 = 0;

	gru_vload(cb, uv_gpa(&word0), gru_get_tri(dsr), XTYPE_DW, 1, 1, IMA);
	if (gru_wait(cb) != CBS_IDLE) {
		printk(KERN_DEBUG "GRU quicktest0: CBR failure 1\n");
		goto done;
	}

	if (*p != MAGIC) {
		printk(KERN_DEBUG "GRU: quicktest0 bad magic 0x%lx\n", *p);
		goto done;
	}
	gru_vstore(cb, uv_gpa(&word1), gru_get_tri(dsr), XTYPE_DW, 1, 1, IMA);
	if (gru_wait(cb) != CBS_IDLE) {
		printk(KERN_DEBUG "GRU quicktest0: CBR failure 2\n");
		goto done;
	}

	if (word0 != word1 || word1 != MAGIC) {
		printk(KERN_DEBUG
		       "GRU quicktest0 err: found 0x%lx, expected 0x%lx\n",
		       word1, MAGIC);
		goto done;
	}
	ret = 0;

done:
	gru_free_cpu_resources(cb, dsr);
	return ret;
}

#define ALIGNUP(p, q)	((void *)(((unsigned long)(p) + (q) - 1) & ~(q - 1)))

static int quicktest1(unsigned long arg)
{
	struct gru_message_queue_desc mqd;
	void *p, *mq;
	unsigned long *dw;
	int i, ret = -EIO;
	char mes[GRU_CACHE_LINE_BYTES], *m;

	/* Need 1K cacheline aligned that does not cross page boundary */
	p = kmalloc(4096, GFP_KERNEL);
	if (p == NULL)
		return -ENOMEM;
	mq = ALIGNUP(p, 1024);
	memset(mes, 0xee, sizeof(mes));
	dw = mq;

	gru_create_message_queue(&mqd, mq, 8 * GRU_CACHE_LINE_BYTES, 0, 0, 0);
	for (i = 0; i < 6; i++) {
		mes[8] = i;
		do {
			ret = gru_send_message_gpa(&mqd, mes, sizeof(mes));
		} while (ret == MQE_CONGESTION);
		if (ret)
			break;
	}
	if (ret != MQE_QUEUE_FULL || i != 4)
		goto done;

	for (i = 0; i < 6; i++) {
		m = gru_get_next_message(&mqd);
		if (!m || m[8] != i)
			break;
		gru_free_message(&mqd, m);
	}
	ret = (i == 4) ? 0 : -EIO;

done:
	kfree(p);
	return ret;
}

static int quicktest2(unsigned long arg)
{
	static DECLARE_COMPLETION(cmp);
	unsigned long han;
	int blade_id = 0;
	int numcb = 4;
	int ret = 0;
	unsigned long *buf;
	void *cb0, *cb;
	struct gru_control_block_status *gen;
	int i, k, istatus, bytes;

	bytes = numcb * 4 * 8;
	buf = kmalloc(bytes, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	ret = -EBUSY;
	han = gru_reserve_async_resources(blade_id, numcb, 0, &cmp);
	if (!han)
		goto done;

	gru_lock_async_resource(han, &cb0, NULL);
	memset(buf, 0xee, bytes);
	for (i = 0; i < numcb; i++)
		gru_vset(cb0 + i * GRU_HANDLE_STRIDE, uv_gpa(&buf[i * 4]), 0,
				XTYPE_DW, 4, 1, IMA_INTERRUPT);

	ret = 0;
	k = numcb;
	do {
		gru_wait_async_cbr(han);
		for (i = 0; i < numcb; i++) {
			cb = cb0 + i * GRU_HANDLE_STRIDE;
			istatus = gru_check_status(cb);
			if (istatus != CBS_ACTIVE && istatus != CBS_CALL_OS)
				break;
		}
		if (i == numcb)
			continue;
		if (istatus != CBS_IDLE) {
			printk(KERN_DEBUG "GRU:%d quicktest2: cb %d, exception\n", smp_processor_id(), i);
			ret = -EFAULT;
		} else if (buf[4 * i] || buf[4 * i + 1] || buf[4 * i + 2] ||
				buf[4 * i + 3]) {
			printk(KERN_DEBUG "GRU:%d quicktest2:cb %d,  buf 0x%lx, 0x%lx, 0x%lx, 0x%lx\n",
			       smp_processor_id(), i, buf[4 * i], buf[4 * i + 1], buf[4 * i + 2], buf[4 * i + 3]);
			ret = -EIO;
		}
		k--;
		gen = cb;
		gen->istatus = CBS_CALL_OS; /* don't handle this CBR again */
	} while (k);
	BUG_ON(cmp.done);

	gru_unlock_async_resource(han);
	gru_release_async_resources(han);
done:
	kfree(buf);
	return ret;
}

#define BUFSIZE 200
static int quicktest3(unsigned long arg)
{
	char buf1[BUFSIZE], buf2[BUFSIZE];
	int ret = 0;

	memset(buf2, 0, sizeof(buf2));
	memset(buf1, get_cycles() & 255, sizeof(buf1));
	gru_copy_gpa(uv_gpa(buf2), uv_gpa(buf1), BUFSIZE);
	if (memcmp(buf1, buf2, BUFSIZE)) {
		printk(KERN_DEBUG "GRU quicktest3 error\n");
		ret = -EIO;
	}
	return ret;
}

/*
 * Debugging only. User hook for various kernel tests
 * of driver & gru.
 */
int gru_ktest(unsigned long arg)
{
	int ret = -EINVAL;

	switch (arg & 0xff) {
	case 0:
		ret = quicktest0(arg);
		break;
	case 1:
		ret = quicktest1(arg);
		break;
	case 2:
		ret = quicktest2(arg);
		break;
	case 3:
		ret = quicktest3(arg);
		break;
	case 99:
		ret = gru_free_kernel_contexts();
		break;
	}
	return ret;
}

int gru_kservices_init(void)
{
	return 0;
}

void gru_kservices_exit(void)
{
	if (gru_free_kernel_contexts())
		BUG();
}