/*
 * SN Platform GRU Driver
 *
 * KERNEL SERVICES THAT USE THE GRU
 *
 * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/spinlock.h>
#include <linux/device.h>
#include <linux/miscdevice.h>
#include <linux/proc_fs.h>
#include <linux/interrupt.h>
#include <linux/uaccess.h>
#include <linux/delay.h>
#include <linux/export.h>
#include <asm/io_apic.h>
#include "gru.h"
#include "grulib.h"
#include "grutables.h"
#include "grukservices.h"
#include "gru_instructions.h"
#include <asm/uv/uv_hub.h>

/*
 * Kernel GRU Usage
 *
 * The following is an interim algorithm for management of kernel GRU
 * resources. This will likely be replaced when we better understand the
 * kernel/user requirements.
 *
 * Blade percpu resources reserved for kernel use. These resources are
 * reserved whenever the kernel context for the blade is loaded. Note
 * that the kernel context is not guaranteed to be always available. It is
 * loaded on demand & can be stolen by a user if the user demand exceeds the
 * kernel demand.
 * The kernel can always reload the kernel context but
 * a SLEEP may be required!!!.
 *
 * Async Overview:
 *
 *	Each blade has one "kernel context" that owns GRU kernel resources
 *	located on the blade. Kernel drivers use GRU resources in this context
 *	for sending messages, zeroing memory, etc.
 *
 *	The kernel context is dynamically loaded on demand. If it is not in
 *	use by the kernel, the kernel context can be unloaded & given to a user.
 *	The kernel context will be reloaded when needed. This may require that
 *	a context be stolen from a user.
 *		NOTE: frequent unloading/reloading of the kernel context is
 *		expensive. We are depending on batch schedulers, cpusets, sane
 *		drivers or some other mechanism to prevent the need for frequent
 *		stealing/reloading.
 *
 *	The kernel context consists of two parts:
 *		- 1 CB & a few DSRs that are reserved for each cpu on the blade.
 *		  Each cpu has its own private resources & does not share them
 *		  with other cpus. These resources are used serially, ie,
 *		  locked, used & unlocked on each call to a function in
 *		  grukservices.
 *			(Now that we have dynamic loading of kernel contexts, I
 *			 may rethink this & allow sharing between cpus....)
 *
 *		- Additional resources can be reserved long term & used directly
 *		  by UV drivers located in the kernel. Drivers using these GRU
 *		  resources can use asynchronous GRU instructions that send
 *		  interrupts on completion.
 *			- these resources must be explicitly locked/unlocked
 *			- locked resources prevent (obviously) the kernel
 *			  context from being unloaded.
 *			- drivers using these resources directly issue their own
 *			  GRU instruction and must wait/check completion.
 *
 *		  When these resources are reserved, the caller can optionally
 *		  associate a wait_queue with the resources and use asynchronous
 *		  GRU instructions.
When an async GRU instruction completes, the 94 * driver will do a wakeup on the event. 95 * 96 */ 97 98 99 #define ASYNC_HAN_TO_BID(h) ((h) - 1) 100 #define ASYNC_BID_TO_HAN(b) ((b) + 1) 101 #define ASYNC_HAN_TO_BS(h) gru_base[ASYNC_HAN_TO_BID(h)] 102 103 #define GRU_NUM_KERNEL_CBR 1 104 #define GRU_NUM_KERNEL_DSR_BYTES 256 105 #define GRU_NUM_KERNEL_DSR_CL (GRU_NUM_KERNEL_DSR_BYTES / \ 106 GRU_CACHE_LINE_BYTES) 107 108 /* GRU instruction attributes for all instructions */ 109 #define IMA IMA_CB_DELAY 110 111 /* GRU cacheline size is always 64 bytes - even on arches with 128 byte lines */ 112 #define __gru_cacheline_aligned__ \ 113 __attribute__((__aligned__(GRU_CACHE_LINE_BYTES))) 114 115 #define MAGIC 0x1234567887654321UL 116 117 /* Default retry count for GRU errors on kernel instructions */ 118 #define EXCEPTION_RETRY_LIMIT 3 119 120 /* Status of message queue sections */ 121 #define MQS_EMPTY 0 122 #define MQS_FULL 1 123 #define MQS_NOOP 2 124 125 /*----------------- RESOURCE MANAGEMENT -------------------------------------*/ 126 /* optimized for x86_64 */ 127 struct message_queue { 128 union gru_mesqhead head __gru_cacheline_aligned__; /* CL 0 */ 129 int qlines; /* DW 1 */ 130 long hstatus[2]; 131 void *next __gru_cacheline_aligned__;/* CL 1 */ 132 void *limit; 133 void *start; 134 void *start2; 135 char data ____cacheline_aligned; /* CL 2 */ 136 }; 137 138 /* First word in every message - used by mesq interface */ 139 struct message_header { 140 char present; 141 char present2; 142 char lines; 143 char fill; 144 }; 145 146 #define HSTATUS(mq, h) ((mq) + offsetof(struct message_queue, hstatus[h])) 147 148 /* 149 * Reload the blade's kernel context into a GRU chiplet. Called holding 150 * the bs_kgts_sema for READ. Will steal user contexts if necessary. 
151 */ 152 static void gru_load_kernel_context(struct gru_blade_state *bs, int blade_id) 153 { 154 struct gru_state *gru; 155 struct gru_thread_state *kgts; 156 void *vaddr; 157 int ctxnum, ncpus; 158 159 up_read(&bs->bs_kgts_sema); 160 down_write(&bs->bs_kgts_sema); 161 162 if (!bs->bs_kgts) { 163 do { 164 bs->bs_kgts = gru_alloc_gts(NULL, 0, 0, 0, 0, 0); 165 if (!IS_ERR(bs->bs_kgts)) 166 break; 167 msleep(1); 168 } while (true); 169 bs->bs_kgts->ts_user_blade_id = blade_id; 170 } 171 kgts = bs->bs_kgts; 172 173 if (!kgts->ts_gru) { 174 STAT(load_kernel_context); 175 ncpus = uv_blade_nr_possible_cpus(blade_id); 176 kgts->ts_cbr_au_count = GRU_CB_COUNT_TO_AU( 177 GRU_NUM_KERNEL_CBR * ncpus + bs->bs_async_cbrs); 178 kgts->ts_dsr_au_count = GRU_DS_BYTES_TO_AU( 179 GRU_NUM_KERNEL_DSR_BYTES * ncpus + 180 bs->bs_async_dsr_bytes); 181 while (!gru_assign_gru_context(kgts)) { 182 msleep(1); 183 gru_steal_context(kgts); 184 } 185 gru_load_context(kgts); 186 gru = bs->bs_kgts->ts_gru; 187 vaddr = gru->gs_gru_base_vaddr; 188 ctxnum = kgts->ts_ctxnum; 189 bs->kernel_cb = get_gseg_base_address_cb(vaddr, ctxnum, 0); 190 bs->kernel_dsr = get_gseg_base_address_ds(vaddr, ctxnum, 0); 191 } 192 downgrade_write(&bs->bs_kgts_sema); 193 } 194 195 /* 196 * Free all kernel contexts that are not currently in use. 197 * Returns 0 if all freed, else number of inuse context. 198 */ 199 static int gru_free_kernel_contexts(void) 200 { 201 struct gru_blade_state *bs; 202 struct gru_thread_state *kgts; 203 int bid, ret = 0; 204 205 for (bid = 0; bid < GRU_MAX_BLADES; bid++) { 206 bs = gru_base[bid]; 207 if (!bs) 208 continue; 209 210 /* Ignore busy contexts. Don't want to block here. 
*/ 211 if (down_write_trylock(&bs->bs_kgts_sema)) { 212 kgts = bs->bs_kgts; 213 if (kgts && kgts->ts_gru) 214 gru_unload_context(kgts, 0); 215 bs->bs_kgts = NULL; 216 up_write(&bs->bs_kgts_sema); 217 kfree(kgts); 218 } else { 219 ret++; 220 } 221 } 222 return ret; 223 } 224 225 /* 226 * Lock & load the kernel context for the specified blade. 227 */ 228 static struct gru_blade_state *gru_lock_kernel_context(int blade_id) 229 { 230 struct gru_blade_state *bs; 231 int bid; 232 233 STAT(lock_kernel_context); 234 again: 235 bid = blade_id < 0 ? uv_numa_blade_id() : blade_id; 236 bs = gru_base[bid]; 237 238 /* Handle the case where migration occurred while waiting for the sema */ 239 down_read(&bs->bs_kgts_sema); 240 if (blade_id < 0 && bid != uv_numa_blade_id()) { 241 up_read(&bs->bs_kgts_sema); 242 goto again; 243 } 244 if (!bs->bs_kgts || !bs->bs_kgts->ts_gru) 245 gru_load_kernel_context(bs, bid); 246 return bs; 247 248 } 249 250 /* 251 * Unlock the kernel context for the specified blade. Context is not 252 * unloaded but may be stolen before next use. 253 */ 254 static void gru_unlock_kernel_context(int blade_id) 255 { 256 struct gru_blade_state *bs; 257 258 bs = gru_base[blade_id]; 259 up_read(&bs->bs_kgts_sema); 260 STAT(unlock_kernel_context); 261 } 262 263 /* 264 * Reserve & get pointers to the DSR/CBRs reserved for the current cpu. 265 * - returns with preemption disabled 266 */ 267 static int gru_get_cpu_resources(int dsr_bytes, void **cb, void **dsr) 268 { 269 struct gru_blade_state *bs; 270 int lcpu; 271 272 BUG_ON(dsr_bytes > GRU_NUM_KERNEL_DSR_BYTES); 273 preempt_disable(); 274 bs = gru_lock_kernel_context(-1); 275 lcpu = uv_blade_processor_id(); 276 *cb = bs->kernel_cb + lcpu * GRU_HANDLE_STRIDE; 277 *dsr = bs->kernel_dsr + lcpu * GRU_NUM_KERNEL_DSR_BYTES; 278 return 0; 279 } 280 281 /* 282 * Free the current cpus reserved DSR/CBR resources. 
283 */ 284 static void gru_free_cpu_resources(void *cb, void *dsr) 285 { 286 gru_unlock_kernel_context(uv_numa_blade_id()); 287 preempt_enable(); 288 } 289 290 /* 291 * Reserve GRU resources to be used asynchronously. 292 * Note: currently supports only 1 reservation per blade. 293 * 294 * input: 295 * blade_id - blade on which resources should be reserved 296 * cbrs - number of CBRs 297 * dsr_bytes - number of DSR bytes needed 298 * output: 299 * handle to identify resource 300 * (0 = async resources already reserved) 301 */ 302 unsigned long gru_reserve_async_resources(int blade_id, int cbrs, int dsr_bytes, 303 struct completion *cmp) 304 { 305 struct gru_blade_state *bs; 306 struct gru_thread_state *kgts; 307 int ret = 0; 308 309 bs = gru_base[blade_id]; 310 311 down_write(&bs->bs_kgts_sema); 312 313 /* Verify no resources already reserved */ 314 if (bs->bs_async_dsr_bytes + bs->bs_async_cbrs) 315 goto done; 316 bs->bs_async_dsr_bytes = dsr_bytes; 317 bs->bs_async_cbrs = cbrs; 318 bs->bs_async_wq = cmp; 319 kgts = bs->bs_kgts; 320 321 /* Resources changed. Unload context if already loaded */ 322 if (kgts && kgts->ts_gru) 323 gru_unload_context(kgts, 0); 324 ret = ASYNC_BID_TO_HAN(blade_id); 325 326 done: 327 up_write(&bs->bs_kgts_sema); 328 return ret; 329 } 330 331 /* 332 * Release async resources previously reserved. 333 * 334 * input: 335 * han - handle to identify resources 336 */ 337 void gru_release_async_resources(unsigned long han) 338 { 339 struct gru_blade_state *bs = ASYNC_HAN_TO_BS(han); 340 341 down_write(&bs->bs_kgts_sema); 342 bs->bs_async_dsr_bytes = 0; 343 bs->bs_async_cbrs = 0; 344 bs->bs_async_wq = NULL; 345 up_write(&bs->bs_kgts_sema); 346 } 347 348 /* 349 * Wait for async GRU instructions to complete. 
350 * 351 * input: 352 * han - handle to identify resources 353 */ 354 void gru_wait_async_cbr(unsigned long han) 355 { 356 struct gru_blade_state *bs = ASYNC_HAN_TO_BS(han); 357 358 wait_for_completion(bs->bs_async_wq); 359 mb(); 360 } 361 362 /* 363 * Lock previous reserved async GRU resources 364 * 365 * input: 366 * han - handle to identify resources 367 * output: 368 * cb - pointer to first CBR 369 * dsr - pointer to first DSR 370 */ 371 void gru_lock_async_resource(unsigned long han, void **cb, void **dsr) 372 { 373 struct gru_blade_state *bs = ASYNC_HAN_TO_BS(han); 374 int blade_id = ASYNC_HAN_TO_BID(han); 375 int ncpus; 376 377 gru_lock_kernel_context(blade_id); 378 ncpus = uv_blade_nr_possible_cpus(blade_id); 379 if (cb) 380 *cb = bs->kernel_cb + ncpus * GRU_HANDLE_STRIDE; 381 if (dsr) 382 *dsr = bs->kernel_dsr + ncpus * GRU_NUM_KERNEL_DSR_BYTES; 383 } 384 385 /* 386 * Unlock previous reserved async GRU resources 387 * 388 * input: 389 * han - handle to identify resources 390 */ 391 void gru_unlock_async_resource(unsigned long han) 392 { 393 int blade_id = ASYNC_HAN_TO_BID(han); 394 395 gru_unlock_kernel_context(blade_id); 396 } 397 398 /*----------------------------------------------------------------------*/ 399 int gru_get_cb_exception_detail(void *cb, 400 struct control_block_extended_exc_detail *excdet) 401 { 402 struct gru_control_block_extended *cbe; 403 struct gru_thread_state *kgts = NULL; 404 unsigned long off; 405 int cbrnum, bid; 406 407 /* 408 * Locate kgts for cb. This algorithm is SLOW but 409 * this function is rarely called (ie., almost never). 410 * Performance does not matter. 
411 */ 412 for_each_possible_blade(bid) { 413 if (!gru_base[bid]) 414 break; 415 kgts = gru_base[bid]->bs_kgts; 416 if (!kgts || !kgts->ts_gru) 417 continue; 418 off = cb - kgts->ts_gru->gs_gru_base_vaddr; 419 if (off < GRU_SIZE) 420 break; 421 kgts = NULL; 422 } 423 BUG_ON(!kgts); 424 cbrnum = thread_cbr_number(kgts, get_cb_number(cb)); 425 cbe = get_cbe(GRUBASE(cb), cbrnum); 426 gru_flush_cache(cbe); /* CBE not coherent */ 427 sync_core(); 428 excdet->opc = cbe->opccpy; 429 excdet->exopc = cbe->exopccpy; 430 excdet->ecause = cbe->ecause; 431 excdet->exceptdet0 = cbe->idef1upd; 432 excdet->exceptdet1 = cbe->idef3upd; 433 gru_flush_cache(cbe); 434 return 0; 435 } 436 437 static char *gru_get_cb_exception_detail_str(int ret, void *cb, 438 char *buf, int size) 439 { 440 struct gru_control_block_status *gen = (void *)cb; 441 struct control_block_extended_exc_detail excdet; 442 443 if (ret > 0 && gen->istatus == CBS_EXCEPTION) { 444 gru_get_cb_exception_detail(cb, &excdet); 445 snprintf(buf, size, 446 "GRU:%d exception: cb %p, opc %d, exopc %d, ecause 0x%x," 447 "excdet0 0x%lx, excdet1 0x%x", smp_processor_id(), 448 gen, excdet.opc, excdet.exopc, excdet.ecause, 449 excdet.exceptdet0, excdet.exceptdet1); 450 } else { 451 snprintf(buf, size, "No exception"); 452 } 453 return buf; 454 } 455 456 static int gru_wait_idle_or_exception(struct gru_control_block_status *gen) 457 { 458 while (gen->istatus >= CBS_ACTIVE) { 459 cpu_relax(); 460 barrier(); 461 } 462 return gen->istatus; 463 } 464 465 static int gru_retry_exception(void *cb) 466 { 467 struct gru_control_block_status *gen = (void *)cb; 468 struct control_block_extended_exc_detail excdet; 469 int retry = EXCEPTION_RETRY_LIMIT; 470 471 while (1) { 472 if (gru_wait_idle_or_exception(gen) == CBS_IDLE) 473 return CBS_IDLE; 474 if (gru_get_cb_message_queue_substatus(cb)) 475 return CBS_EXCEPTION; 476 gru_get_cb_exception_detail(cb, &excdet); 477 if ((excdet.ecause & ~EXCEPTION_RETRY_BITS) || 478 (excdet.cbrexecstatus & 
CBR_EXS_ABORT_OCC)) 479 break; 480 if (retry-- == 0) 481 break; 482 gen->icmd = 1; 483 gru_flush_cache(gen); 484 } 485 return CBS_EXCEPTION; 486 } 487 488 int gru_check_status_proc(void *cb) 489 { 490 struct gru_control_block_status *gen = (void *)cb; 491 int ret; 492 493 ret = gen->istatus; 494 if (ret == CBS_EXCEPTION) 495 ret = gru_retry_exception(cb); 496 rmb(); 497 return ret; 498 499 } 500 501 int gru_wait_proc(void *cb) 502 { 503 struct gru_control_block_status *gen = (void *)cb; 504 int ret; 505 506 ret = gru_wait_idle_or_exception(gen); 507 if (ret == CBS_EXCEPTION) 508 ret = gru_retry_exception(cb); 509 rmb(); 510 return ret; 511 } 512 513 static void gru_abort(int ret, void *cb, char *str) 514 { 515 char buf[GRU_EXC_STR_SIZE]; 516 517 panic("GRU FATAL ERROR: %s - %s\n", str, 518 gru_get_cb_exception_detail_str(ret, cb, buf, sizeof(buf))); 519 } 520 521 void gru_wait_abort_proc(void *cb) 522 { 523 int ret; 524 525 ret = gru_wait_proc(cb); 526 if (ret) 527 gru_abort(ret, cb, "gru_wait_abort"); 528 } 529 530 531 /*------------------------------ MESSAGE QUEUES -----------------------------*/ 532 533 /* Internal status . These are NOT returned to the user. */ 534 #define MQIE_AGAIN -1 /* try again */ 535 536 537 /* 538 * Save/restore the "present" flag that is in the second line of 2-line 539 * messages 540 */ 541 static inline int get_present2(void *p) 542 { 543 struct message_header *mhdr = p + GRU_CACHE_LINE_BYTES; 544 return mhdr->present; 545 } 546 547 static inline void restore_present2(void *p, int val) 548 { 549 struct message_header *mhdr = p + GRU_CACHE_LINE_BYTES; 550 mhdr->present = val; 551 } 552 553 /* 554 * Create a message queue. 555 * qlines - message queue size in cache lines. Includes 2-line header. 
556 */ 557 int gru_create_message_queue(struct gru_message_queue_desc *mqd, 558 void *p, unsigned int bytes, int nasid, int vector, int apicid) 559 { 560 struct message_queue *mq = p; 561 unsigned int qlines; 562 563 qlines = bytes / GRU_CACHE_LINE_BYTES - 2; 564 memset(mq, 0, bytes); 565 mq->start = &mq->data; 566 mq->start2 = &mq->data + (qlines / 2 - 1) * GRU_CACHE_LINE_BYTES; 567 mq->next = &mq->data; 568 mq->limit = &mq->data + (qlines - 2) * GRU_CACHE_LINE_BYTES; 569 mq->qlines = qlines; 570 mq->hstatus[0] = 0; 571 mq->hstatus[1] = 1; 572 mq->head = gru_mesq_head(2, qlines / 2 + 1); 573 mqd->mq = mq; 574 mqd->mq_gpa = uv_gpa(mq); 575 mqd->qlines = qlines; 576 mqd->interrupt_pnode = nasid >> 1; 577 mqd->interrupt_vector = vector; 578 mqd->interrupt_apicid = apicid; 579 return 0; 580 } 581 EXPORT_SYMBOL_GPL(gru_create_message_queue); 582 583 /* 584 * Send a NOOP message to a message queue 585 * Returns: 586 * 0 - if queue is full after the send. This is the normal case 587 * but various races can change this. 588 * -1 - if mesq sent successfully but queue not full 589 * >0 - unexpected error. 
MQE_xxx returned 590 */ 591 static int send_noop_message(void *cb, struct gru_message_queue_desc *mqd, 592 void *mesg) 593 { 594 const struct message_header noop_header = { 595 .present = MQS_NOOP, .lines = 1}; 596 unsigned long m; 597 int substatus, ret; 598 struct message_header save_mhdr, *mhdr = mesg; 599 600 STAT(mesq_noop); 601 save_mhdr = *mhdr; 602 *mhdr = noop_header; 603 gru_mesq(cb, mqd->mq_gpa, gru_get_tri(mhdr), 1, IMA); 604 ret = gru_wait(cb); 605 606 if (ret) { 607 substatus = gru_get_cb_message_queue_substatus(cb); 608 switch (substatus) { 609 case CBSS_NO_ERROR: 610 STAT(mesq_noop_unexpected_error); 611 ret = MQE_UNEXPECTED_CB_ERR; 612 break; 613 case CBSS_LB_OVERFLOWED: 614 STAT(mesq_noop_lb_overflow); 615 ret = MQE_CONGESTION; 616 break; 617 case CBSS_QLIMIT_REACHED: 618 STAT(mesq_noop_qlimit_reached); 619 ret = 0; 620 break; 621 case CBSS_AMO_NACKED: 622 STAT(mesq_noop_amo_nacked); 623 ret = MQE_CONGESTION; 624 break; 625 case CBSS_PUT_NACKED: 626 STAT(mesq_noop_put_nacked); 627 m = mqd->mq_gpa + (gru_get_amo_value_head(cb) << 6); 628 gru_vstore(cb, m, gru_get_tri(mesg), XTYPE_CL, 1, 1, 629 IMA); 630 if (gru_wait(cb) == CBS_IDLE) 631 ret = MQIE_AGAIN; 632 else 633 ret = MQE_UNEXPECTED_CB_ERR; 634 break; 635 case CBSS_PAGE_OVERFLOW: 636 STAT(mesq_noop_page_overflow); 637 /* fallthru */ 638 default: 639 BUG(); 640 } 641 } 642 *mhdr = save_mhdr; 643 return ret; 644 } 645 646 /* 647 * Handle a gru_mesq full. 
648 */ 649 static int send_message_queue_full(void *cb, struct gru_message_queue_desc *mqd, 650 void *mesg, int lines) 651 { 652 union gru_mesqhead mqh; 653 unsigned int limit, head; 654 unsigned long avalue; 655 int half, qlines; 656 657 /* Determine if switching to first/second half of q */ 658 avalue = gru_get_amo_value(cb); 659 head = gru_get_amo_value_head(cb); 660 limit = gru_get_amo_value_limit(cb); 661 662 qlines = mqd->qlines; 663 half = (limit != qlines); 664 665 if (half) 666 mqh = gru_mesq_head(qlines / 2 + 1, qlines); 667 else 668 mqh = gru_mesq_head(2, qlines / 2 + 1); 669 670 /* Try to get lock for switching head pointer */ 671 gru_gamir(cb, EOP_IR_CLR, HSTATUS(mqd->mq_gpa, half), XTYPE_DW, IMA); 672 if (gru_wait(cb) != CBS_IDLE) 673 goto cberr; 674 if (!gru_get_amo_value(cb)) { 675 STAT(mesq_qf_locked); 676 return MQE_QUEUE_FULL; 677 } 678 679 /* Got the lock. Send optional NOP if queue not full, */ 680 if (head != limit) { 681 if (send_noop_message(cb, mqd, mesg)) { 682 gru_gamir(cb, EOP_IR_INC, HSTATUS(mqd->mq_gpa, half), 683 XTYPE_DW, IMA); 684 if (gru_wait(cb) != CBS_IDLE) 685 goto cberr; 686 STAT(mesq_qf_noop_not_full); 687 return MQIE_AGAIN; 688 } 689 avalue++; 690 } 691 692 /* Then flip queuehead to other half of queue. */ 693 gru_gamer(cb, EOP_ERR_CSWAP, mqd->mq_gpa, XTYPE_DW, mqh.val, avalue, 694 IMA); 695 if (gru_wait(cb) != CBS_IDLE) 696 goto cberr; 697 698 /* If not successfully in swapping queue head, clear the hstatus lock */ 699 if (gru_get_amo_value(cb) != avalue) { 700 STAT(mesq_qf_switch_head_failed); 701 gru_gamir(cb, EOP_IR_INC, HSTATUS(mqd->mq_gpa, half), XTYPE_DW, 702 IMA); 703 if (gru_wait(cb) != CBS_IDLE) 704 goto cberr; 705 } 706 return MQIE_AGAIN; 707 cberr: 708 STAT(mesq_qf_unexpected_error); 709 return MQE_UNEXPECTED_CB_ERR; 710 } 711 712 /* 713 * Handle a PUT failure. Note: if message was a 2-line message, one of the 714 * lines might have successfully have been written. 
Before sending the 715 * message, "present" must be cleared in BOTH lines to prevent the receiver 716 * from prematurely seeing the full message. 717 */ 718 static int send_message_put_nacked(void *cb, struct gru_message_queue_desc *mqd, 719 void *mesg, int lines) 720 { 721 unsigned long m, *val = mesg, gpa, save; 722 int ret; 723 724 m = mqd->mq_gpa + (gru_get_amo_value_head(cb) << 6); 725 if (lines == 2) { 726 gru_vset(cb, m, 0, XTYPE_CL, lines, 1, IMA); 727 if (gru_wait(cb) != CBS_IDLE) 728 return MQE_UNEXPECTED_CB_ERR; 729 } 730 gru_vstore(cb, m, gru_get_tri(mesg), XTYPE_CL, lines, 1, IMA); 731 if (gru_wait(cb) != CBS_IDLE) 732 return MQE_UNEXPECTED_CB_ERR; 733 734 if (!mqd->interrupt_vector) 735 return MQE_OK; 736 737 /* 738 * Send a cross-partition interrupt to the SSI that contains the target 739 * message queue. Normally, the interrupt is automatically delivered by 740 * hardware but some error conditions require explicit delivery. 741 * Use the GRU to deliver the interrupt. Otherwise partition failures 742 * could cause unrecovered errors. 743 */ 744 gpa = uv_global_gru_mmr_address(mqd->interrupt_pnode, UVH_IPI_INT); 745 save = *val; 746 *val = uv_hub_ipi_value(mqd->interrupt_apicid, mqd->interrupt_vector, 747 dest_Fixed); 748 gru_vstore_phys(cb, gpa, gru_get_tri(mesg), IAA_REGISTER, IMA); 749 ret = gru_wait(cb); 750 *val = save; 751 if (ret != CBS_IDLE) 752 return MQE_UNEXPECTED_CB_ERR; 753 return MQE_OK; 754 } 755 756 /* 757 * Handle a gru_mesq failure. Some of these failures are software recoverable 758 * or retryable. 
759 */ 760 static int send_message_failure(void *cb, struct gru_message_queue_desc *mqd, 761 void *mesg, int lines) 762 { 763 int substatus, ret = 0; 764 765 substatus = gru_get_cb_message_queue_substatus(cb); 766 switch (substatus) { 767 case CBSS_NO_ERROR: 768 STAT(mesq_send_unexpected_error); 769 ret = MQE_UNEXPECTED_CB_ERR; 770 break; 771 case CBSS_LB_OVERFLOWED: 772 STAT(mesq_send_lb_overflow); 773 ret = MQE_CONGESTION; 774 break; 775 case CBSS_QLIMIT_REACHED: 776 STAT(mesq_send_qlimit_reached); 777 ret = send_message_queue_full(cb, mqd, mesg, lines); 778 break; 779 case CBSS_AMO_NACKED: 780 STAT(mesq_send_amo_nacked); 781 ret = MQE_CONGESTION; 782 break; 783 case CBSS_PUT_NACKED: 784 STAT(mesq_send_put_nacked); 785 ret = send_message_put_nacked(cb, mqd, mesg, lines); 786 break; 787 case CBSS_PAGE_OVERFLOW: 788 STAT(mesq_page_overflow); 789 /* fallthru */ 790 default: 791 BUG(); 792 } 793 return ret; 794 } 795 796 /* 797 * Send a message to a message queue 798 * mqd message queue descriptor 799 * mesg message. 
ust be vaddr within a GSEG 800 * bytes message size (<= 2 CL) 801 */ 802 int gru_send_message_gpa(struct gru_message_queue_desc *mqd, void *mesg, 803 unsigned int bytes) 804 { 805 struct message_header *mhdr; 806 void *cb; 807 void *dsr; 808 int istatus, clines, ret; 809 810 STAT(mesq_send); 811 BUG_ON(bytes < sizeof(int) || bytes > 2 * GRU_CACHE_LINE_BYTES); 812 813 clines = DIV_ROUND_UP(bytes, GRU_CACHE_LINE_BYTES); 814 if (gru_get_cpu_resources(bytes, &cb, &dsr)) 815 return MQE_BUG_NO_RESOURCES; 816 memcpy(dsr, mesg, bytes); 817 mhdr = dsr; 818 mhdr->present = MQS_FULL; 819 mhdr->lines = clines; 820 if (clines == 2) { 821 mhdr->present2 = get_present2(mhdr); 822 restore_present2(mhdr, MQS_FULL); 823 } 824 825 do { 826 ret = MQE_OK; 827 gru_mesq(cb, mqd->mq_gpa, gru_get_tri(mhdr), clines, IMA); 828 istatus = gru_wait(cb); 829 if (istatus != CBS_IDLE) 830 ret = send_message_failure(cb, mqd, dsr, clines); 831 } while (ret == MQIE_AGAIN); 832 gru_free_cpu_resources(cb, dsr); 833 834 if (ret) 835 STAT(mesq_send_failed); 836 return ret; 837 } 838 EXPORT_SYMBOL_GPL(gru_send_message_gpa); 839 840 /* 841 * Advance the receive pointer for the queue to the next message. 842 */ 843 void gru_free_message(struct gru_message_queue_desc *mqd, void *mesg) 844 { 845 struct message_queue *mq = mqd->mq; 846 struct message_header *mhdr = mq->next; 847 void *next, *pnext; 848 int half = -1; 849 int lines = mhdr->lines; 850 851 if (lines == 2) 852 restore_present2(mhdr, MQS_EMPTY); 853 mhdr->present = MQS_EMPTY; 854 855 pnext = mq->next; 856 next = pnext + GRU_CACHE_LINE_BYTES * lines; 857 if (next == mq->limit) { 858 next = mq->start; 859 half = 1; 860 } else if (pnext < mq->start2 && next >= mq->start2) { 861 half = 0; 862 } 863 864 if (half >= 0) 865 mq->hstatus[half] = 1; 866 mq->next = next; 867 } 868 EXPORT_SYMBOL_GPL(gru_free_message); 869 870 /* 871 * Get next message from message queue. Return NULL if no message 872 * present. 
User must call next_message() to move to next message.
 *   rmq	message queue
 */
void *gru_get_next_message(struct gru_message_queue_desc *mqd)
{
	struct message_queue *mq = mqd->mq;
	struct message_header *mhdr;
	int present;

	/* skip NOOP messages */
	for (;;) {
		mhdr = mq->next;
		present = mhdr->present;
		if (present != MQS_NOOP)
			break;
		gru_free_message(mqd, mhdr);
	}

	/* Wait for both halves of 2 line messages */
	if (present == MQS_FULL && mhdr->lines == 2 &&
				get_present2(mhdr) == MQS_EMPTY)
		present = MQS_EMPTY;

	if (!present) {
		STAT(mesq_receive_none);
		return NULL;
	}

	/* Put back the byte that the sender saved in present2 */
	if (mhdr->lines == 2)
		restore_present2(mhdr, mhdr->present2);

	STAT(mesq_receive);
	return mhdr;
}
EXPORT_SYMBOL_GPL(gru_get_next_message);

/* ---------------------- GRU DATA COPY FUNCTIONS ---------------------------*/

/*
 * Load a DW from a global GPA. The GPA can be a memory or MMR address.
910 */ 911 int gru_read_gpa(unsigned long *value, unsigned long gpa) 912 { 913 void *cb; 914 void *dsr; 915 int ret, iaa; 916 917 STAT(read_gpa); 918 if (gru_get_cpu_resources(GRU_NUM_KERNEL_DSR_BYTES, &cb, &dsr)) 919 return MQE_BUG_NO_RESOURCES; 920 iaa = gpa >> 62; 921 gru_vload_phys(cb, gpa, gru_get_tri(dsr), iaa, IMA); 922 ret = gru_wait(cb); 923 if (ret == CBS_IDLE) 924 *value = *(unsigned long *)dsr; 925 gru_free_cpu_resources(cb, dsr); 926 return ret; 927 } 928 EXPORT_SYMBOL_GPL(gru_read_gpa); 929 930 931 /* 932 * Copy a block of data using the GRU resources 933 */ 934 int gru_copy_gpa(unsigned long dest_gpa, unsigned long src_gpa, 935 unsigned int bytes) 936 { 937 void *cb; 938 void *dsr; 939 int ret; 940 941 STAT(copy_gpa); 942 if (gru_get_cpu_resources(GRU_NUM_KERNEL_DSR_BYTES, &cb, &dsr)) 943 return MQE_BUG_NO_RESOURCES; 944 gru_bcopy(cb, src_gpa, dest_gpa, gru_get_tri(dsr), 945 XTYPE_B, bytes, GRU_NUM_KERNEL_DSR_CL, IMA); 946 ret = gru_wait(cb); 947 gru_free_cpu_resources(cb, dsr); 948 return ret; 949 } 950 EXPORT_SYMBOL_GPL(gru_copy_gpa); 951 952 /* ------------------- KERNEL QUICKTESTS RUN AT STARTUP ----------------*/ 953 /* Temp - will delete after we gain confidence in the GRU */ 954 955 static int quicktest0(unsigned long arg) 956 { 957 unsigned long word0; 958 unsigned long word1; 959 void *cb; 960 void *dsr; 961 unsigned long *p; 962 int ret = -EIO; 963 964 if (gru_get_cpu_resources(GRU_CACHE_LINE_BYTES, &cb, &dsr)) 965 return MQE_BUG_NO_RESOURCES; 966 p = dsr; 967 word0 = MAGIC; 968 word1 = 0; 969 970 gru_vload(cb, uv_gpa(&word0), gru_get_tri(dsr), XTYPE_DW, 1, 1, IMA); 971 if (gru_wait(cb) != CBS_IDLE) { 972 printk(KERN_DEBUG "GRU:%d quicktest0: CBR failure 1\n", smp_processor_id()); 973 goto done; 974 } 975 976 if (*p != MAGIC) { 977 printk(KERN_DEBUG "GRU:%d quicktest0 bad magic 0x%lx\n", smp_processor_id(), *p); 978 goto done; 979 } 980 gru_vstore(cb, uv_gpa(&word1), gru_get_tri(dsr), XTYPE_DW, 1, 1, IMA); 981 if (gru_wait(cb) != CBS_IDLE) 
{ 982 printk(KERN_DEBUG "GRU:%d quicktest0: CBR failure 2\n", smp_processor_id()); 983 goto done; 984 } 985 986 if (word0 != word1 || word1 != MAGIC) { 987 printk(KERN_DEBUG 988 "GRU:%d quicktest0 err: found 0x%lx, expected 0x%lx\n", 989 smp_processor_id(), word1, MAGIC); 990 goto done; 991 } 992 ret = 0; 993 994 done: 995 gru_free_cpu_resources(cb, dsr); 996 return ret; 997 } 998 999 #define ALIGNUP(p, q) ((void *)(((unsigned long)(p) + (q) - 1) & ~(q - 1))) 1000 1001 static int quicktest1(unsigned long arg) 1002 { 1003 struct gru_message_queue_desc mqd; 1004 void *p, *mq; 1005 int i, ret = -EIO; 1006 char mes[GRU_CACHE_LINE_BYTES], *m; 1007 1008 /* Need 1K cacheline aligned that does not cross page boundary */ 1009 p = kmalloc(4096, 0); 1010 if (p == NULL) 1011 return -ENOMEM; 1012 mq = ALIGNUP(p, 1024); 1013 memset(mes, 0xee, sizeof(mes)); 1014 1015 gru_create_message_queue(&mqd, mq, 8 * GRU_CACHE_LINE_BYTES, 0, 0, 0); 1016 for (i = 0; i < 6; i++) { 1017 mes[8] = i; 1018 do { 1019 ret = gru_send_message_gpa(&mqd, mes, sizeof(mes)); 1020 } while (ret == MQE_CONGESTION); 1021 if (ret) 1022 break; 1023 } 1024 if (ret != MQE_QUEUE_FULL || i != 4) { 1025 printk(KERN_DEBUG "GRU:%d quicktest1: unexpect status %d, i %d\n", 1026 smp_processor_id(), ret, i); 1027 goto done; 1028 } 1029 1030 for (i = 0; i < 6; i++) { 1031 m = gru_get_next_message(&mqd); 1032 if (!m || m[8] != i) 1033 break; 1034 gru_free_message(&mqd, m); 1035 } 1036 if (i != 4) { 1037 printk(KERN_DEBUG "GRU:%d quicktest2: bad message, i %d, m %p, m8 %d\n", 1038 smp_processor_id(), i, m, m ? 
m[8] : -1); 1039 goto done; 1040 } 1041 ret = 0; 1042 1043 done: 1044 kfree(p); 1045 return ret; 1046 } 1047 1048 static int quicktest2(unsigned long arg) 1049 { 1050 static DECLARE_COMPLETION(cmp); 1051 unsigned long han; 1052 int blade_id = 0; 1053 int numcb = 4; 1054 int ret = 0; 1055 unsigned long *buf; 1056 void *cb0, *cb; 1057 struct gru_control_block_status *gen; 1058 int i, k, istatus, bytes; 1059 1060 bytes = numcb * 4 * 8; 1061 buf = kmalloc(bytes, GFP_KERNEL); 1062 if (!buf) 1063 return -ENOMEM; 1064 1065 ret = -EBUSY; 1066 han = gru_reserve_async_resources(blade_id, numcb, 0, &cmp); 1067 if (!han) 1068 goto done; 1069 1070 gru_lock_async_resource(han, &cb0, NULL); 1071 memset(buf, 0xee, bytes); 1072 for (i = 0; i < numcb; i++) 1073 gru_vset(cb0 + i * GRU_HANDLE_STRIDE, uv_gpa(&buf[i * 4]), 0, 1074 XTYPE_DW, 4, 1, IMA_INTERRUPT); 1075 1076 ret = 0; 1077 k = numcb; 1078 do { 1079 gru_wait_async_cbr(han); 1080 for (i = 0; i < numcb; i++) { 1081 cb = cb0 + i * GRU_HANDLE_STRIDE; 1082 istatus = gru_check_status(cb); 1083 if (istatus != CBS_ACTIVE && istatus != CBS_CALL_OS) 1084 break; 1085 } 1086 if (i == numcb) 1087 continue; 1088 if (istatus != CBS_IDLE) { 1089 printk(KERN_DEBUG "GRU:%d quicktest2: cb %d, exception\n", smp_processor_id(), i); 1090 ret = -EFAULT; 1091 } else if (buf[4 * i] || buf[4 * i + 1] || buf[4 * i + 2] || 1092 buf[4 * i + 3]) { 1093 printk(KERN_DEBUG "GRU:%d quicktest2:cb %d, buf 0x%lx, 0x%lx, 0x%lx, 0x%lx\n", 1094 smp_processor_id(), i, buf[4 * i], buf[4 * i + 1], buf[4 * i + 2], buf[4 * i + 3]); 1095 ret = -EIO; 1096 } 1097 k--; 1098 gen = cb; 1099 gen->istatus = CBS_CALL_OS; /* don't handle this CBR again */ 1100 } while (k); 1101 BUG_ON(cmp.done); 1102 1103 gru_unlock_async_resource(han); 1104 gru_release_async_resources(han); 1105 done: 1106 kfree(buf); 1107 return ret; 1108 } 1109 1110 #define BUFSIZE 200 1111 static int quicktest3(unsigned long arg) 1112 { 1113 char buf1[BUFSIZE], buf2[BUFSIZE]; 1114 int ret = 0; 1115 1116 
memset(buf2, 0, sizeof(buf2)); 1117 memset(buf1, get_cycles() & 255, sizeof(buf1)); 1118 gru_copy_gpa(uv_gpa(buf2), uv_gpa(buf1), BUFSIZE); 1119 if (memcmp(buf1, buf2, BUFSIZE)) { 1120 printk(KERN_DEBUG "GRU:%d quicktest3 error\n", smp_processor_id()); 1121 ret = -EIO; 1122 } 1123 return ret; 1124 } 1125 1126 /* 1127 * Debugging only. User hook for various kernel tests 1128 * of driver & gru. 1129 */ 1130 int gru_ktest(unsigned long arg) 1131 { 1132 int ret = -EINVAL; 1133 1134 switch (arg & 0xff) { 1135 case 0: 1136 ret = quicktest0(arg); 1137 break; 1138 case 1: 1139 ret = quicktest1(arg); 1140 break; 1141 case 2: 1142 ret = quicktest2(arg); 1143 break; 1144 case 3: 1145 ret = quicktest3(arg); 1146 break; 1147 case 99: 1148 ret = gru_free_kernel_contexts(); 1149 break; 1150 } 1151 return ret; 1152 1153 } 1154 1155 int gru_kservices_init(void) 1156 { 1157 return 0; 1158 } 1159 1160 void gru_kservices_exit(void) 1161 { 1162 if (gru_free_kernel_contexts()) 1163 BUG(); 1164 } 1165 1166