/*
 * SN Platform GRU Driver
 *
 *              KERNEL SERVICES THAT USE THE GRU
 *
 * Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
 */

#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/spinlock.h>
#include <linux/device.h>
#include <linux/miscdevice.h>
#include <linux/proc_fs.h>
#include <linux/interrupt.h>
#include <linux/uaccess.h>
#include <linux/delay.h>
#include <asm/io_apic.h>
#include "gru.h"
#include "grulib.h"
#include "grutables.h"
#include "grukservices.h"
#include "gru_instructions.h"
#include <asm/uv/uv_hub.h>

/*
 * Kernel GRU Usage
 *
 * The following is an interim algorithm for management of kernel GRU
 * resources. This will likely be replaced when we better understand the
 * kernel/user requirements.
 *
 * Blade percpu resources reserved for kernel use. These resources are
 * reserved whenever the kernel context for the blade is loaded. Note
 * that the kernel context is not guaranteed to be always available. It is
 * loaded on demand & can be stolen by a user if the user demand exceeds the
 * kernel demand. The kernel can always reload the kernel context but
 * a SLEEP may be required!!!
 *
 * Async Overview:
 *
 * 	Each blade has one "kernel context" that owns GRU kernel resources
 * 	located on the blade. Kernel drivers use GRU resources in this context
 * 	for sending messages, zeroing memory, etc.
 *
 * 	The kernel context is dynamically loaded on demand. If it is not in
 * 	use by the kernel, the kernel context can be unloaded & given to a user.
 * 	The kernel context will be reloaded when needed. This may require that
 * 	a context be stolen from a user.
 * 		NOTE: frequent unloading/reloading of the kernel context is
 * 		expensive. We are depending on batch schedulers, cpusets, sane
 * 		drivers or some other mechanism to prevent the need for frequent
 * 		stealing/reloading.
 *
 * 	The kernel context consists of two parts:
 * 		- 1 CB & a few DSRs that are reserved for each cpu on the blade.
 * 		  Each cpu has its own private resources & does not share them
 * 		  with other cpus. These resources are used serially, ie,
 * 		  locked, used & unlocked on each call to a function in
 * 		  grukservices.
 * 			(Now that we have dynamic loading of kernel contexts, I
 * 			 may rethink this & allow sharing between cpus....)
 *
 * 		- Additional resources can be reserved long term & used directly
 * 		  by UV drivers located in the kernel. Drivers using these GRU
 * 		  resources can use asynchronous GRU instructions that send
 * 		  interrupts on completion.
 * 			- these resources must be explicitly locked/unlocked
 * 			- locked resources prevent (obviously) the kernel
 * 			  context from being unloaded.
 * 			- drivers using these resources directly issue their own
 * 			  GRU instruction and must wait/check completion.
 *
 * 		  When these resources are reserved, the caller can optionally
 * 		  associate a wait_queue with the resources and use asynchronous
 * 		  GRU instructions. When an async GRU instruction completes, the
 * 		  driver will do a wakeup on the event.
 */

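/*
 * Editorial example, not part of the original driver: a minimal sketch of
 * the async usage pattern described above, modeled on quicktest2() below.
 * It assumes the caller owns the only async reservation on the blade and
 * needs a single CBR and no DSR space. The function name and the local
 * "word" buffer are hypothetical; the reservation API used here is defined
 * later in this file.
 */
static int __maybe_unused gru_async_usage_example(int blade_id)
{
	static DECLARE_COMPLETION(cmp);
	static unsigned long word;
	unsigned long han;
	void *cb;
	int ret = 0;

	/* Reserve 1 CBR & no DSR bytes; 0 means the blade is already reserved */
	han = gru_reserve_async_resources(blade_id, 1, 0, &cmp);
	if (!han)
		return -EBUSY;

	/* Lock the kernel context & get a pointer to the reserved CBR */
	gru_lock_async_resource(han, &cb, NULL);

	/* Issue an async instruction that interrupts on completion */
	gru_vset(cb, uv_gpa(&word), 0, XTYPE_DW, 1, 1, IMA_INTERRUPT);
	gru_wait_async_cbr(han);	/* interrupt handler completes &cmp */
	if (gru_check_status(cb) != CBS_IDLE)
		ret = -EIO;

	gru_unlock_async_resource(han);
	gru_release_async_resources(han);
	return ret;
}
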
#define ASYNC_HAN_TO_BID(h)	((h) - 1)
#define ASYNC_BID_TO_HAN(b)	((b) + 1)
#define ASYNC_HAN_TO_BS(h)	gru_base[ASYNC_HAN_TO_BID(h)]

#define GRU_NUM_KERNEL_CBR	1
#define GRU_NUM_KERNEL_DSR_BYTES 256
#define GRU_NUM_KERNEL_DSR_CL	(GRU_NUM_KERNEL_DSR_BYTES /		\
					GRU_CACHE_LINE_BYTES)

/* GRU instruction attributes for all instructions */
#define IMA			IMA_CB_DELAY

/* GRU cacheline size is always 64 bytes - even on arches with 128 byte lines */
#define __gru_cacheline_aligned__                               \
	__attribute__((__aligned__(GRU_CACHE_LINE_BYTES)))

#define MAGIC	0x1234567887654321UL

/* Default retry count for GRU errors on kernel instructions */
#define EXCEPTION_RETRY_LIMIT	3

/* Status of message queue sections */
#define MQS_EMPTY		0
#define MQS_FULL		1
#define MQS_NOOP		2

/*----------------- RESOURCE MANAGEMENT -------------------------------------*/
/* optimized for x86_64 */
struct message_queue {
	union gru_mesqhead	head __gru_cacheline_aligned__;	/* CL 0 */
	int			qlines;				/* DW 1 */
	long			hstatus[2];
	void 			*next __gru_cacheline_aligned__;/* CL 1 */
	void 			*limit;
	void 			*start;
	void 			*start2;
	char			data ____cacheline_aligned;	/* CL 2 */
};

/* First word in every message - used by mesq interface */
struct message_header {
	char	present;
	char	present2;
	char 	lines;
	char	fill;
};

#define HSTATUS(mq, h)	((mq) + offsetof(struct message_queue, hstatus[h]))

/*
 * Reload the blade's kernel context into a GRU chiplet. Called holding
 * the bs_kgts_sema for READ. Will steal user contexts if necessary.
 */
static void gru_load_kernel_context(struct gru_blade_state *bs, int blade_id)
{
	struct gru_state *gru;
	struct gru_thread_state *kgts;
	void *vaddr;
	int ctxnum, ncpus;

	up_read(&bs->bs_kgts_sema);
	down_write(&bs->bs_kgts_sema);

	if (!bs->bs_kgts) {
		bs->bs_kgts = gru_alloc_gts(NULL, 0, 0, 0, 0, 0);
		bs->bs_kgts->ts_user_blade_id = blade_id;
	}
	kgts = bs->bs_kgts;

	if (!kgts->ts_gru) {
		STAT(load_kernel_context);
		ncpus = uv_blade_nr_possible_cpus(blade_id);
		kgts->ts_cbr_au_count = GRU_CB_COUNT_TO_AU(
			GRU_NUM_KERNEL_CBR * ncpus + bs->bs_async_cbrs);
		kgts->ts_dsr_au_count = GRU_DS_BYTES_TO_AU(
			GRU_NUM_KERNEL_DSR_BYTES * ncpus +
				bs->bs_async_dsr_bytes);
		while (!gru_assign_gru_context(kgts)) {
			msleep(1);
			gru_steal_context(kgts);
		}
		gru_load_context(kgts);
		gru = bs->bs_kgts->ts_gru;
		vaddr = gru->gs_gru_base_vaddr;
		ctxnum = kgts->ts_ctxnum;
		bs->kernel_cb = get_gseg_base_address_cb(vaddr, ctxnum, 0);
		bs->kernel_dsr = get_gseg_base_address_ds(vaddr, ctxnum, 0);
	}
	downgrade_write(&bs->bs_kgts_sema);
}

/*
 * Free all kernel contexts that are not currently in use.
 *   Returns 0 if all freed, else the number of in-use contexts.
 */
static int gru_free_kernel_contexts(void)
{
	struct gru_blade_state *bs;
	struct gru_thread_state *kgts;
	int bid, ret = 0;

	for (bid = 0; bid < GRU_MAX_BLADES; bid++) {
		bs = gru_base[bid];
		if (!bs)
			continue;

		/* Ignore busy contexts. Don't want to block here. */
		if (down_write_trylock(&bs->bs_kgts_sema)) {
			kgts = bs->bs_kgts;
			if (kgts && kgts->ts_gru)
				gru_unload_context(kgts, 0);
			bs->bs_kgts = NULL;
			up_write(&bs->bs_kgts_sema);
			kfree(kgts);
		} else {
			ret++;
		}
	}
	return ret;
}

/*
 * Lock & load the kernel context for the specified blade.
 */
static struct gru_blade_state *gru_lock_kernel_context(int blade_id)
{
	struct gru_blade_state *bs;
	int bid;

	STAT(lock_kernel_context);
again:
	bid = blade_id < 0 ? uv_numa_blade_id() : blade_id;
	bs = gru_base[bid];

	/* Handle the case where migration occurred while waiting for the sema */
	down_read(&bs->bs_kgts_sema);
	if (blade_id < 0 && bid != uv_numa_blade_id()) {
		up_read(&bs->bs_kgts_sema);
		goto again;
	}
	if (!bs->bs_kgts || !bs->bs_kgts->ts_gru)
		gru_load_kernel_context(bs, bid);
	return bs;
}

/*
 * Unlock the kernel context for the specified blade. Context is not
 * unloaded but may be stolen before next use.
 */
static void gru_unlock_kernel_context(int blade_id)
{
	struct gru_blade_state *bs;

	bs = gru_base[blade_id];
	up_read(&bs->bs_kgts_sema);
	STAT(unlock_kernel_context);
}

/*
 * Reserve & get pointers to the DSR/CBRs reserved for the current cpu.
 * 	- returns with preemption disabled
 */
static int gru_get_cpu_resources(int dsr_bytes, void **cb, void **dsr)
{
	struct gru_blade_state *bs;
	int lcpu;

	BUG_ON(dsr_bytes > GRU_NUM_KERNEL_DSR_BYTES);
	preempt_disable();
	bs = gru_lock_kernel_context(-1);
	lcpu = uv_blade_processor_id();
	*cb = bs->kernel_cb + lcpu * GRU_HANDLE_STRIDE;
	*dsr = bs->kernel_dsr + lcpu * GRU_NUM_KERNEL_DSR_BYTES;
	return 0;
}

/*
 * Free the current cpu's reserved DSR/CBR resources.
 */
static void gru_free_cpu_resources(void *cb, void *dsr)
{
	gru_unlock_kernel_context(uv_numa_blade_id());
	preempt_enable();
}

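/*
 * Editorial example, not part of the original driver: a minimal sketch of
 * the serial "lock, use, unlock" pattern the per-cpu kernel resources are
 * intended for, modeled on quicktest0() below. The function name and the
 * local "word" variable are hypothetical.
 */
static int __maybe_unused gru_cpu_resources_example(void)
{
	unsigned long word = 0;
	void *cb, *dsr;

	/* Lock the kernel context & get this cpu's CBR/DSR
	   (disables preemption) */
	if (gru_get_cpu_resources(GRU_CACHE_LINE_BYTES, &cb, &dsr))
		return MQE_BUG_NO_RESOURCES;

	/* Use the resources for a single GRU operation... */
	gru_vload(cb, uv_gpa(&word), gru_get_tri(dsr), XTYPE_DW, 1, 1, IMA);
	if (gru_wait(cb) != CBS_IDLE) {
		gru_free_cpu_resources(cb, dsr);
		return -EIO;
	}

	/* ...then release them (re-enables preemption) */
	gru_free_cpu_resources(cb, dsr);
	return 0;
}
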
/*
 * Reserve GRU resources to be used asynchronously.
 *   Note: currently supports only 1 reservation per blade.
 *
 * 	input:
 * 		blade_id  - blade on which resources should be reserved
 * 		cbrs	  - number of CBRs
 * 		dsr_bytes - number of DSR bytes needed
 *	output:
 *		handle to identify resource
 *		(0 = async resources already reserved)
 */
unsigned long gru_reserve_async_resources(int blade_id, int cbrs, int dsr_bytes,
			struct completion *cmp)
{
	struct gru_blade_state *bs;
	struct gru_thread_state *kgts;
	int ret = 0;

	bs = gru_base[blade_id];

	down_write(&bs->bs_kgts_sema);

	/* Verify no resources already reserved */
	if (bs->bs_async_dsr_bytes + bs->bs_async_cbrs)
		goto done;
	bs->bs_async_dsr_bytes = dsr_bytes;
	bs->bs_async_cbrs = cbrs;
	bs->bs_async_wq = cmp;
	kgts = bs->bs_kgts;

	/* Resources changed. Unload context if already loaded */
	if (kgts && kgts->ts_gru)
		gru_unload_context(kgts, 0);
	ret = ASYNC_BID_TO_HAN(blade_id);

done:
	up_write(&bs->bs_kgts_sema);
	return ret;
}

/*
 * Release async resources previously reserved.
 *
 *	input:
 *		han - handle to identify resources
 */
void gru_release_async_resources(unsigned long han)
{
	struct gru_blade_state *bs = ASYNC_HAN_TO_BS(han);

	down_write(&bs->bs_kgts_sema);
	bs->bs_async_dsr_bytes = 0;
	bs->bs_async_cbrs = 0;
	bs->bs_async_wq = NULL;
	up_write(&bs->bs_kgts_sema);
}

/*
 * Wait for async GRU instructions to complete.
 *
 *	input:
 *		han - handle to identify resources
 */
void gru_wait_async_cbr(unsigned long han)
{
	struct gru_blade_state *bs = ASYNC_HAN_TO_BS(han);

	wait_for_completion(bs->bs_async_wq);
	mb();
}

/*
 * Lock previously reserved async GRU resources
 *
 *	input:
 *		han - handle to identify resources
 *	output:
 *		cb  - pointer to first CBR
 *		dsr - pointer to first DSR
 */
void gru_lock_async_resource(unsigned long han, void **cb, void **dsr)
{
	struct gru_blade_state *bs = ASYNC_HAN_TO_BS(han);
	int blade_id = ASYNC_HAN_TO_BID(han);
	int ncpus;

	gru_lock_kernel_context(blade_id);
	ncpus = uv_blade_nr_possible_cpus(blade_id);
	if (cb)
		*cb = bs->kernel_cb + ncpus * GRU_HANDLE_STRIDE;
	if (dsr)
		*dsr = bs->kernel_dsr + ncpus * GRU_NUM_KERNEL_DSR_BYTES;
}

/*
 * Unlock previously reserved async GRU resources
 *
 *	input:
 *		han - handle to identify resources
 */
void gru_unlock_async_resource(unsigned long han)
{
	int blade_id = ASYNC_HAN_TO_BID(han);

	gru_unlock_kernel_context(blade_id);
}

/*----------------------------------------------------------------------*/
int gru_get_cb_exception_detail(void *cb,
		struct control_block_extended_exc_detail *excdet)
{
	struct gru_control_block_extended *cbe;
	struct gru_thread_state *kgts = NULL;
	unsigned long off;
	int cbrnum, bid;

	/*
	 * Locate kgts for cb. This algorithm is SLOW but
	 * this function is rarely called (i.e., almost never).
	 * Performance does not matter.
	 */
	for_each_possible_blade(bid) {
		if (!gru_base[bid])
			break;
		kgts = gru_base[bid]->bs_kgts;
		if (!kgts || !kgts->ts_gru)
			continue;
		off = cb - kgts->ts_gru->gs_gru_base_vaddr;
		if (off < GRU_SIZE)
			break;
		kgts = NULL;
	}
	BUG_ON(!kgts);
	cbrnum = thread_cbr_number(kgts, get_cb_number(cb));
	cbe = get_cbe(GRUBASE(cb), cbrnum);
	gru_flush_cache(cbe);	/* CBE not coherent */
	sync_core();
	excdet->opc = cbe->opccpy;
	excdet->exopc = cbe->exopccpy;
	excdet->ecause = cbe->ecause;
	excdet->exceptdet0 = cbe->idef1upd;
	excdet->exceptdet1 = cbe->idef3upd;
	gru_flush_cache(cbe);
	return 0;
}

char *gru_get_cb_exception_detail_str(int ret, void *cb,
				      char *buf, int size)
{
	struct gru_control_block_status *gen = (void *)cb;
	struct control_block_extended_exc_detail excdet;

	if (ret > 0 && gen->istatus == CBS_EXCEPTION) {
		gru_get_cb_exception_detail(cb, &excdet);
		snprintf(buf, size,
			"GRU:%d exception: cb %p, opc %d, exopc %d, ecause 0x%x, "
			"excdet0 0x%lx, excdet1 0x%x", smp_processor_id(),
			gen, excdet.opc, excdet.exopc, excdet.ecause,
			excdet.exceptdet0, excdet.exceptdet1);
	} else {
		snprintf(buf, size, "No exception");
	}
	return buf;
}

static int gru_wait_idle_or_exception(struct gru_control_block_status *gen)
{
	while (gen->istatus >= CBS_ACTIVE) {
		cpu_relax();
		barrier();
	}
	return gen->istatus;
}

static int gru_retry_exception(void *cb)
{
	struct gru_control_block_status *gen = (void *)cb;
	struct control_block_extended_exc_detail excdet;
	int retry = EXCEPTION_RETRY_LIMIT;

	while (1) {
		if (gru_wait_idle_or_exception(gen) == CBS_IDLE)
			return CBS_IDLE;
		if (gru_get_cb_message_queue_substatus(cb))
			return CBS_EXCEPTION;
		gru_get_cb_exception_detail(cb, &excdet);
		if ((excdet.ecause & ~EXCEPTION_RETRY_BITS) ||
				(excdet.cbrexecstatus & CBR_EXS_ABORT_OCC))
			break;
		if (retry-- == 0)
			break;
		gen->icmd = 1;
		gru_flush_cache(gen);
	}
	return CBS_EXCEPTION;
}

int gru_check_status_proc(void *cb)
{
	struct gru_control_block_status *gen = (void *)cb;
	int ret;

	ret = gen->istatus;
	if (ret == CBS_EXCEPTION)
		ret = gru_retry_exception(cb);
	rmb();
	return ret;
}

int gru_wait_proc(void *cb)
{
	struct gru_control_block_status *gen = (void *)cb;
	int ret;

	ret = gru_wait_idle_or_exception(gen);
	if (ret == CBS_EXCEPTION)
		ret = gru_retry_exception(cb);
	rmb();
	return ret;
}

void gru_abort(int ret, void *cb, char *str)
{
	char buf[GRU_EXC_STR_SIZE];

	panic("GRU FATAL ERROR: %s - %s\n", str,
	      gru_get_cb_exception_detail_str(ret, cb, buf, sizeof(buf)));
}

void gru_wait_abort_proc(void *cb)
{
	int ret;

	ret = gru_wait_proc(cb);
	if (ret)
		gru_abort(ret, cb, "gru_wait_abort");
}


/*------------------------------ MESSAGE QUEUES -----------------------------*/

/* Internal status. These are NOT returned to the user. */
#define MQIE_AGAIN		-1	/* try again */


/*
 * Save/restore the "present" flag that is in the second line of 2-line
 * messages
 */
static inline int get_present2(void *p)
{
	struct message_header *mhdr = p + GRU_CACHE_LINE_BYTES;
	return mhdr->present;
}

static inline void restore_present2(void *p, int val)
{
	struct message_header *mhdr = p + GRU_CACHE_LINE_BYTES;
	mhdr->present = val;
}

/*
 * Create a message queue.
 * 	qlines - message queue size in cache lines. Includes 2-line header.
 */
int gru_create_message_queue(struct gru_message_queue_desc *mqd,
		void *p, unsigned int bytes, int nasid, int vector, int apicid)
{
	struct message_queue *mq = p;
	unsigned int qlines;

	qlines = bytes / GRU_CACHE_LINE_BYTES - 2;
	memset(mq, 0, bytes);
	mq->start = &mq->data;
	mq->start2 = &mq->data + (qlines / 2 - 1) * GRU_CACHE_LINE_BYTES;
	mq->next = &mq->data;
	mq->limit = &mq->data + (qlines - 2) * GRU_CACHE_LINE_BYTES;
	mq->qlines = qlines;
	mq->hstatus[0] = 0;
	mq->hstatus[1] = 1;
	mq->head = gru_mesq_head(2, qlines / 2 + 1);
	mqd->mq = mq;
	mqd->mq_gpa = uv_gpa(mq);
	mqd->qlines = qlines;
	mqd->interrupt_pnode = nasid >> 1;
	mqd->interrupt_vector = vector;
	mqd->interrupt_apicid = apicid;
	return 0;
}
EXPORT_SYMBOL_GPL(gru_create_message_queue);

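/*
 * Editorial example, not part of the original driver: a minimal sketch of
 * creating a message queue & sending one message into it, modeled on
 * quicktest1() below. The queue buffer "mq" is hypothetical and must be
 * supplied by the caller: "bytes" long (2-line header plus data lines),
 * cacheline aligned and not crossing a page boundary. No delivery
 * interrupt is requested (nasid/vector/apicid of 0).
 */
static int __maybe_unused gru_mesq_send_example(void *mq, unsigned int bytes)
{
	struct gru_message_queue_desc mqd;
	char mes[GRU_CACHE_LINE_BYTES];
	int ret;

	gru_create_message_queue(&mqd, mq, bytes, 0, 0, 0);

	/* Send a single-line message; retry transient congestion */
	memset(mes, 0, sizeof(mes));
	do {
		ret = gru_send_message_gpa(&mqd, mes, sizeof(mes));
	} while (ret == MQE_CONGESTION);

	return ret ? -EIO : 0;
}
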
/*
 * Send a NOOP message to a message queue
 * 	Returns:
 * 		 0 - if queue is full after the send. This is the normal case
 * 		     but various races can change this.
 *		-1 - if mesq sent successfully but queue not full
 *		>0 - unexpected error. MQE_xxx returned
 */
static int send_noop_message(void *cb, struct gru_message_queue_desc *mqd,
				void *mesg)
{
	const struct message_header noop_header = {
					.present = MQS_NOOP, .lines = 1};
	unsigned long m;
	int substatus, ret;
	struct message_header save_mhdr, *mhdr = mesg;

	STAT(mesq_noop);
	save_mhdr = *mhdr;
	*mhdr = noop_header;
	gru_mesq(cb, mqd->mq_gpa, gru_get_tri(mhdr), 1, IMA);
	ret = gru_wait(cb);

	if (ret) {
		substatus = gru_get_cb_message_queue_substatus(cb);
		switch (substatus) {
		case CBSS_NO_ERROR:
			STAT(mesq_noop_unexpected_error);
			ret = MQE_UNEXPECTED_CB_ERR;
			break;
		case CBSS_LB_OVERFLOWED:
			STAT(mesq_noop_lb_overflow);
			ret = MQE_CONGESTION;
			break;
		case CBSS_QLIMIT_REACHED:
			STAT(mesq_noop_qlimit_reached);
			ret = 0;
			break;
		case CBSS_AMO_NACKED:
			STAT(mesq_noop_amo_nacked);
			ret = MQE_CONGESTION;
			break;
		case CBSS_PUT_NACKED:
			STAT(mesq_noop_put_nacked);
			m = mqd->mq_gpa + (gru_get_amo_value_head(cb) << 6);
			gru_vstore(cb, m, gru_get_tri(mesg), XTYPE_CL, 1, 1,
						IMA);
			if (gru_wait(cb) == CBS_IDLE)
				ret = MQIE_AGAIN;
			else
				ret = MQE_UNEXPECTED_CB_ERR;
			break;
		case CBSS_PAGE_OVERFLOW:
			STAT(mesq_noop_page_overflow);
			/* fallthru */
		default:
			BUG();
		}
	}
	*mhdr = save_mhdr;
	return ret;
}

/*
 * Handle a gru_mesq full.
 */
static int send_message_queue_full(void *cb, struct gru_message_queue_desc *mqd,
				void *mesg, int lines)
{
	union gru_mesqhead mqh;
	unsigned int limit, head;
	unsigned long avalue;
	int half, qlines;

	/* Determine if switching to first/second half of q */
	avalue = gru_get_amo_value(cb);
	head = gru_get_amo_value_head(cb);
	limit = gru_get_amo_value_limit(cb);

	qlines = mqd->qlines;
	half = (limit != qlines);

	if (half)
		mqh = gru_mesq_head(qlines / 2 + 1, qlines);
	else
		mqh = gru_mesq_head(2, qlines / 2 + 1);

	/* Try to get lock for switching head pointer */
	gru_gamir(cb, EOP_IR_CLR, HSTATUS(mqd->mq_gpa, half), XTYPE_DW, IMA);
	if (gru_wait(cb) != CBS_IDLE)
		goto cberr;
	if (!gru_get_amo_value(cb)) {
		STAT(mesq_qf_locked);
		return MQE_QUEUE_FULL;
	}

	/* Got the lock. Send optional NOP if queue not full. */
	if (head != limit) {
		if (send_noop_message(cb, mqd, mesg)) {
			gru_gamir(cb, EOP_IR_INC, HSTATUS(mqd->mq_gpa, half),
					XTYPE_DW, IMA);
			if (gru_wait(cb) != CBS_IDLE)
				goto cberr;
			STAT(mesq_qf_noop_not_full);
			return MQIE_AGAIN;
		}
		avalue++;
	}

	/* Then flip queuehead to other half of queue. */
	gru_gamer(cb, EOP_ERR_CSWAP, mqd->mq_gpa, XTYPE_DW, mqh.val, avalue,
							IMA);
	if (gru_wait(cb) != CBS_IDLE)
		goto cberr;

	/* If swapping the queue head was not successful, clear the hstatus lock */
	if (gru_get_amo_value(cb) != avalue) {
		STAT(mesq_qf_switch_head_failed);
		gru_gamir(cb, EOP_IR_INC, HSTATUS(mqd->mq_gpa, half), XTYPE_DW,
							IMA);
		if (gru_wait(cb) != CBS_IDLE)
			goto cberr;
	}
	return MQIE_AGAIN;
cberr:
	STAT(mesq_qf_unexpected_error);
	return MQE_UNEXPECTED_CB_ERR;
}

/*
 * Handle a PUT failure. Note: if message was a 2-line message, one of the
 * lines might have successfully been written. Before sending the
 * message, "present" must be cleared in BOTH lines to prevent the receiver
 * from prematurely seeing the full message.
 */
static int send_message_put_nacked(void *cb, struct gru_message_queue_desc *mqd,
				void *mesg, int lines)
{
	unsigned long m, *val = mesg, gpa, save;
	int ret;

	m = mqd->mq_gpa + (gru_get_amo_value_head(cb) << 6);
	if (lines == 2) {
		gru_vset(cb, m, 0, XTYPE_CL, lines, 1, IMA);
		if (gru_wait(cb) != CBS_IDLE)
			return MQE_UNEXPECTED_CB_ERR;
	}
	gru_vstore(cb, m, gru_get_tri(mesg), XTYPE_CL, lines, 1, IMA);
	if (gru_wait(cb) != CBS_IDLE)
		return MQE_UNEXPECTED_CB_ERR;

	if (!mqd->interrupt_vector)
		return MQE_OK;

	/*
	 * Send a cross-partition interrupt to the SSI that contains the target
	 * message queue. Normally, the interrupt is automatically delivered by
	 * hardware but some error conditions require explicit delivery.
	 * Use the GRU to deliver the interrupt. Otherwise partition failures
	 * could cause unrecovered errors.
	 */
	gpa = uv_global_gru_mmr_address(mqd->interrupt_pnode, UVH_IPI_INT);
	save = *val;
	*val = uv_hub_ipi_value(mqd->interrupt_apicid, mqd->interrupt_vector,
				dest_Fixed);
	gru_vstore_phys(cb, gpa, gru_get_tri(mesg), IAA_REGISTER, IMA);
	ret = gru_wait(cb);
	*val = save;
	if (ret != CBS_IDLE)
		return MQE_UNEXPECTED_CB_ERR;
	return MQE_OK;
}

/*
 * Handle a gru_mesq failure. Some of these failures are software recoverable
 * or retryable.
 */
static int send_message_failure(void *cb, struct gru_message_queue_desc *mqd,
				void *mesg, int lines)
{
	int substatus, ret = 0;

	substatus = gru_get_cb_message_queue_substatus(cb);
	switch (substatus) {
	case CBSS_NO_ERROR:
		STAT(mesq_send_unexpected_error);
		ret = MQE_UNEXPECTED_CB_ERR;
		break;
	case CBSS_LB_OVERFLOWED:
		STAT(mesq_send_lb_overflow);
		ret = MQE_CONGESTION;
		break;
	case CBSS_QLIMIT_REACHED:
		STAT(mesq_send_qlimit_reached);
		ret = send_message_queue_full(cb, mqd, mesg, lines);
		break;
	case CBSS_AMO_NACKED:
		STAT(mesq_send_amo_nacked);
		ret = MQE_CONGESTION;
		break;
	case CBSS_PUT_NACKED:
		STAT(mesq_send_put_nacked);
		ret = send_message_put_nacked(cb, mqd, mesg, lines);
		break;
	case CBSS_PAGE_OVERFLOW:
		STAT(mesq_page_overflow);
		/* fallthru */
	default:
		BUG();
	}
	return ret;
}

/*
 * Send a message to a message queue
 * 	mqd	message queue descriptor
 * 	mesg	message. Must be a vaddr within a GSEG
 * 	bytes	message size (<= 2 CL)
 */
int gru_send_message_gpa(struct gru_message_queue_desc *mqd, void *mesg,
				unsigned int bytes)
{
	struct message_header *mhdr;
	void *cb;
	void *dsr;
	int istatus, clines, ret;

	STAT(mesq_send);
	BUG_ON(bytes < sizeof(int) || bytes > 2 * GRU_CACHE_LINE_BYTES);

	clines = DIV_ROUND_UP(bytes, GRU_CACHE_LINE_BYTES);
	if (gru_get_cpu_resources(bytes, &cb, &dsr))
		return MQE_BUG_NO_RESOURCES;
	memcpy(dsr, mesg, bytes);
	mhdr = dsr;
	mhdr->present = MQS_FULL;
	mhdr->lines = clines;
	if (clines == 2) {
		mhdr->present2 = get_present2(mhdr);
		restore_present2(mhdr, MQS_FULL);
	}

	do {
		ret = MQE_OK;
		gru_mesq(cb, mqd->mq_gpa, gru_get_tri(mhdr), clines, IMA);
		istatus = gru_wait(cb);
		if (istatus != CBS_IDLE)
			ret = send_message_failure(cb, mqd, dsr, clines);
	} while (ret == MQIE_AGAIN);
	gru_free_cpu_resources(cb, dsr);

	if (ret)
		STAT(mesq_send_failed);
	return ret;
}
EXPORT_SYMBOL_GPL(gru_send_message_gpa);

/*
 * Advance the receive pointer for the queue to the next message.
 */
void gru_free_message(struct gru_message_queue_desc *mqd, void *mesg)
{
	struct message_queue *mq = mqd->mq;
	struct message_header *mhdr = mq->next;
	void *next, *pnext;
	int half = -1;
	int lines = mhdr->lines;

	if (lines == 2)
		restore_present2(mhdr, MQS_EMPTY);
	mhdr->present = MQS_EMPTY;

	pnext = mq->next;
	next = pnext + GRU_CACHE_LINE_BYTES * lines;
	if (next == mq->limit) {
		next = mq->start;
		half = 1;
	} else if (pnext < mq->start2 && next >= mq->start2) {
		half = 0;
	}

	if (half >= 0)
		mq->hstatus[half] = 1;
	mq->next = next;
}
EXPORT_SYMBOL_GPL(gru_free_message);

/*
 * Get next message from message queue. Return NULL if no message
 * present. User must call gru_free_message() to move to the next message.
 * 	mqd	message queue descriptor
 */
void *gru_get_next_message(struct gru_message_queue_desc *mqd)
{
	struct message_queue *mq = mqd->mq;
	struct message_header *mhdr = mq->next;
	int present = mhdr->present;

	/* skip NOOP messages */
	while (present == MQS_NOOP) {
		gru_free_message(mqd, mhdr);
		mhdr = mq->next;
		present = mhdr->present;
	}

	/* Wait for both halves of 2 line messages */
	if (present == MQS_FULL && mhdr->lines == 2 &&
				get_present2(mhdr) == MQS_EMPTY)
		present = MQS_EMPTY;

	if (!present) {
		STAT(mesq_receive_none);
		return NULL;
	}

	if (mhdr->lines == 2)
		restore_present2(mhdr, mhdr->present2);

	STAT(mesq_receive);
	return mhdr;
}
EXPORT_SYMBOL_GPL(gru_get_next_message);

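/*
 * Editorial example, not part of the original driver: a minimal sketch of
 * draining a message queue on the receive side, using a descriptor created
 * by gru_create_message_queue(). The function name and the
 * handle_message() callback are hypothetical.
 */
static void __maybe_unused gru_mesq_receive_example(
		struct gru_message_queue_desc *mqd,
		void (*handle_message)(void *mesg))
{
	void *m;

	/* Poll until the queue is empty; each message must be freed to
	   advance the receive pointer */
	while ((m = gru_get_next_message(mqd)) != NULL) {
		handle_message(m);
		gru_free_message(mqd, m);
	}
}
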
/* ---------------------- GRU DATA COPY FUNCTIONS ---------------------------*/

/*
 * Load a DW from a global GPA. The GPA can be a memory or MMR address.
 */
int gru_read_gpa(unsigned long *value, unsigned long gpa)
{
	void *cb;
	void *dsr;
	int ret, iaa;

	STAT(read_gpa);
	if (gru_get_cpu_resources(GRU_NUM_KERNEL_DSR_BYTES, &cb, &dsr))
		return MQE_BUG_NO_RESOURCES;
	iaa = gpa >> 62;
	gru_vload_phys(cb, gpa, gru_get_tri(dsr), iaa, IMA);
	ret = gru_wait(cb);
	if (ret == CBS_IDLE)
		*value = *(unsigned long *)dsr;
	gru_free_cpu_resources(cb, dsr);
	return ret;
}
EXPORT_SYMBOL_GPL(gru_read_gpa);

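/*
 * Editorial example, not part of the original driver: a minimal sketch of
 * reading a single DW through the GRU. The source here is an ordinary
 * kernel variable converted to a global physical address with uv_gpa();
 * gru_read_gpa() derives the IAA from the upper bits of the gpa, so the
 * same call also works for MMR addresses. The names are hypothetical.
 */
static int __maybe_unused gru_read_gpa_example(unsigned long *src,
					       unsigned long *value)
{
	int ret;

	ret = gru_read_gpa(value, uv_gpa(src));
	return (ret == CBS_IDLE) ? 0 : -EIO;
}
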
/*
 * Copy a block of data using the GRU resources
 */
int gru_copy_gpa(unsigned long dest_gpa, unsigned long src_gpa,
				unsigned int bytes)
{
	void *cb;
	void *dsr;
	int ret;

	STAT(copy_gpa);
	if (gru_get_cpu_resources(GRU_NUM_KERNEL_DSR_BYTES, &cb, &dsr))
		return MQE_BUG_NO_RESOURCES;
	gru_bcopy(cb, src_gpa, dest_gpa, gru_get_tri(dsr),
		  XTYPE_B, bytes, GRU_NUM_KERNEL_DSR_CL, IMA);
	ret = gru_wait(cb);
	gru_free_cpu_resources(cb, dsr);
	return ret;
}
EXPORT_SYMBOL_GPL(gru_copy_gpa);

/* ------------------- KERNEL QUICKTESTS RUN AT STARTUP ----------------*/
/* Temp - will delete after we gain confidence in the GRU */

static int quicktest0(unsigned long arg)
{
	unsigned long word0;
	unsigned long word1;
	void *cb;
	void *dsr;
	unsigned long *p;
	int ret = -EIO;

	if (gru_get_cpu_resources(GRU_CACHE_LINE_BYTES, &cb, &dsr))
		return MQE_BUG_NO_RESOURCES;
	p = dsr;
	word0 = MAGIC;
	word1 = 0;

	gru_vload(cb, uv_gpa(&word0), gru_get_tri(dsr), XTYPE_DW, 1, 1, IMA);
	if (gru_wait(cb) != CBS_IDLE) {
		printk(KERN_DEBUG "GRU:%d quicktest0: CBR failure 1\n", smp_processor_id());
		goto done;
	}

	if (*p != MAGIC) {
		printk(KERN_DEBUG "GRU:%d quicktest0 bad magic 0x%lx\n", smp_processor_id(), *p);
		goto done;
	}
	gru_vstore(cb, uv_gpa(&word1), gru_get_tri(dsr), XTYPE_DW, 1, 1, IMA);
	if (gru_wait(cb) != CBS_IDLE) {
		printk(KERN_DEBUG "GRU:%d quicktest0: CBR failure 2\n", smp_processor_id());
		goto done;
	}

	if (word0 != word1 || word1 != MAGIC) {
		printk(KERN_DEBUG
		       "GRU:%d quicktest0 err: found 0x%lx, expected 0x%lx\n",
		       smp_processor_id(), word1, MAGIC);
		goto done;
	}
	ret = 0;

done:
	gru_free_cpu_resources(cb, dsr);
	return ret;
}

#define ALIGNUP(p, q)	((void *)(((unsigned long)(p) + (q) - 1) & ~(q - 1)))

static int quicktest1(unsigned long arg)
{
	struct gru_message_queue_desc mqd;
	void *p, *mq;
	unsigned long *dw;
	int i, ret = -EIO;
	char mes[GRU_CACHE_LINE_BYTES], *m;

	/* Need a 1K cacheline-aligned buffer that does not cross a page boundary */
	p = kmalloc(4096, GFP_KERNEL);
	if (p == NULL)
		return -ENOMEM;
	mq = ALIGNUP(p, 1024);
	memset(mes, 0xee, sizeof(mes));
	dw = mq;

	gru_create_message_queue(&mqd, mq, 8 * GRU_CACHE_LINE_BYTES, 0, 0, 0);
	for (i = 0; i < 6; i++) {
		mes[8] = i;
		do {
			ret = gru_send_message_gpa(&mqd, mes, sizeof(mes));
		} while (ret == MQE_CONGESTION);
		if (ret)
			break;
	}
	if (ret != MQE_QUEUE_FULL || i != 4) {
		printk(KERN_DEBUG "GRU:%d quicktest1: unexpected status %d, i %d\n",
		       smp_processor_id(), ret, i);
		goto done;
	}

	for (i = 0; i < 6; i++) {
		m = gru_get_next_message(&mqd);
		if (!m || m[8] != i)
			break;
		gru_free_message(&mqd, m);
	}
	if (i != 4) {
		printk(KERN_DEBUG "GRU:%d quicktest1: bad message, i %d, m %p, m8 %d\n",
			smp_processor_id(), i, m, m ? m[8] : -1);
		goto done;
	}
	ret = 0;

done:
	kfree(p);
	return ret;
}

static int quicktest2(unsigned long arg)
{
	static DECLARE_COMPLETION(cmp);
	unsigned long han;
	int blade_id = 0;
	int numcb = 4;
	int ret = 0;
	unsigned long *buf;
	void *cb0, *cb;
	struct gru_control_block_status *gen;
	int i, k, istatus, bytes;

	bytes = numcb * 4 * 8;
	buf = kmalloc(bytes, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	ret = -EBUSY;
	han = gru_reserve_async_resources(blade_id, numcb, 0, &cmp);
	if (!han)
		goto done;

	gru_lock_async_resource(han, &cb0, NULL);
	memset(buf, 0xee, bytes);
	for (i = 0; i < numcb; i++)
		gru_vset(cb0 + i * GRU_HANDLE_STRIDE, uv_gpa(&buf[i * 4]), 0,
				XTYPE_DW, 4, 1, IMA_INTERRUPT);

	ret = 0;
	k = numcb;
	do {
		gru_wait_async_cbr(han);
		for (i = 0; i < numcb; i++) {
			cb = cb0 + i * GRU_HANDLE_STRIDE;
			istatus = gru_check_status(cb);
			if (istatus != CBS_ACTIVE && istatus != CBS_CALL_OS)
				break;
		}
		if (i == numcb)
			continue;
		if (istatus != CBS_IDLE) {
			printk(KERN_DEBUG "GRU:%d quicktest2: cb %d, exception\n", smp_processor_id(), i);
			ret = -EFAULT;
		} else if (buf[4 * i] || buf[4 * i + 1] || buf[4 * i + 2] ||
				buf[4 * i + 3]) {
			printk(KERN_DEBUG "GRU:%d quicktest2: cb %d, buf 0x%lx, 0x%lx, 0x%lx, 0x%lx\n",
			       smp_processor_id(), i, buf[4 * i], buf[4 * i + 1], buf[4 * i + 2], buf[4 * i + 3]);
			ret = -EIO;
		}
		k--;
		gen = cb;
		gen->istatus = CBS_CALL_OS; /* don't handle this CBR again */
	} while (k);
	BUG_ON(cmp.done);

	gru_unlock_async_resource(han);
	gru_release_async_resources(han);
done:
	kfree(buf);
	return ret;
}

#define BUFSIZE 200
static int quicktest3(unsigned long arg)
{
	char buf1[BUFSIZE], buf2[BUFSIZE];
	int ret = 0;

	memset(buf2, 0, sizeof(buf2));
	memset(buf1, get_cycles() & 255, sizeof(buf1));
	gru_copy_gpa(uv_gpa(buf2), uv_gpa(buf1), BUFSIZE);
	if (memcmp(buf1, buf2, BUFSIZE)) {
		printk(KERN_DEBUG "GRU:%d quicktest3 error\n", smp_processor_id());
		ret = -EIO;
	}
	return ret;
}

/*
 * Debugging only. User hook for various kernel tests
 * of driver & gru.
 */
int gru_ktest(unsigned long arg)
{
	int ret = -EINVAL;

	switch (arg & 0xff) {
	case 0:
		ret = quicktest0(arg);
		break;
	case 1:
		ret = quicktest1(arg);
		break;
	case 2:
		ret = quicktest2(arg);
		break;
	case 3:
		ret = quicktest3(arg);
		break;
	case 99:
		ret = gru_free_kernel_contexts();
		break;
	}
	return ret;
}

int gru_kservices_init(void)
{
	return 0;
}

void gru_kservices_exit(void)
{
	if (gru_free_kernel_contexts())
		BUG();
}