/*
 * SN Platform GRU Driver
 *
 * KERNEL SERVICES THAT USE THE GRU
 *
 * Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
 */

#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/spinlock.h>
#include <linux/device.h>
#include <linux/miscdevice.h>
#include <linux/proc_fs.h>
#include <linux/interrupt.h>
#include <linux/uaccess.h>
#include <linux/delay.h>
#include <linux/export.h>
#include <asm/io_apic.h>
#include "gru.h"
#include "grulib.h"
#include "grutables.h"
#include "grukservices.h"
#include "gru_instructions.h"
#include <asm/uv/uv_hub.h>

/*
 * Kernel GRU Usage
 *
 * The following is an interim algorithm for management of kernel GRU
 * resources. This will likely be replaced when we better understand the
 * kernel/user requirements.
 *
 * Blade percpu resources reserved for kernel use. These resources are
 * reserved whenever the kernel context for the blade is loaded. Note
 * that the kernel context is not guaranteed to be always available. It is
 * loaded on demand & can be stolen by a user if the user demand exceeds the
 * kernel demand. The kernel can always reload the kernel context but
 * a SLEEP may be required!!!
 *
 * Async Overview:
 *
 *	Each blade has one "kernel context" that owns GRU kernel resources
 *	located on the blade. Kernel drivers use GRU resources in this context
 *	for sending messages, zeroing memory, etc.
 *
 *	The kernel context is dynamically loaded on demand. If it is not in
 *	use by the kernel, the kernel context can be unloaded & given to a user.
 *	The kernel context will be reloaded when needed. This may require that
 *	a context be stolen from a user.
 *		NOTE: frequent unloading/reloading of the kernel context is
 *		expensive. We are depending on batch schedulers, cpusets, sane
 *		drivers or some other mechanism to prevent the need for frequent
 *		stealing/reloading.
 *
 *	The kernel context consists of two parts:
 *		- 1 CB & a few DSRs that are reserved for each cpu on the blade.
 *		  Each cpu has its own private resources & does not share them
 *		  with other cpus. These resources are used serially, i.e.,
 *		  locked, used & unlocked on each call to a function in
 *		  grukservices.
 *			(Now that we have dynamic loading of kernel contexts, I
 *			may rethink this & allow sharing between cpus....)
 *
 *		- Additional resources can be reserved long term & used directly
 *		  by UV drivers located in the kernel. Drivers using these GRU
 *		  resources can use asynchronous GRU instructions that send
 *		  interrupts on completion.
 *			- these resources must be explicitly locked/unlocked
 *			- locked resources prevent (obviously) the kernel
 *			  context from being unloaded.
 *			- drivers using these resources directly issue their own
 *			  GRU instruction and must wait/check completion.
 *
 *		  When these resources are reserved, the caller can optionally
 *		  associate a wait_queue with the resources and use asynchronous
 *		  GRU instructions. When an async GRU instruction completes, the
 *		  driver will do a wakeup on the event.
 *
 */

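/*
 * Illustrative sketch only (not compiled into the driver): a UV kernel
 * driver that wants the long-term async resources described above would
 * typically reserve them once, then lock/issue/wait/unlock around each
 * batch of asynchronous GRU instructions. This mirrors quicktest2() at
 * the bottom of this file; the blade id, CBR count and the variable
 * being zeroed are arbitrary example values.
 *
 *	static DECLARE_COMPLETION(example_cmp);
 *
 *	static void example_async_use(void)
 *	{
 *		unsigned long han, val = 0;
 *		void *cb;
 *
 *		han = gru_reserve_async_resources(0, 1, 0, &example_cmp);
 *		if (!han)
 *			return;
 *
 *		gru_lock_async_resource(han, &cb, NULL);
 *		gru_vset(cb, uv_gpa(&val), 0, XTYPE_DW, 1, 1, IMA_INTERRUPT);
 *		gru_wait_async_cbr(han);
 *		gru_unlock_async_resource(han);
 *
 *		gru_release_async_resources(han);
 *	}
 */
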
#define ASYNC_HAN_TO_BID(h)	((h) - 1)
#define ASYNC_BID_TO_HAN(b)	((b) + 1)
#define ASYNC_HAN_TO_BS(h)	gru_base[ASYNC_HAN_TO_BID(h)]

#define GRU_NUM_KERNEL_CBR	1
#define GRU_NUM_KERNEL_DSR_BYTES 256
#define GRU_NUM_KERNEL_DSR_CL	(GRU_NUM_KERNEL_DSR_BYTES /		\
					GRU_CACHE_LINE_BYTES)

/* GRU instruction attributes for all instructions */
#define IMA			IMA_CB_DELAY

/* GRU cacheline size is always 64 bytes - even on arches with 128 byte lines */
#define __gru_cacheline_aligned__				\
	__attribute__((__aligned__(GRU_CACHE_LINE_BYTES)))

#define MAGIC	0x1234567887654321UL

/* Default retry count for GRU errors on kernel instructions */
#define EXCEPTION_RETRY_LIMIT	3

/* Status of message queue sections */
#define MQS_EMPTY		0
#define MQS_FULL		1
#define MQS_NOOP		2

/*----------------- RESOURCE MANAGEMENT -------------------------------------*/
/* optimized for x86_64 */
struct message_queue {
	union gru_mesqhead	head __gru_cacheline_aligned__;	/* CL 0 */
	int			qlines;				/* DW 1 */
	long			hstatus[2];
	void			*next __gru_cacheline_aligned__;/* CL 1 */
	void			*limit;
	void			*start;
	void			*start2;
	char			data ____cacheline_aligned;	/* CL 2 */
};

/* First word in every message - used by mesq interface */
struct message_header {
	char	present;
	char	present2;
	char	lines;
	char	fill;
};

#define HSTATUS(mq, h)	((mq) + offsetof(struct message_queue, hstatus[h]))

/*
 * Reload the blade's kernel context into a GRU chiplet. Called holding
 * the bs_kgts_sema for READ. Will steal user contexts if necessary.
 */
static void gru_load_kernel_context(struct gru_blade_state *bs, int blade_id)
{
	struct gru_state *gru;
	struct gru_thread_state *kgts;
	void *vaddr;
	int ctxnum, ncpus;

	up_read(&bs->bs_kgts_sema);
	down_write(&bs->bs_kgts_sema);

	if (!bs->bs_kgts) {
		do {
			bs->bs_kgts = gru_alloc_gts(NULL, 0, 0, 0, 0, 0);
			if (!IS_ERR(bs->bs_kgts))
				break;
			msleep(1);
		} while (true);
		bs->bs_kgts->ts_user_blade_id = blade_id;
	}
	kgts = bs->bs_kgts;

	if (!kgts->ts_gru) {
		STAT(load_kernel_context);
		ncpus = uv_blade_nr_possible_cpus(blade_id);
		kgts->ts_cbr_au_count = GRU_CB_COUNT_TO_AU(
			GRU_NUM_KERNEL_CBR * ncpus + bs->bs_async_cbrs);
		kgts->ts_dsr_au_count = GRU_DS_BYTES_TO_AU(
			GRU_NUM_KERNEL_DSR_BYTES * ncpus +
				bs->bs_async_dsr_bytes);
		while (!gru_assign_gru_context(kgts)) {
			msleep(1);
			gru_steal_context(kgts);
		}
		gru_load_context(kgts);
		gru = bs->bs_kgts->ts_gru;
		vaddr = gru->gs_gru_base_vaddr;
		ctxnum = kgts->ts_ctxnum;
		bs->kernel_cb = get_gseg_base_address_cb(vaddr, ctxnum, 0);
		bs->kernel_dsr = get_gseg_base_address_ds(vaddr, ctxnum, 0);
	}
	downgrade_write(&bs->bs_kgts_sema);
}

/*
 * Free all kernel contexts that are not currently in use.
 *   Returns 0 if all freed, else the number of in-use contexts.
 */
static int gru_free_kernel_contexts(void)
{
	struct gru_blade_state *bs;
	struct gru_thread_state *kgts;
	int bid, ret = 0;

	for (bid = 0; bid < GRU_MAX_BLADES; bid++) {
		bs = gru_base[bid];
		if (!bs)
			continue;

		/* Ignore busy contexts. Don't want to block here. */
		if (down_write_trylock(&bs->bs_kgts_sema)) {
			kgts = bs->bs_kgts;
			if (kgts && kgts->ts_gru)
				gru_unload_context(kgts, 0);
			bs->bs_kgts = NULL;
			up_write(&bs->bs_kgts_sema);
			kfree(kgts);
		} else {
			ret++;
		}
	}
	return ret;
}

/*
 * Lock & load the kernel context for the specified blade.
 */
static struct gru_blade_state *gru_lock_kernel_context(int blade_id)
{
	struct gru_blade_state *bs;
	int bid;

	STAT(lock_kernel_context);
again:
	bid = blade_id < 0 ? uv_numa_blade_id() : blade_id;
	bs = gru_base[bid];

	/* Handle the case where migration occurred while waiting for the sema */
	down_read(&bs->bs_kgts_sema);
	if (blade_id < 0 && bid != uv_numa_blade_id()) {
		up_read(&bs->bs_kgts_sema);
		goto again;
	}
	if (!bs->bs_kgts || !bs->bs_kgts->ts_gru)
		gru_load_kernel_context(bs, bid);
	return bs;

}

/*
 * Unlock the kernel context for the specified blade. Context is not
 * unloaded but may be stolen before next use.
 */
static void gru_unlock_kernel_context(int blade_id)
{
	struct gru_blade_state *bs;

	bs = gru_base[blade_id];
	up_read(&bs->bs_kgts_sema);
	STAT(unlock_kernel_context);
}

/*
 * Reserve & get pointers to the DSR/CBRs reserved for the current cpu.
 *	- returns with preemption disabled
 */
static int gru_get_cpu_resources(int dsr_bytes, void **cb, void **dsr)
{
	struct gru_blade_state *bs;
	int lcpu;

	BUG_ON(dsr_bytes > GRU_NUM_KERNEL_DSR_BYTES);
	preempt_disable();
	bs = gru_lock_kernel_context(-1);
	lcpu = uv_blade_processor_id();
	*cb = bs->kernel_cb + lcpu * GRU_HANDLE_STRIDE;
	*dsr = bs->kernel_dsr + lcpu * GRU_NUM_KERNEL_DSR_BYTES;
	return 0;
}

/*
 * Free the current cpu's reserved DSR/CBR resources.
 */
static void gru_free_cpu_resources(void *cb, void *dsr)
{
	gru_unlock_kernel_context(uv_numa_blade_id());
	preempt_enable();
}

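/*
 * Minimal sketch (not compiled) of the serial lock/use/unlock pattern that
 * the grukservices entry points below follow with the per-cpu CB/DSR pair;
 * compare gru_read_gpa() and gru_copy_gpa(). The function name and the
 * DW-zeroing operation are illustrative only.
 *
 *	static int example_zero_dw(unsigned long gpa)
 *	{
 *		void *cb, *dsr;
 *		int ret;
 *
 *		if (gru_get_cpu_resources(GRU_NUM_KERNEL_DSR_BYTES, &cb, &dsr))
 *			return MQE_BUG_NO_RESOURCES;
 *		gru_vset(cb, gpa, 0, XTYPE_DW, 1, 1, IMA);
 *		ret = gru_wait(cb);
 *		gru_free_cpu_resources(cb, dsr);
 *		return ret;
 *	}
 */
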
/*
 * Reserve GRU resources to be used asynchronously.
 *   Note: currently supports only 1 reservation per blade.
 *
 *	input:
 *		blade_id  - blade on which resources should be reserved
 *		cbrs	  - number of CBRs
 *		dsr_bytes - number of DSR bytes needed
 *	output:
 *		handle to identify resource
 *		(0 = async resources already reserved)
 */
unsigned long gru_reserve_async_resources(int blade_id, int cbrs, int dsr_bytes,
			struct completion *cmp)
{
	struct gru_blade_state *bs;
	struct gru_thread_state *kgts;
	int ret = 0;

	bs = gru_base[blade_id];

	down_write(&bs->bs_kgts_sema);

	/* Verify no resources already reserved */
	if (bs->bs_async_dsr_bytes + bs->bs_async_cbrs)
		goto done;
	bs->bs_async_dsr_bytes = dsr_bytes;
	bs->bs_async_cbrs = cbrs;
	bs->bs_async_wq = cmp;
	kgts = bs->bs_kgts;

	/* Resources changed. Unload context if already loaded */
	if (kgts && kgts->ts_gru)
		gru_unload_context(kgts, 0);
	ret = ASYNC_BID_TO_HAN(blade_id);

done:
	up_write(&bs->bs_kgts_sema);
	return ret;
}

/*
 * Release async resources previously reserved.
 *
 *	input:
 *		han - handle to identify resources
 */
void gru_release_async_resources(unsigned long han)
{
	struct gru_blade_state *bs = ASYNC_HAN_TO_BS(han);

	down_write(&bs->bs_kgts_sema);
	bs->bs_async_dsr_bytes = 0;
	bs->bs_async_cbrs = 0;
	bs->bs_async_wq = NULL;
	up_write(&bs->bs_kgts_sema);
}

/*
 * Wait for async GRU instructions to complete.
 *
 *	input:
 *		han - handle to identify resources
 */
void gru_wait_async_cbr(unsigned long han)
{
	struct gru_blade_state *bs = ASYNC_HAN_TO_BS(han);

	wait_for_completion(bs->bs_async_wq);
	mb();
}

/*
 * Lock previously reserved async GRU resources
 *
 *	input:
 *		han - handle to identify resources
 *	output:
 *		cb  - pointer to first CBR
 *		dsr - pointer to first DSR
 */
void gru_lock_async_resource(unsigned long han, void **cb, void **dsr)
{
	struct gru_blade_state *bs = ASYNC_HAN_TO_BS(han);
	int blade_id = ASYNC_HAN_TO_BID(han);
	int ncpus;

	gru_lock_kernel_context(blade_id);
	ncpus = uv_blade_nr_possible_cpus(blade_id);
	if (cb)
		*cb = bs->kernel_cb + ncpus * GRU_HANDLE_STRIDE;
	if (dsr)
		*dsr = bs->kernel_dsr + ncpus * GRU_NUM_KERNEL_DSR_BYTES;
}

/*
 * Unlock previously reserved async GRU resources
 *
 *	input:
 *		han - handle to identify resources
 */
void gru_unlock_async_resource(unsigned long han)
{
	int blade_id = ASYNC_HAN_TO_BID(han);

	gru_unlock_kernel_context(blade_id);
}

/*----------------------------------------------------------------------*/
int gru_get_cb_exception_detail(void *cb,
		struct control_block_extended_exc_detail *excdet)
{
	struct gru_control_block_extended *cbe;
	struct gru_thread_state *kgts = NULL;
	unsigned long off;
	int cbrnum, bid;

	/*
	 * Locate kgts for cb. This algorithm is SLOW but
	 * this function is rarely called (i.e., almost never).
	 * Performance does not matter.
	 */
	for_each_possible_blade(bid) {
		if (!gru_base[bid])
			break;
		kgts = gru_base[bid]->bs_kgts;
		if (!kgts || !kgts->ts_gru)
			continue;
		off = cb - kgts->ts_gru->gs_gru_base_vaddr;
		if (off < GRU_SIZE)
			break;
		kgts = NULL;
	}
	BUG_ON(!kgts);
	cbrnum = thread_cbr_number(kgts, get_cb_number(cb));
	cbe = get_cbe(GRUBASE(cb), cbrnum);
	gru_flush_cache(cbe);	/* CBE not coherent */
	sync_core();
	excdet->opc = cbe->opccpy;
	excdet->exopc = cbe->exopccpy;
	excdet->ecause = cbe->ecause;
	excdet->exceptdet0 = cbe->idef1upd;
	excdet->exceptdet1 = cbe->idef3upd;
	gru_flush_cache(cbe);
	return 0;
}

static char *gru_get_cb_exception_detail_str(int ret, void *cb,
					     char *buf, int size)
{
	struct gru_control_block_status *gen = (void *)cb;
	struct control_block_extended_exc_detail excdet;

	if (ret > 0 && gen->istatus == CBS_EXCEPTION) {
		gru_get_cb_exception_detail(cb, &excdet);
		snprintf(buf, size,
			"GRU:%d exception: cb %p, opc %d, exopc %d, ecause 0x%x,"
			"excdet0 0x%lx, excdet1 0x%x", smp_processor_id(),
			gen, excdet.opc, excdet.exopc, excdet.ecause,
			excdet.exceptdet0, excdet.exceptdet1);
	} else {
		snprintf(buf, size, "No exception");
	}
	return buf;
}

static int gru_wait_idle_or_exception(struct gru_control_block_status *gen)
{
	while (gen->istatus >= CBS_ACTIVE) {
		cpu_relax();
		barrier();
	}
	return gen->istatus;
}

static int gru_retry_exception(void *cb)
{
	struct gru_control_block_status *gen = (void *)cb;
	struct control_block_extended_exc_detail excdet;
	int retry = EXCEPTION_RETRY_LIMIT;

	while (1) {
		if (gru_wait_idle_or_exception(gen) == CBS_IDLE)
			return CBS_IDLE;
		if (gru_get_cb_message_queue_substatus(cb))
			return CBS_EXCEPTION;
		gru_get_cb_exception_detail(cb, &excdet);
		if ((excdet.ecause & ~EXCEPTION_RETRY_BITS) ||
				(excdet.cbrexecstatus & CBR_EXS_ABORT_OCC))
			break;
		if (retry-- == 0)
			break;
		gen->icmd = 1;
		gru_flush_cache(gen);
	}
	return CBS_EXCEPTION;
}

int gru_check_status_proc(void *cb)
{
	struct gru_control_block_status *gen = (void *)cb;
	int ret;

	ret = gen->istatus;
	if (ret == CBS_EXCEPTION)
		ret = gru_retry_exception(cb);
	rmb();
	return ret;

}

int gru_wait_proc(void *cb)
{
	struct gru_control_block_status *gen = (void *)cb;
	int ret;

	ret = gru_wait_idle_or_exception(gen);
	if (ret == CBS_EXCEPTION)
		ret = gru_retry_exception(cb);
	rmb();
	return ret;
}

static void gru_abort(int ret, void *cb, char *str)
{
	char buf[GRU_EXC_STR_SIZE];

	panic("GRU FATAL ERROR: %s - %s\n", str,
	      gru_get_cb_exception_detail_str(ret, cb, buf, sizeof(buf)));
}

void gru_wait_abort_proc(void *cb)
{
	int ret;

	ret = gru_wait_proc(cb);
	if (ret)
		gru_abort(ret, cb, "gru_wait_abort");
}


/*------------------------------ MESSAGE QUEUES -----------------------------*/

/* Internal status. These are NOT returned to the user. */
#define MQIE_AGAIN		-1	/* try again */


/*
 * Save/restore the "present" flag that is in the second line of 2-line
 * messages
 */
static inline int get_present2(void *p)
{
	struct message_header *mhdr = p + GRU_CACHE_LINE_BYTES;
	return mhdr->present;
}

static inline void restore_present2(void *p, int val)
{
	struct message_header *mhdr = p + GRU_CACHE_LINE_BYTES;
	mhdr->present = val;
}

/*
 * Create a message queue.
 *	qlines - message queue size in cache lines. Includes 2-line header.
 */
int gru_create_message_queue(struct gru_message_queue_desc *mqd,
		void *p, unsigned int bytes, int nasid, int vector, int apicid)
{
	struct message_queue *mq = p;
	unsigned int qlines;

	qlines = bytes / GRU_CACHE_LINE_BYTES - 2;
	memset(mq, 0, bytes);
	mq->start = &mq->data;
	mq->start2 = &mq->data + (qlines / 2 - 1) * GRU_CACHE_LINE_BYTES;
	mq->next = &mq->data;
	mq->limit = &mq->data + (qlines - 2) * GRU_CACHE_LINE_BYTES;
	mq->qlines = qlines;
	mq->hstatus[0] = 0;
	mq->hstatus[1] = 1;
	mq->head = gru_mesq_head(2, qlines / 2 + 1);
	mqd->mq = mq;
	mqd->mq_gpa = uv_gpa(mq);
	mqd->qlines = qlines;
	mqd->interrupt_pnode = nasid >> 1;
	mqd->interrupt_vector = vector;
	mqd->interrupt_apicid = apicid;
	return 0;
}
EXPORT_SYMBOL_GPL(gru_create_message_queue);

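/*
 * Illustrative sketch of the exported message queue interface (modeled on
 * quicktest1() below; not compiled into the driver). The 8-cacheline queue
 * size and the zero nasid/vector/apicid (no interrupt delivery) are
 * arbitrary example values; the buffer must be cacheline aligned and must
 * not cross a page boundary.
 *
 *	static int example_mesq(void *aligned_buf)
 *	{
 *		struct gru_message_queue_desc mqd;
 *		char mesg[GRU_CACHE_LINE_BYTES], *m;
 *		int ret;
 *
 *		gru_create_message_queue(&mqd, aligned_buf,
 *				8 * GRU_CACHE_LINE_BYTES, 0, 0, 0);
 *
 *		memset(mesg, 0, sizeof(mesg));
 *		ret = gru_send_message_gpa(&mqd, mesg, sizeof(mesg));
 *		if (ret != MQE_OK)
 *			return ret;
 *
 *		m = gru_get_next_message(&mqd);
 *		if (m)
 *			gru_free_message(&mqd, m);
 *		return 0;
 *	}
 */
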
/*
 * Send a NOOP message to a message queue
 *	Returns:
 *		 0 - if queue is full after the send. This is the normal case
 *		     but various races can change this.
 *		-1 - if mesq sent successfully but queue not full
 *		>0 - unexpected error. MQE_xxx returned
 */
static int send_noop_message(void *cb, struct gru_message_queue_desc *mqd,
				void *mesg)
{
	const struct message_header noop_header = {
					.present = MQS_NOOP, .lines = 1};
	unsigned long m;
	int substatus, ret;
	struct message_header save_mhdr, *mhdr = mesg;

	STAT(mesq_noop);
	save_mhdr = *mhdr;
	*mhdr = noop_header;
	gru_mesq(cb, mqd->mq_gpa, gru_get_tri(mhdr), 1, IMA);
	ret = gru_wait(cb);

	if (ret) {
		substatus = gru_get_cb_message_queue_substatus(cb);
		switch (substatus) {
		case CBSS_NO_ERROR:
			STAT(mesq_noop_unexpected_error);
			ret = MQE_UNEXPECTED_CB_ERR;
			break;
		case CBSS_LB_OVERFLOWED:
			STAT(mesq_noop_lb_overflow);
			ret = MQE_CONGESTION;
			break;
		case CBSS_QLIMIT_REACHED:
			STAT(mesq_noop_qlimit_reached);
			ret = 0;
			break;
		case CBSS_AMO_NACKED:
			STAT(mesq_noop_amo_nacked);
			ret = MQE_CONGESTION;
			break;
		case CBSS_PUT_NACKED:
			STAT(mesq_noop_put_nacked);
			m = mqd->mq_gpa + (gru_get_amo_value_head(cb) << 6);
			gru_vstore(cb, m, gru_get_tri(mesg), XTYPE_CL, 1, 1,
						IMA);
			if (gru_wait(cb) == CBS_IDLE)
				ret = MQIE_AGAIN;
			else
				ret = MQE_UNEXPECTED_CB_ERR;
			break;
		case CBSS_PAGE_OVERFLOW:
			STAT(mesq_noop_page_overflow);
			/* fallthru */
		default:
			BUG();
		}
	}
	*mhdr = save_mhdr;
	return ret;
}

/*
 * Handle a gru_mesq full.
 */
static int send_message_queue_full(void *cb, struct gru_message_queue_desc *mqd,
				void *mesg, int lines)
{
	union gru_mesqhead mqh;
	unsigned int limit, head;
	unsigned long avalue;
	int half, qlines;

	/* Determine if switching to first/second half of q */
	avalue = gru_get_amo_value(cb);
	head = gru_get_amo_value_head(cb);
	limit = gru_get_amo_value_limit(cb);

	qlines = mqd->qlines;
	half = (limit != qlines);

	if (half)
		mqh = gru_mesq_head(qlines / 2 + 1, qlines);
	else
		mqh = gru_mesq_head(2, qlines / 2 + 1);

	/* Try to get lock for switching head pointer */
	gru_gamir(cb, EOP_IR_CLR, HSTATUS(mqd->mq_gpa, half), XTYPE_DW, IMA);
	if (gru_wait(cb) != CBS_IDLE)
		goto cberr;
	if (!gru_get_amo_value(cb)) {
		STAT(mesq_qf_locked);
		return MQE_QUEUE_FULL;
	}

	/* Got the lock. Send an optional NOOP if the queue is not full. */
	if (head != limit) {
		if (send_noop_message(cb, mqd, mesg)) {
			gru_gamir(cb, EOP_IR_INC, HSTATUS(mqd->mq_gpa, half),
					XTYPE_DW, IMA);
			if (gru_wait(cb) != CBS_IDLE)
				goto cberr;
			STAT(mesq_qf_noop_not_full);
			return MQIE_AGAIN;
		}
		avalue++;
	}

	/* Then flip queuehead to other half of queue. */
	gru_gamer(cb, EOP_ERR_CSWAP, mqd->mq_gpa, XTYPE_DW, mqh.val, avalue,
							IMA);
	if (gru_wait(cb) != CBS_IDLE)
		goto cberr;

	/* If swapping the queue head was not successful, clear the hstatus lock */
	if (gru_get_amo_value(cb) != avalue) {
		STAT(mesq_qf_switch_head_failed);
		gru_gamir(cb, EOP_IR_INC, HSTATUS(mqd->mq_gpa, half), XTYPE_DW,
			IMA);
		if (gru_wait(cb) != CBS_IDLE)
			goto cberr;
	}
	return MQIE_AGAIN;
cberr:
	STAT(mesq_qf_unexpected_error);
	return MQE_UNEXPECTED_CB_ERR;
}

/*
 * Handle a PUT failure. Note: if message was a 2-line message, one of the
 * lines might have been successfully written. Before resending the
 * message, "present" must be cleared in BOTH lines to prevent the receiver
 * from prematurely seeing the full message.
 */
static int send_message_put_nacked(void *cb, struct gru_message_queue_desc *mqd,
			void *mesg, int lines)
{
	unsigned long m;
	int ret, loops = 200;	/* experimentally determined */

	m = mqd->mq_gpa + (gru_get_amo_value_head(cb) << 6);
	if (lines == 2) {
		gru_vset(cb, m, 0, XTYPE_CL, lines, 1, IMA);
		if (gru_wait(cb) != CBS_IDLE)
			return MQE_UNEXPECTED_CB_ERR;
	}
	gru_vstore(cb, m, gru_get_tri(mesg), XTYPE_CL, lines, 1, IMA);
	if (gru_wait(cb) != CBS_IDLE)
		return MQE_UNEXPECTED_CB_ERR;

	if (!mqd->interrupt_vector)
		return MQE_OK;

	/*
	 * Send a noop message in order to deliver a cross-partition interrupt
	 * to the SSI that contains the target message queue. Normally, the
	 * interrupt is automatically delivered by hardware following mesq
	 * operations, but some error conditions require explicit delivery.
	 * The noop message will trigger delivery. Otherwise partition failures
	 * could cause unrecovered errors.
	 */
	do {
		ret = send_noop_message(cb, mqd, mesg);
	} while ((ret == MQIE_AGAIN || ret == MQE_CONGESTION) && (loops-- > 0));

	if (ret == MQIE_AGAIN || ret == MQE_CONGESTION) {
		/*
		 * Don't indicate to the app to resend the message, as it's
		 * already been successfully sent.  We simply send an OK
		 * (rather than fail the send with MQE_UNEXPECTED_CB_ERR),
		 * assuming that the other side is receiving enough
		 * interrupts to get this message processed anyway.
		 */
		ret = MQE_OK;
	}
	return ret;
}

/*
 * Handle a gru_mesq failure. Some of these failures are software recoverable
 * or retryable.
 */
static int send_message_failure(void *cb, struct gru_message_queue_desc *mqd,
				void *mesg, int lines)
{
	int substatus, ret = 0;

	substatus = gru_get_cb_message_queue_substatus(cb);
	switch (substatus) {
	case CBSS_NO_ERROR:
		STAT(mesq_send_unexpected_error);
		ret = MQE_UNEXPECTED_CB_ERR;
		break;
	case CBSS_LB_OVERFLOWED:
		STAT(mesq_send_lb_overflow);
		ret = MQE_CONGESTION;
		break;
	case CBSS_QLIMIT_REACHED:
		STAT(mesq_send_qlimit_reached);
		ret = send_message_queue_full(cb, mqd, mesg, lines);
		break;
	case CBSS_AMO_NACKED:
		STAT(mesq_send_amo_nacked);
		ret = MQE_CONGESTION;
		break;
	case CBSS_PUT_NACKED:
		STAT(mesq_send_put_nacked);
		ret = send_message_put_nacked(cb, mqd, mesg, lines);
		break;
	case CBSS_PAGE_OVERFLOW:
		STAT(mesq_page_overflow);
		/* fallthru */
	default:
		BUG();
	}
	return ret;
}

/*
 * Send a message to a message queue
 *	mqd	message queue descriptor
 *	mesg	message. Must be a vaddr within a GSEG
 *	bytes	message size (<= 2 CL)
 */
int gru_send_message_gpa(struct gru_message_queue_desc *mqd, void *mesg,
				unsigned int bytes)
{
	struct message_header *mhdr;
	void *cb;
	void *dsr;
	int istatus, clines, ret;

	STAT(mesq_send);
	BUG_ON(bytes < sizeof(int) || bytes > 2 * GRU_CACHE_LINE_BYTES);

	clines = DIV_ROUND_UP(bytes, GRU_CACHE_LINE_BYTES);
	if (gru_get_cpu_resources(bytes, &cb, &dsr))
		return MQE_BUG_NO_RESOURCES;
	memcpy(dsr, mesg, bytes);
	mhdr = dsr;
	mhdr->present = MQS_FULL;
	mhdr->lines = clines;
	if (clines == 2) {
		mhdr->present2 = get_present2(mhdr);
		restore_present2(mhdr, MQS_FULL);
	}

	do {
		ret = MQE_OK;
		gru_mesq(cb, mqd->mq_gpa, gru_get_tri(mhdr), clines, IMA);
		istatus = gru_wait(cb);
		if (istatus != CBS_IDLE)
			ret = send_message_failure(cb, mqd, dsr, clines);
	} while (ret == MQIE_AGAIN);
	gru_free_cpu_resources(cb, dsr);

	if (ret)
		STAT(mesq_send_failed);
	return ret;
}
EXPORT_SYMBOL_GPL(gru_send_message_gpa);

/*
 * Advance the receive pointer for the queue to the next message.
 */
void gru_free_message(struct gru_message_queue_desc *mqd, void *mesg)
{
	struct message_queue *mq = mqd->mq;
	struct message_header *mhdr = mq->next;
	void *next, *pnext;
	int half = -1;
	int lines = mhdr->lines;

	if (lines == 2)
		restore_present2(mhdr, MQS_EMPTY);
	mhdr->present = MQS_EMPTY;

	pnext = mq->next;
	next = pnext + GRU_CACHE_LINE_BYTES * lines;
	if (next == mq->limit) {
		next = mq->start;
		half = 1;
	} else if (pnext < mq->start2 && next >= mq->start2) {
		half = 0;
	}

	if (half >= 0)
		mq->hstatus[half] = 1;
	mq->next = next;
}
EXPORT_SYMBOL_GPL(gru_free_message);

/*
 * Get next message from message queue. Return NULL if no message
 * present. User must call gru_free_message() to move to the next message.
 *	mqd	message queue descriptor
 */
void *gru_get_next_message(struct gru_message_queue_desc *mqd)
{
	struct message_queue *mq = mqd->mq;
	struct message_header *mhdr = mq->next;
	int present = mhdr->present;

	/* skip NOOP messages */
	while (present == MQS_NOOP) {
		gru_free_message(mqd, mhdr);
		mhdr = mq->next;
		present = mhdr->present;
	}

	/* Wait for both halves of 2 line messages */
	if (present == MQS_FULL && mhdr->lines == 2 &&
				get_present2(mhdr) == MQS_EMPTY)
		present = MQS_EMPTY;

	if (!present) {
		STAT(mesq_receive_none);
		return NULL;
	}

	if (mhdr->lines == 2)
		restore_present2(mhdr, mhdr->present2);

	STAT(mesq_receive);
	return mhdr;
}
EXPORT_SYMBOL_GPL(gru_get_next_message);

/* ---------------------- GRU DATA COPY FUNCTIONS ---------------------------*/

/*
 * Load a DW from a global GPA. The GPA can be a memory or MMR address.
 */
int gru_read_gpa(unsigned long *value, unsigned long gpa)
{
	void *cb;
	void *dsr;
	int ret, iaa;

	STAT(read_gpa);
	if (gru_get_cpu_resources(GRU_NUM_KERNEL_DSR_BYTES, &cb, &dsr))
		return MQE_BUG_NO_RESOURCES;
	iaa = gpa >> 62;
	gru_vload_phys(cb, gpa, gru_get_tri(dsr), iaa, IMA);
	ret = gru_wait(cb);
	if (ret == CBS_IDLE)
		*value = *(unsigned long *)dsr;
	gru_free_cpu_resources(cb, dsr);
	return ret;
}
EXPORT_SYMBOL_GPL(gru_read_gpa);


/*
 * Copy a block of data using the GRU resources
 */
int gru_copy_gpa(unsigned long dest_gpa, unsigned long src_gpa,
				unsigned int bytes)
{
	void *cb;
	void *dsr;
	int ret;

	STAT(copy_gpa);
	if (gru_get_cpu_resources(GRU_NUM_KERNEL_DSR_BYTES, &cb, &dsr))
		return MQE_BUG_NO_RESOURCES;
	gru_bcopy(cb, src_gpa, dest_gpa, gru_get_tri(dsr),
		  XTYPE_B, bytes, GRU_NUM_KERNEL_DSR_CL, IMA);
	ret = gru_wait(cb);
	gru_free_cpu_resources(cb, dsr);
	return ret;
}
EXPORT_SYMBOL_GPL(gru_copy_gpa);

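/*
 * Illustrative sketch of the exported data copy interface (modeled on
 * quicktest3() below; not compiled into the driver). Callers pass global
 * physical addresses obtained with uv_gpa(); the buffers and wrapper name
 * here are hypothetical example values.
 *
 *	static int example_copy(void *dst, void *src, unsigned int bytes)
 *	{
 *		if (gru_copy_gpa(uv_gpa(dst), uv_gpa(src), bytes) != CBS_IDLE)
 *			return -EIO;
 *		return 0;
 *	}
 */
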
/* ------------------- KERNEL QUICKTESTS RUN AT STARTUP ----------------*/
/* Temp - will delete after we gain confidence in the GRU */

static int quicktest0(unsigned long arg)
{
	unsigned long word0;
	unsigned long word1;
	void *cb;
	void *dsr;
	unsigned long *p;
	int ret = -EIO;

	if (gru_get_cpu_resources(GRU_CACHE_LINE_BYTES, &cb, &dsr))
		return MQE_BUG_NO_RESOURCES;
	p = dsr;
	word0 = MAGIC;
	word1 = 0;

	gru_vload(cb, uv_gpa(&word0), gru_get_tri(dsr), XTYPE_DW, 1, 1, IMA);
	if (gru_wait(cb) != CBS_IDLE) {
		printk(KERN_DEBUG "GRU:%d quicktest0: CBR failure 1\n", smp_processor_id());
		goto done;
	}

	if (*p != MAGIC) {
		printk(KERN_DEBUG "GRU:%d quicktest0 bad magic 0x%lx\n", smp_processor_id(), *p);
		goto done;
	}
	gru_vstore(cb, uv_gpa(&word1), gru_get_tri(dsr), XTYPE_DW, 1, 1, IMA);
	if (gru_wait(cb) != CBS_IDLE) {
		printk(KERN_DEBUG "GRU:%d quicktest0: CBR failure 2\n", smp_processor_id());
		goto done;
	}

	if (word0 != word1 || word1 != MAGIC) {
		printk(KERN_DEBUG
		       "GRU:%d quicktest0 err: found 0x%lx, expected 0x%lx\n",
		       smp_processor_id(), word1, MAGIC);
		goto done;
	}
	ret = 0;

done:
	gru_free_cpu_resources(cb, dsr);
	return ret;
}

#define ALIGNUP(p, q)	((void *)(((unsigned long)(p) + (q) - 1) & ~(q - 1)))

static int quicktest1(unsigned long arg)
{
	struct gru_message_queue_desc mqd;
	void *p, *mq;
	int i, ret = -EIO;
	char mes[GRU_CACHE_LINE_BYTES], *m;

	/* Need a 1K-aligned buffer that does not cross a page boundary */
	p = kmalloc(4096, GFP_KERNEL);
	if (p == NULL)
		return -ENOMEM;
	mq = ALIGNUP(p, 1024);
	memset(mes, 0xee, sizeof(mes));

	gru_create_message_queue(&mqd, mq, 8 * GRU_CACHE_LINE_BYTES, 0, 0, 0);
	for (i = 0; i < 6; i++) {
		mes[8] = i;
		do {
			ret = gru_send_message_gpa(&mqd, mes, sizeof(mes));
		} while (ret == MQE_CONGESTION);
		if (ret)
			break;
	}
	if (ret != MQE_QUEUE_FULL || i != 4) {
		printk(KERN_DEBUG "GRU:%d quicktest1: unexpected status %d, i %d\n",
		       smp_processor_id(), ret, i);
		goto done;
	}

	for (i = 0; i < 6; i++) {
		m = gru_get_next_message(&mqd);
		if (!m || m[8] != i)
			break;
		gru_free_message(&mqd, m);
	}
	if (i != 4) {
		printk(KERN_DEBUG "GRU:%d quicktest1: bad message, i %d, m %p, m8 %d\n",
			smp_processor_id(), i, m, m ? m[8] : -1);
		goto done;
	}
	ret = 0;

done:
	kfree(p);
	return ret;
}

static int quicktest2(unsigned long arg)
{
	static DECLARE_COMPLETION(cmp);
	unsigned long han;
	int blade_id = 0;
	int numcb = 4;
	int ret = 0;
	unsigned long *buf;
	void *cb0, *cb;
	struct gru_control_block_status *gen;
	int i, k, istatus, bytes;

	bytes = numcb * 4 * 8;
	buf = kmalloc(bytes, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	ret = -EBUSY;
	han = gru_reserve_async_resources(blade_id, numcb, 0, &cmp);
	if (!han)
		goto done;

	gru_lock_async_resource(han, &cb0, NULL);
	memset(buf, 0xee, bytes);
	for (i = 0; i < numcb; i++)
		gru_vset(cb0 + i * GRU_HANDLE_STRIDE, uv_gpa(&buf[i * 4]), 0,
				XTYPE_DW, 4, 1, IMA_INTERRUPT);

	ret = 0;
	k = numcb;
	do {
		gru_wait_async_cbr(han);
		for (i = 0; i < numcb; i++) {
			cb = cb0 + i * GRU_HANDLE_STRIDE;
			istatus = gru_check_status(cb);
			if (istatus != CBS_ACTIVE && istatus != CBS_CALL_OS)
				break;
		}
		if (i == numcb)
			continue;
		if (istatus != CBS_IDLE) {
			printk(KERN_DEBUG "GRU:%d quicktest2: cb %d, exception\n", smp_processor_id(), i);
			ret = -EFAULT;
		} else if (buf[4 * i] || buf[4 * i + 1] || buf[4 * i + 2] ||
				buf[4 * i + 3]) {
			printk(KERN_DEBUG "GRU:%d quicktest2:cb %d, buf 0x%lx, 0x%lx, 0x%lx, 0x%lx\n",
			       smp_processor_id(), i, buf[4 * i], buf[4 * i + 1], buf[4 * i + 2], buf[4 * i + 3]);
			ret = -EIO;
		}
		k--;
		gen = cb;
		gen->istatus = CBS_CALL_OS; /* don't handle this CBR again */
	} while (k);
	BUG_ON(cmp.done);

	gru_unlock_async_resource(han);
	gru_release_async_resources(han);
done:
	kfree(buf);
	return ret;
}

#define BUFSIZE 200
static int quicktest3(unsigned long arg)
{
	char buf1[BUFSIZE], buf2[BUFSIZE];
	int ret = 0;

	memset(buf2, 0, sizeof(buf2));
	memset(buf1, get_cycles() & 255, sizeof(buf1));
	gru_copy_gpa(uv_gpa(buf2), uv_gpa(buf1), BUFSIZE);
	if (memcmp(buf1, buf2, BUFSIZE)) {
		printk(KERN_DEBUG "GRU:%d quicktest3 error\n", smp_processor_id());
		ret = -EIO;
	}
	return ret;
}

/*
 * Debugging only. User hook for various kernel tests
 * of driver & gru.
 */
int gru_ktest(unsigned long arg)
{
	int ret = -EINVAL;

	switch (arg & 0xff) {
	case 0:
		ret = quicktest0(arg);
		break;
	case 1:
		ret = quicktest1(arg);
		break;
	case 2:
		ret = quicktest2(arg);
		break;
	case 3:
		ret = quicktest3(arg);
		break;
	case 99:
		ret = gru_free_kernel_contexts();
		break;
	}
	return ret;

}

int gru_kservices_init(void)
{
	return 0;
}

void gru_kservices_exit(void)
{
	if (gru_free_kernel_contexts())
		BUG();
}