/*
 * SN Platform GRU Driver
 *
 *              KERNEL SERVICES THAT USE THE GRU
 *
 *  Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
 */

#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/spinlock.h>
#include <linux/device.h>
#include <linux/miscdevice.h>
#include <linux/proc_fs.h>
#include <linux/interrupt.h>
#include <linux/uaccess.h>
#include <linux/delay.h>
#include "gru.h"
#include "grulib.h"
#include "grutables.h"
#include "grukservices.h"
#include "gru_instructions.h"
#include <asm/uv/uv_hub.h>

/*
 * Kernel GRU Usage
 *
 * The following is an interim algorithm for management of kernel GRU
 * resources. This will likely be replaced when we better understand the
 * kernel/user requirements.
 *
 * Blade percpu resources are reserved for kernel use. These resources are
 * reserved whenever the kernel context for the blade is loaded. Note
 * that the kernel context is not guaranteed to be always available. It is
 * loaded on demand & can be stolen by a user if the user demand exceeds the
 * kernel demand. The kernel can always reload the kernel context but
 * a SLEEP may be required!
 *
 * Async Overview:
 *
 *	Each blade has one "kernel context" that owns GRU kernel resources
 *	located on the blade. Kernel drivers use GRU resources in this context
 *	for sending messages, zeroing memory, etc.
 *
 *	The kernel context is dynamically loaded on demand. If it is not in
 *	use by the kernel, the kernel context can be unloaded & given to a user.
 *	The kernel context will be reloaded when needed. This may require that
 *	a context be stolen from a user.
 *		NOTE: frequent unloading/reloading of the kernel context is
 *		expensive. We are depending on batch schedulers, cpusets, sane
 *		drivers or some other mechanism to prevent the need for frequent
 *		stealing/reloading.
 *
 *	The kernel context consists of two parts:
 *		- 1 CB & a few DSRs that are reserved for each cpu on the blade.
 *		  Each cpu has its own private resources & does not share them
 *		  with other cpus. These resources are used serially, i.e.,
 *		  locked, used & unlocked on each call to a function in
 *		  grukservices.
 *			(Now that we have dynamic loading of kernel contexts, I
 *			 may rethink this & allow sharing between cpus....)
 *
 *		- Additional resources can be reserved long term & used directly
 *		  by UV drivers located in the kernel. Drivers using these GRU
 *		  resources can use asynchronous GRU instructions that send
 *		  interrupts on completion.
 *			- these resources must be explicitly locked/unlocked
 *			- locked resources prevent (obviously) the kernel
 *			  context from being unloaded.
 *			- drivers using these resources directly issue their own
 *			  GRU instruction and must wait/check completion.
 *
 *			  When these resources are reserved, the caller can optionally
 *			  associate a wait_queue with the resources and use asynchronous
 *			  GRU instructions. When an async GRU instruction completes, the
 *			  driver will do a wakeup on the event.
 */

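/*
 * Illustrative sketch only (not part of the driver): the asynchronous usage
 * described above, as a hypothetical kernel driver might code it. It assumes
 * the declarations in grukservices.h and the CB inlines in gru_instructions.h;
 * quicktest2() at the end of this file exercises the same path.
 */
static void __maybe_unused example_async_usage(int blade_id)
{
	static DECLARE_COMPLETION(cmp);
	static unsigned long buf[4];
	unsigned long han;
	void *cb;

	/* Reserve 1 CBR & no DSR bytes on the blade; a handle of 0 means failure */
	han = gru_reserve_async_resources(blade_id, 1, 0, &cmp);
	if (!han)
		return;

	/* Lock the kernel context & get a pointer to the reserved CBR */
	gru_lock_async_resource(han, &cb, NULL);

	/* Issue an asynchronous instruction that interrupts on completion */
	gru_vset(cb, uv_gpa(buf), 0, XTYPE_DW, 4, 1, IMA_INTERRUPT);

	/* The completion interrupt wakes "cmp"; wait for it here */
	gru_wait_async_cbr(han);
	if (gru_check_status(cb) != CBS_IDLE)
		printk(KERN_DEBUG "GRU example: async CB failed\n");

	gru_unlock_async_resource(han);
	gru_release_async_resources(han);
}
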
/* An async resource handle is the blade id + 1, so 0 can indicate failure */
#define ASYNC_HAN_TO_BID(h)	((h) - 1)
#define ASYNC_BID_TO_HAN(b)	((b) + 1)
#define ASYNC_HAN_TO_BS(h)	gru_base[ASYNC_HAN_TO_BID(h)]
#define KCB_TO_GID(cb)		((cb - gru_start_vaddr) /		\
					(GRU_SIZE * GRU_CHIPLETS_PER_BLADE))
#define KCB_TO_BS(cb)		gru_base[KCB_TO_GID(cb)]

#define GRU_NUM_KERNEL_CBR	1
#define GRU_NUM_KERNEL_DSR_BYTES 256
#define GRU_NUM_KERNEL_DSR_CL	(GRU_NUM_KERNEL_DSR_BYTES /		\
					GRU_CACHE_LINE_BYTES)

/* GRU instruction attributes for all instructions */
#define IMA			IMA_CB_DELAY

/* GRU cacheline size is always 64 bytes - even on arches with 128 byte lines */
#define __gru_cacheline_aligned__                               \
	__attribute__((__aligned__(GRU_CACHE_LINE_BYTES)))

#define MAGIC	0x1234567887654321UL

/* Default retry count for GRU errors on kernel instructions */
#define EXCEPTION_RETRY_LIMIT	3

/* Status of message queue sections */
#define MQS_EMPTY		0
#define MQS_FULL		1
#define MQS_NOOP		2

/*----------------- RESOURCE MANAGEMENT -------------------------------------*/
/* optimized for x86_64 */
struct message_queue {
	union gru_mesqhead	head __gru_cacheline_aligned__;	/* CL 0 */
	int			qlines;				/* DW 1 */
	long			hstatus[2];
	void			*next __gru_cacheline_aligned__; /* CL 1 */
	void			*limit;
	void			*start;
	void			*start2;
	char			data ____cacheline_aligned;	/* CL 2 */
};

/* First word in every message - used by mesq interface */
struct message_header {
	char	present;
	char	present2;
	char	lines;
	char	fill;
};

#define HSTATUS(mq, h)	((mq) + offsetof(struct message_queue, hstatus[h]))

/*
 * Reload the blade's kernel context into a GRU chiplet. Called holding
 * the bs_kgts_sema for READ. Will steal user contexts if necessary.
 */
static void gru_load_kernel_context(struct gru_blade_state *bs, int blade_id)
{
	struct gru_state *gru;
	struct gru_thread_state *kgts;
	void *vaddr;
	int ctxnum, ncpus;

	/* Upgrade the read lock held by the caller to a write lock */
	up_read(&bs->bs_kgts_sema);
	down_write(&bs->bs_kgts_sema);

	if (!bs->bs_kgts)
		bs->bs_kgts = gru_alloc_gts(NULL, 0, 0, 0, 0);
	kgts = bs->bs_kgts;

	if (!kgts->ts_gru) {
		STAT(load_kernel_context);
		ncpus = uv_blade_nr_possible_cpus(blade_id);
		kgts->ts_cbr_au_count = GRU_CB_COUNT_TO_AU(
			GRU_NUM_KERNEL_CBR * ncpus + bs->bs_async_cbrs);
		kgts->ts_dsr_au_count = GRU_DS_BYTES_TO_AU(
			GRU_NUM_KERNEL_DSR_BYTES * ncpus +
				bs->bs_async_dsr_bytes);
		while (!gru_assign_gru_context(kgts, blade_id)) {
			msleep(1);
			gru_steal_context(kgts, blade_id);
		}
		gru_load_context(kgts);
		gru = bs->bs_kgts->ts_gru;
		vaddr = gru->gs_gru_base_vaddr;
		ctxnum = kgts->ts_ctxnum;
		bs->kernel_cb = get_gseg_base_address_cb(vaddr, ctxnum, 0);
		bs->kernel_dsr = get_gseg_base_address_ds(vaddr, ctxnum, 0);
	}
	downgrade_write(&bs->bs_kgts_sema);
}

/*
 * Free all kernel contexts that are not currently in use.
 *   Returns 0 if all were freed, else the number of contexts still in use.
 */
static int gru_free_kernel_contexts(void)
{
	struct gru_blade_state *bs;
	struct gru_thread_state *kgts;
	int bid, ret = 0;

	for (bid = 0; bid < GRU_MAX_BLADES; bid++) {
		bs = gru_base[bid];
		if (!bs)
			continue;
		if (down_write_trylock(&bs->bs_kgts_sema)) {
			kgts = bs->bs_kgts;
			if (kgts && kgts->ts_gru)
				gru_unload_context(kgts, 0);
			kfree(kgts);
			bs->bs_kgts = NULL;
			up_write(&bs->bs_kgts_sema);
		} else {
			ret++;
		}
	}
	return ret;
}

/*
 * Lock & load the kernel context for the specified blade.
 */
static struct gru_blade_state *gru_lock_kernel_context(int blade_id)
{
	struct gru_blade_state *bs;

	STAT(lock_kernel_context);
	bs = gru_base[blade_id];

	down_read(&bs->bs_kgts_sema);
	if (!bs->bs_kgts || !bs->bs_kgts->ts_gru)
		gru_load_kernel_context(bs, blade_id);
	return bs;
}

/*
 * Unlock the kernel context for the specified blade. Context is not
 * unloaded but may be stolen before next use.
 */
static void gru_unlock_kernel_context(int blade_id)
{
	struct gru_blade_state *bs;

	bs = gru_base[blade_id];
	up_read(&bs->bs_kgts_sema);
	STAT(unlock_kernel_context);
}

/*
 * Reserve & get pointers to the DSR/CBRs reserved for the current cpu.
 *	- returns with preemption disabled
 */
static int gru_get_cpu_resources(int dsr_bytes, void **cb, void **dsr)
{
	struct gru_blade_state *bs;
	int lcpu;

	BUG_ON(dsr_bytes > GRU_NUM_KERNEL_DSR_BYTES);
	preempt_disable();
	bs = gru_lock_kernel_context(uv_numa_blade_id());
	lcpu = uv_blade_processor_id();
	*cb = bs->kernel_cb + lcpu * GRU_HANDLE_STRIDE;
	*dsr = bs->kernel_dsr + lcpu * GRU_NUM_KERNEL_DSR_BYTES;
	return 0;
}

/*
 * Free the current cpu's reserved DSR/CBR resources.
 */
static void gru_free_cpu_resources(void *cb, void *dsr)
{
	gru_unlock_kernel_context(uv_numa_blade_id());
	preempt_enable();
}

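/*
 * Illustrative sketch only (not part of the driver): the serialized
 * lock/use/unlock pattern that the kservices below (gru_send_message_gpa(),
 * gru_copy_gpa(), the quicktests) follow when using the per-cpu CBR/DSR
 * reserved above. The function name is hypothetical.
 */
static int __maybe_unused example_percpu_usage(void)
{
	unsigned long word = 0;
	void *cb, *dsr;
	int ret;

	/* Lock the blade's kernel context & disable preemption */
	if (gru_get_cpu_resources(sizeof(word), &cb, &dsr))
		return MQE_BUG_NO_RESOURCES;

	/* Use this cpu's private CBR/DSR for a single GRU operation */
	gru_vload(cb, uv_gpa(&word), gru_get_tri(dsr), XTYPE_DW, 1, 1, IMA);
	ret = gru_wait(cb);

	/* Unlock the kernel context & reenable preemption */
	gru_free_cpu_resources(cb, dsr);
	return ret;
}
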
/*
 * Reserve GRU resources to be used asynchronously.
 *   Note: currently supports only 1 reservation per blade.
 *
 *	input:
 *		blade_id  - blade on which resources should be reserved
 *		cbrs	  - number of CBRs
 *		dsr_bytes - number of DSR bytes needed
 *	output:
 *		handle to identify the resources
 *		(0 = failure: async resources already reserved on this blade)
 */
unsigned long gru_reserve_async_resources(int blade_id, int cbrs, int dsr_bytes,
			struct completion *cmp)
{
	struct gru_blade_state *bs;
	struct gru_thread_state *kgts;
	int ret = 0;

	bs = gru_base[blade_id];

	down_write(&bs->bs_kgts_sema);

	/* Verify no resources already reserved */
	if (bs->bs_async_dsr_bytes + bs->bs_async_cbrs)
		goto done;
	bs->bs_async_dsr_bytes = dsr_bytes;
	bs->bs_async_cbrs = cbrs;
	bs->bs_async_wq = cmp;
	kgts = bs->bs_kgts;

	/* Resources changed. Unload context if already loaded */
	if (kgts && kgts->ts_gru)
		gru_unload_context(kgts, 0);
	ret = ASYNC_BID_TO_HAN(blade_id);

done:
	up_write(&bs->bs_kgts_sema);
	return ret;
}

/*
 * Release async resources previously reserved.
 *
 *	input:
 *		han - handle to identify resources
 */
void gru_release_async_resources(unsigned long han)
{
	struct gru_blade_state *bs = ASYNC_HAN_TO_BS(han);

	down_write(&bs->bs_kgts_sema);
	bs->bs_async_dsr_bytes = 0;
	bs->bs_async_cbrs = 0;
	bs->bs_async_wq = NULL;
	up_write(&bs->bs_kgts_sema);
}

/*
 * Wait for async GRU instructions to complete.
 *
 *	input:
 *		han - handle to identify resources
 */
void gru_wait_async_cbr(unsigned long han)
{
	struct gru_blade_state *bs = ASYNC_HAN_TO_BS(han);

	wait_for_completion(bs->bs_async_wq);
	mb();
}

/*
 * Lock previously reserved async GRU resources.
 *
 *	input:
 *		han - handle to identify resources
 *	output:
 *		cb  - pointer to first CBR
 *		dsr - pointer to first DSR
 */
void gru_lock_async_resource(unsigned long han, void **cb, void **dsr)
{
	struct gru_blade_state *bs = ASYNC_HAN_TO_BS(han);
	int blade_id = ASYNC_HAN_TO_BID(han);
	int ncpus;

	gru_lock_kernel_context(blade_id);
	ncpus = uv_blade_nr_possible_cpus(blade_id);
	if (cb)
		*cb = bs->kernel_cb + ncpus * GRU_HANDLE_STRIDE;
	if (dsr)
		*dsr = bs->kernel_dsr + ncpus * GRU_NUM_KERNEL_DSR_BYTES;
}

/*
 * Unlock previously reserved async GRU resources.
 *
 *	input:
 *		han - handle to identify resources
 */
void gru_unlock_async_resource(unsigned long han)
{
	int blade_id = ASYNC_HAN_TO_BID(han);

	gru_unlock_kernel_context(blade_id);
}

/*----------------------------------------------------------------------*/
int gru_get_cb_exception_detail(void *cb,
		struct control_block_extended_exc_detail *excdet)
{
	struct gru_control_block_extended *cbe;
	struct gru_blade_state *bs;
	int cbrnum;

	bs = KCB_TO_BS(cb);
	cbrnum = thread_cbr_number(bs->bs_kgts, get_cb_number(cb));
	cbe = get_cbe(GRUBASE(cb), cbrnum);
	gru_flush_cache(cbe);	/* CBE not coherent */
	excdet->opc = cbe->opccpy;
	excdet->exopc = cbe->exopccpy;
	excdet->ecause = cbe->ecause;
	excdet->exceptdet0 = cbe->idef1upd;
	excdet->exceptdet1 = cbe->idef3upd;
	gru_flush_cache(cbe);
	return 0;
}

char *gru_get_cb_exception_detail_str(int ret, void *cb,
				      char *buf, int size)
{
	struct gru_control_block_status *gen = (void *)cb;
	struct control_block_extended_exc_detail excdet;

	if (ret > 0 && gen->istatus == CBS_EXCEPTION) {
		gru_get_cb_exception_detail(cb, &excdet);
		snprintf(buf, size,
			"GRU exception: cb %p, opc %d, exopc %d, ecause 0x%x, "
			"excdet0 0x%lx, excdet1 0x%x",
			gen, excdet.opc, excdet.exopc, excdet.ecause,
			excdet.exceptdet0, excdet.exceptdet1);
	} else {
		snprintf(buf, size, "No exception");
	}
	return buf;
}

static int gru_wait_idle_or_exception(struct gru_control_block_status *gen)
{
	while (gen->istatus >= CBS_ACTIVE) {
		cpu_relax();
		barrier();
	}
	return gen->istatus;
}

static int gru_retry_exception(void *cb)
{
	struct gru_control_block_status *gen = (void *)cb;
	struct control_block_extended_exc_detail excdet;
	int retry = EXCEPTION_RETRY_LIMIT;

	while (1) {
		if (gru_wait_idle_or_exception(gen) == CBS_IDLE)
			return CBS_IDLE;
		if (gru_get_cb_message_queue_substatus(cb))
			return CBS_EXCEPTION;
		gru_get_cb_exception_detail(cb, &excdet);
		if ((excdet.ecause & ~EXCEPTION_RETRY_BITS) ||
				(excdet.cbrexecstatus & CBR_EXS_ABORT_OCC))
			break;
		if (retry-- == 0)
			break;
		gen->icmd = 1;
		gru_flush_cache(gen);
	}
	return CBS_EXCEPTION;
}

int gru_check_status_proc(void *cb)
{
	struct gru_control_block_status *gen = (void *)cb;
	int ret;

	ret = gen->istatus;
	if (ret != CBS_EXCEPTION)
		return ret;
	return gru_retry_exception(cb);
}

int gru_wait_proc(void *cb)
{
	struct gru_control_block_status *gen = (void *)cb;
	int ret;

	ret = gru_wait_idle_or_exception(gen);
	if (ret == CBS_EXCEPTION)
		ret = gru_retry_exception(cb);

	return ret;
}

void gru_abort(int ret, void *cb, char *str)
{
	char buf[GRU_EXC_STR_SIZE];

	panic("GRU FATAL ERROR: %s - %s\n", str,
	      gru_get_cb_exception_detail_str(ret, cb, buf, sizeof(buf)));
}

void gru_wait_abort_proc(void *cb)
{
	int ret;

	ret = gru_wait_proc(cb);
	if (ret)
		gru_abort(ret, cb, "gru_wait_abort");
}


/*------------------------------ MESSAGE QUEUES -----------------------------*/

/* Internal status. These are NOT returned to the user. */
#define MQIE_AGAIN		-1	/* try again */


/*
 * Save/restore the "present" flag that is in the second line of 2-line
 * messages
 */
static inline int get_present2(void *p)
{
	struct message_header *mhdr = p + GRU_CACHE_LINE_BYTES;
	return mhdr->present;
}

static inline void restore_present2(void *p, int val)
{
	struct message_header *mhdr = p + GRU_CACHE_LINE_BYTES;
	mhdr->present = val;
}

/*
 * Create a message queue.
 *	bytes - size of the queue in bytes (whole cache lines), including
 *		the 2-cacheline header.
 */
int gru_create_message_queue(struct gru_message_queue_desc *mqd,
		void *p, unsigned int bytes, int nasid, int vector, int apicid)
{
	struct message_queue *mq = p;
	unsigned int qlines;

	qlines = bytes / GRU_CACHE_LINE_BYTES - 2;
	memset(mq, 0, bytes);
	mq->start = &mq->data;
	mq->start2 = &mq->data + (qlines / 2 - 1) * GRU_CACHE_LINE_BYTES;
	mq->next = &mq->data;
	mq->limit = &mq->data + (qlines - 2) * GRU_CACHE_LINE_BYTES;
	mq->qlines = qlines;
	mq->hstatus[0] = 0;
	mq->hstatus[1] = 1;
	mq->head = gru_mesq_head(2, qlines / 2 + 1);
	mqd->mq = mq;
	mqd->mq_gpa = uv_gpa(mq);
	mqd->qlines = qlines;
	mqd->interrupt_pnode = UV_NASID_TO_PNODE(nasid);
	mqd->interrupt_vector = vector;
	mqd->interrupt_apicid = apicid;
	return 0;
}
EXPORT_SYMBOL_GPL(gru_create_message_queue);

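/*
 * Worked example of the layout initialized above (derived from the code,
 * for illustration): with an 8-cacheline (512 byte) buffer, qlines = 8 - 2
 * = 6. Cache lines 0-1 hold the queue header, the first half of the queue
 * is lines 2-3 and the second half is lines 4-5, so four single-line
 * messages fit before a send returns MQE_QUEUE_FULL. quicktest1() below
 * checks exactly this case.
 */
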
/*
 * Send a NOOP message to a message queue.
 *	Returns:
 *		 0 - if queue is full after the send. This is the normal case
 *		     but various races can change this.
 *		-1 - if mesq sent successfully but queue not full
 *		>0 - unexpected error. MQE_xxx returned
 */
static int send_noop_message(void *cb, struct gru_message_queue_desc *mqd,
				void *mesg)
{
	const struct message_header noop_header = {
					.present = MQS_NOOP, .lines = 1};
	unsigned long m;
	int substatus, ret;
	struct message_header save_mhdr, *mhdr = mesg;

	STAT(mesq_noop);
	save_mhdr = *mhdr;
	*mhdr = noop_header;
	gru_mesq(cb, mqd->mq_gpa, gru_get_tri(mhdr), 1, IMA);
	ret = gru_wait(cb);

	if (ret) {
		substatus = gru_get_cb_message_queue_substatus(cb);
		switch (substatus) {
		case CBSS_NO_ERROR:
			STAT(mesq_noop_unexpected_error);
			ret = MQE_UNEXPECTED_CB_ERR;
			break;
		case CBSS_LB_OVERFLOWED:
			STAT(mesq_noop_lb_overflow);
			ret = MQE_CONGESTION;
			break;
		case CBSS_QLIMIT_REACHED:
			STAT(mesq_noop_qlimit_reached);
			ret = 0;
			break;
		case CBSS_AMO_NACKED:
			STAT(mesq_noop_amo_nacked);
			ret = MQE_CONGESTION;
			break;
		case CBSS_PUT_NACKED:
			STAT(mesq_noop_put_nacked);
			m = mqd->mq_gpa + (gru_get_amo_value_head(cb) << 6);
			gru_vstore(cb, m, gru_get_tri(mesg), XTYPE_CL, 1, 1,
						IMA);
			if (gru_wait(cb) == CBS_IDLE)
				ret = MQIE_AGAIN;
			else
				ret = MQE_UNEXPECTED_CB_ERR;
			break;
		case CBSS_PAGE_OVERFLOW:
		default:
			BUG();
		}
	}
	*mhdr = save_mhdr;
	return ret;
}

/*
 * Handle a gru_mesq full.
 */
static int send_message_queue_full(void *cb, struct gru_message_queue_desc *mqd,
				void *mesg, int lines)
{
	union gru_mesqhead mqh;
	unsigned int limit, head;
	unsigned long avalue;
	int half, qlines;

	/* Determine if switching to first/second half of q */
	avalue = gru_get_amo_value(cb);
	head = gru_get_amo_value_head(cb);
	limit = gru_get_amo_value_limit(cb);

	qlines = mqd->qlines;
	half = (limit != qlines);

	if (half)
		mqh = gru_mesq_head(qlines / 2 + 1, qlines);
	else
		mqh = gru_mesq_head(2, qlines / 2 + 1);

	/* Try to get lock for switching head pointer */
	gru_gamir(cb, EOP_IR_CLR, HSTATUS(mqd->mq_gpa, half), XTYPE_DW, IMA);
	if (gru_wait(cb) != CBS_IDLE)
		goto cberr;
	if (!gru_get_amo_value(cb)) {
		STAT(mesq_qf_locked);
		return MQE_QUEUE_FULL;
	}

	/* Got the lock. Send an optional NOOP if the queue is not full. */
	if (head != limit) {
		if (send_noop_message(cb, mqd, mesg)) {
			gru_gamir(cb, EOP_IR_INC, HSTATUS(mqd->mq_gpa, half),
					XTYPE_DW, IMA);
			if (gru_wait(cb) != CBS_IDLE)
				goto cberr;
			STAT(mesq_qf_noop_not_full);
			return MQIE_AGAIN;
		}
		avalue++;
	}

	/* Then flip queuehead to other half of queue. */
	gru_gamer(cb, EOP_ERR_CSWAP, mqd->mq_gpa, XTYPE_DW, mqh.val, avalue,
				IMA);
	if (gru_wait(cb) != CBS_IDLE)
		goto cberr;

	/* If the queue head swap did not succeed, clear the hstatus lock */
	if (gru_get_amo_value(cb) != avalue) {
		STAT(mesq_qf_switch_head_failed);
		gru_gamir(cb, EOP_IR_INC, HSTATUS(mqd->mq_gpa, half), XTYPE_DW,
				IMA);
		if (gru_wait(cb) != CBS_IDLE)
			goto cberr;
	}
	return MQIE_AGAIN;
cberr:
	STAT(mesq_qf_unexpected_error);
	return MQE_UNEXPECTED_CB_ERR;
}

/*
 * Send a cross-partition interrupt to the SSI that contains the target
 * message queue. Normally, the interrupt is automatically delivered by
 * hardware but some error conditions require explicit delivery.
 */
static void send_message_queue_interrupt(struct gru_message_queue_desc *mqd)
{
	if (mqd->interrupt_vector)
		uv_hub_send_ipi(mqd->interrupt_pnode, mqd->interrupt_apicid,
				mqd->interrupt_vector);
}

/*
 * Handle a PUT failure. Note: if the message was a 2-line message, one of
 * the lines might have been successfully written. Before resending the
 * message, "present" must be cleared in BOTH lines to prevent the receiver
 * from prematurely seeing the full message.
 */
static int send_message_put_nacked(void *cb, struct gru_message_queue_desc *mqd,
				void *mesg, int lines)
{
	unsigned long m;

	m = mqd->mq_gpa + (gru_get_amo_value_head(cb) << 6);
	if (lines == 2) {
		gru_vset(cb, m, 0, XTYPE_CL, lines, 1, IMA);
		if (gru_wait(cb) != CBS_IDLE)
			return MQE_UNEXPECTED_CB_ERR;
	}
	gru_vstore(cb, m, gru_get_tri(mesg), XTYPE_CL, lines, 1, IMA);
	if (gru_wait(cb) != CBS_IDLE)
		return MQE_UNEXPECTED_CB_ERR;
	send_message_queue_interrupt(mqd);
	return MQE_OK;
}

/*
 * Handle a gru_mesq failure. Some of these failures are software recoverable
 * or retryable.
 */
static int send_message_failure(void *cb, struct gru_message_queue_desc *mqd,
				void *mesg, int lines)
{
	int substatus, ret = 0;

	substatus = gru_get_cb_message_queue_substatus(cb);
	switch (substatus) {
	case CBSS_NO_ERROR:
		STAT(mesq_send_unexpected_error);
		ret = MQE_UNEXPECTED_CB_ERR;
		break;
	case CBSS_LB_OVERFLOWED:
		STAT(mesq_send_lb_overflow);
		ret = MQE_CONGESTION;
		break;
	case CBSS_QLIMIT_REACHED:
		STAT(mesq_send_qlimit_reached);
		ret = send_message_queue_full(cb, mqd, mesg, lines);
		break;
	case CBSS_AMO_NACKED:
		STAT(mesq_send_amo_nacked);
		ret = MQE_CONGESTION;
		break;
	case CBSS_PUT_NACKED:
		STAT(mesq_send_put_nacked);
		ret = send_message_put_nacked(cb, mqd, mesg, lines);
		break;
	default:
		BUG();
	}
	return ret;
}

/*
 * Send a message to a message queue.
 *	mqd	message queue descriptor
 *	mesg	message. Must be a vaddr within a GSEG
 *	bytes	message size (<= 2 CL)
 */
int gru_send_message_gpa(struct gru_message_queue_desc *mqd, void *mesg,
				unsigned int bytes)
{
	struct message_header *mhdr;
	void *cb;
	void *dsr;
	int istatus, clines, ret;

	STAT(mesq_send);
	BUG_ON(bytes < sizeof(int) || bytes > 2 * GRU_CACHE_LINE_BYTES);

	clines = DIV_ROUND_UP(bytes, GRU_CACHE_LINE_BYTES);
	if (gru_get_cpu_resources(bytes, &cb, &dsr))
		return MQE_BUG_NO_RESOURCES;
	memcpy(dsr, mesg, bytes);
	mhdr = dsr;
	mhdr->present = MQS_FULL;
	mhdr->lines = clines;
	if (clines == 2) {
		mhdr->present2 = get_present2(mhdr);
		restore_present2(mhdr, MQS_FULL);
	}

	do {
		ret = MQE_OK;
		gru_mesq(cb, mqd->mq_gpa, gru_get_tri(mhdr), clines, IMA);
		istatus = gru_wait(cb);
		if (istatus != CBS_IDLE)
			ret = send_message_failure(cb, mqd, dsr, clines);
	} while (ret == MQIE_AGAIN);
	gru_free_cpu_resources(cb, dsr);

	if (ret)
		STAT(mesq_send_failed);
	return ret;
}
EXPORT_SYMBOL_GPL(gru_send_message_gpa);

/*
 * Advance the receive pointer for the queue to the next message.
 */
void gru_free_message(struct gru_message_queue_desc *mqd, void *mesg)
{
	struct message_queue *mq = mqd->mq;
	struct message_header *mhdr = mq->next;
	void *next, *pnext;
	int half = -1;
	int lines = mhdr->lines;

	if (lines == 2)
		restore_present2(mhdr, MQS_EMPTY);
	mhdr->present = MQS_EMPTY;

	pnext = mq->next;
	next = pnext + GRU_CACHE_LINE_BYTES * lines;
	if (next == mq->limit) {
		next = mq->start;
		half = 1;
	} else if (pnext < mq->start2 && next >= mq->start2) {
		half = 0;
	}

	if (half >= 0)
		mq->hstatus[half] = 1;
	mq->next = next;
}
EXPORT_SYMBOL_GPL(gru_free_message);

/*
 * Get the next message from a message queue. Return NULL if no message
 * is present. The caller must call gru_free_message() to advance to the
 * next message.
 *	mqd	message queue descriptor
 */
void *gru_get_next_message(struct gru_message_queue_desc *mqd)
{
	struct message_queue *mq = mqd->mq;
	struct message_header *mhdr = mq->next;
	int present = mhdr->present;

	/* skip NOOP messages */
	STAT(mesq_receive);
	while (present == MQS_NOOP) {
		gru_free_message(mqd, mhdr);
		mhdr = mq->next;
		present = mhdr->present;
	}

	/* Wait for both halves of 2 line messages */
	if (present == MQS_FULL && mhdr->lines == 2 &&
				get_present2(mhdr) == MQS_EMPTY)
		present = MQS_EMPTY;

	if (!present) {
		STAT(mesq_receive_none);
		return NULL;
	}

	if (mhdr->lines == 2)
		restore_present2(mhdr, mhdr->present2);

	return mhdr;
}
EXPORT_SYMBOL_GPL(gru_get_next_message);

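/*
 * Illustrative sketch only (not part of the driver): how a kernel driver
 * might use the message queue interface exported above. The queue memory,
 * sizes, and the fact that sender & receiver run in the same thread (they
 * normally live on different partitions, e.g. in the xp/xpc drivers) are
 * hypothetical.
 */
static int __maybe_unused example_mesq_usage(void)
{
	struct gru_message_queue_desc mqd;
	char msg[GRU_CACHE_LINE_BYTES];
	unsigned long page;
	void *m;
	int ret;

	/* Receiving side: a page is cacheline aligned & within one page */
	page = __get_free_page(GFP_KERNEL);
	if (!page)
		return -ENOMEM;
	/* No interrupt delivery (nasid/vector/apicid all zero) */
	gru_create_message_queue(&mqd, (void *)page, PAGE_SIZE, 0, 0, 0);

	/* Sending side: messages are 1 or 2 cache lines; retry on congestion */
	memset(msg, 0, sizeof(msg));
	do {
		ret = gru_send_message_gpa(&mqd, msg, sizeof(msg));
	} while (ret == MQE_CONGESTION);

	/* Receiving side: poll for messages & advance past each one */
	while ((m = gru_get_next_message(&mqd)) != NULL)
		gru_free_message(&mqd, m);

	free_page(page);
	return ret;
}
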
/* ---------------------- GRU DATA COPY FUNCTIONS ---------------------------*/

/*
 * Copy a block of data using the GRU resources
 */
int gru_copy_gpa(unsigned long dest_gpa, unsigned long src_gpa,
				unsigned int bytes)
{
	void *cb;
	void *dsr;
	int ret;

	STAT(copy_gpa);
	if (gru_get_cpu_resources(GRU_NUM_KERNEL_DSR_BYTES, &cb, &dsr))
		return MQE_BUG_NO_RESOURCES;
	gru_bcopy(cb, src_gpa, dest_gpa, gru_get_tri(dsr),
		  XTYPE_B, bytes, GRU_NUM_KERNEL_DSR_CL, IMA);
	ret = gru_wait(cb);
	gru_free_cpu_resources(cb, dsr);
	return ret;
}
EXPORT_SYMBOL_GPL(gru_copy_gpa);

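/*
 * Illustrative sketch only (not part of the driver): gru_copy_gpa() takes
 * global physical addresses, so it can also reach memory on another
 * partition. Both buffers here are local & hypothetical.
 */
static int __maybe_unused example_copy_usage(void *dst, void *src,
				unsigned int bytes)
{
	/* uv_gpa() converts a kernel virtual address to a global address */
	return gru_copy_gpa(uv_gpa(dst), uv_gpa(src), bytes);
}
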
/* ------------------- KERNEL QUICKTESTS RUN AT STARTUP ----------------*/
/* Temp - will delete after we gain confidence in the GRU */

static int quicktest0(unsigned long arg)
{
	unsigned long word0;
	unsigned long word1;
	void *cb;
	void *dsr;
	unsigned long *p;
	int ret = -EIO;

	if (gru_get_cpu_resources(GRU_CACHE_LINE_BYTES, &cb, &dsr))
		return MQE_BUG_NO_RESOURCES;
	p = dsr;
	word0 = MAGIC;
	word1 = 0;

	gru_vload(cb, uv_gpa(&word0), gru_get_tri(dsr), XTYPE_DW, 1, 1, IMA);
	if (gru_wait(cb) != CBS_IDLE) {
		printk(KERN_DEBUG "GRU quicktest0: CBR failure 1\n");
		goto done;
	}

	if (*p != MAGIC) {
		printk(KERN_DEBUG "GRU: quicktest0 bad magic 0x%lx\n", *p);
		goto done;
	}
	gru_vstore(cb, uv_gpa(&word1), gru_get_tri(dsr), XTYPE_DW, 1, 1, IMA);
	if (gru_wait(cb) != CBS_IDLE) {
		printk(KERN_DEBUG "GRU quicktest0: CBR failure 2\n");
		goto done;
	}

	if (word0 != word1 || word1 != MAGIC) {
		printk(KERN_DEBUG
		       "GRU quicktest0 err: found 0x%lx, expected 0x%lx\n",
		       word1, MAGIC);
		goto done;
	}
	ret = 0;

done:
	gru_free_cpu_resources(cb, dsr);
	return ret;
}

#define ALIGNUP(p, q)	((void *)(((unsigned long)(p) + (q) - 1) & ~(q - 1)))

static int quicktest1(unsigned long arg)
{
	struct gru_message_queue_desc mqd;
	void *p, *mq;
	unsigned long *dw;
	int i, ret = -EIO;
	char mes[GRU_CACHE_LINE_BYTES], *m;

	/* Need 1KB of cacheline-aligned memory that does not cross a page boundary */
	p = kmalloc(4096, GFP_KERNEL);
	if (p == NULL)
		return -ENOMEM;
	mq = ALIGNUP(p, 1024);
	memset(mes, 0xee, sizeof(mes));
	dw = mq;

	gru_create_message_queue(&mqd, mq, 8 * GRU_CACHE_LINE_BYTES, 0, 0, 0);
	for (i = 0; i < 6; i++) {
		mes[8] = i;
		do {
			ret = gru_send_message_gpa(&mqd, mes, sizeof(mes));
		} while (ret == MQE_CONGESTION);
		if (ret)
			break;
	}
	if (ret != MQE_QUEUE_FULL || i != 4)
		goto done;

	for (i = 0; i < 6; i++) {
		m = gru_get_next_message(&mqd);
		if (!m || m[8] != i)
			break;
		gru_free_message(&mqd, m);
	}
	ret = (i == 4) ? 0 : -EIO;

done:
	kfree(p);
	return ret;
}

static int quicktest2(unsigned long arg)
{
	static DECLARE_COMPLETION(cmp);
	unsigned long han;
	int blade_id = 0;
	int numcb = 4;
	int ret = 0;
	unsigned long *buf;
	void *cb0, *cb;
	int i, k, istatus, bytes;

	bytes = numcb * 4 * 8;
	buf = kmalloc(bytes, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	ret = -EBUSY;
	han = gru_reserve_async_resources(blade_id, numcb, 0, &cmp);
	if (!han)
		goto done;

	gru_lock_async_resource(han, &cb0, NULL);
	memset(buf, 0xee, bytes);
	for (i = 0; i < numcb; i++)
		gru_vset(cb0 + i * GRU_HANDLE_STRIDE, uv_gpa(&buf[i * 4]), 0,
				XTYPE_DW, 4, 1, IMA_INTERRUPT);

	ret = 0;
	for (k = 0; k < numcb; k++) {
		gru_wait_async_cbr(han);
		for (i = 0; i < numcb; i++) {
			cb = cb0 + i * GRU_HANDLE_STRIDE;
			istatus = gru_check_status(cb);
			if (istatus == CBS_ACTIVE)
				continue;
			if (istatus == CBS_EXCEPTION)
				ret = -EFAULT;
			else if (buf[4 * i] || buf[4 * i + 1] ||
					buf[4 * i + 2] || buf[4 * i + 3])
				ret = -EIO;
		}
	}
	BUG_ON(cmp.done);

	gru_unlock_async_resource(han);
	gru_release_async_resources(han);
done:
	kfree(buf);
	return ret;
}

/*
 * Debugging only. User hook for various kernel tests
 * of driver & gru.
 */
int gru_ktest(unsigned long arg)
{
	int ret = -EINVAL;

	switch (arg & 0xff) {
	case 0:
		ret = quicktest0(arg);
		break;
	case 1:
		ret = quicktest1(arg);
		break;
	case 2:
		ret = quicktest2(arg);
		break;
	case 99:
		ret = gru_free_kernel_contexts();
		break;
	}
	return ret;
}

int gru_kservices_init(void)
{
	return 0;
}

void gru_kservices_exit(void)
{
	if (gru_free_kernel_contexts())
		BUG();
}