// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * SN Platform GRU Driver
 *
 * KERNEL SERVICES THAT USE THE GRU
 *
 * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
 */

#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/spinlock.h>
#include <linux/device.h>
#include <linux/miscdevice.h>
#include <linux/proc_fs.h>
#include <linux/interrupt.h>
#include <linux/uaccess.h>
#include <linux/delay.h>
#include <linux/export.h>
#include <asm/io_apic.h>
#include "gru.h"
#include "grulib.h"
#include "grutables.h"
#include "grukservices.h"
#include "gru_instructions.h"
#include <asm/uv/uv_hub.h>

/*
 * Kernel GRU Usage
 *
 * The following is an interim algorithm for management of kernel GRU
 * resources. This will likely be replaced when we better understand the
 * kernel/user requirements.
 *
 * Blade percpu resources reserved for kernel use. These resources are
 * reserved whenever the kernel context for the blade is loaded. Note
 * that the kernel context is not guaranteed to be always available. It is
 * loaded on demand & can be stolen by a user if the user demand exceeds the
 * kernel demand. The kernel can always reload the kernel context but
 * a SLEEP may be required!
 *
 * Async Overview:
 *
 *	Each blade has one "kernel context" that owns GRU kernel resources
 *	located on the blade. Kernel drivers use GRU resources in this context
 *	for sending messages, zeroing memory, etc.
 *
 *	The kernel context is dynamically loaded on demand. If it is not in
 *	use by the kernel, the kernel context can be unloaded & given to a user.
 *	The kernel context will be reloaded when needed. This may require that
 *	a context be stolen from a user.
 *		NOTE: frequent unloading/reloading of the kernel context is
 *		expensive. We are depending on batch schedulers, cpusets, sane
 *		drivers or some other mechanism to prevent the need for frequent
 *		stealing/reloading.
 *
 *	The kernel context consists of two parts:
 *		- 1 CB & a few DSRs that are reserved for each cpu on the blade.
 *		  Each cpu has its own private resources & does not share them
 *		  with other cpus. These resources are used serially, i.e.,
 *		  locked, used & unlocked on each call to a function in
 *		  grukservices.
 *			(Now that we have dynamic loading of kernel contexts, I
 *			may rethink this & allow sharing between cpus....)
 *
 *		- Additional resources can be reserved long term & used directly
 *		  by UV drivers located in the kernel. Drivers using these GRU
 *		  resources can use asynchronous GRU instructions that send
 *		  interrupts on completion.
 *			- these resources must be explicitly locked/unlocked
 *			- locked resources prevent (obviously) the kernel
 *			  context from being unloaded.
 *			- drivers using these resources directly issue their own
 *			  GRU instruction and must wait/check completion.
 *
 *		  When these resources are reserved, the caller can optionally
 *		  associate a wait_queue with the resources and use asynchronous
 *		  GRU instructions. When an async GRU instruction completes, the
 *		  driver will do a wakeup on the event.
 */
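
/*
 * Illustrative sketch (not compiled): the per-cpu usage pattern described
 * above, as the kservices in this file apply it internally - reserve the
 * current cpu's CB/DSR, issue an instruction, wait, then release. The
 * helpers and macros referenced here (gru_get_cpu_resources() etc.) are
 * defined later in this file; the function name and buffer are placeholders.
 */
#if 0
static int example_percpu_kservice(unsigned long *src_word)
{
	void *cb, *dsr;
	int ret;

	/* Lock the cpu's reserved CB/DSR; returns with preemption disabled */
	if (gru_get_cpu_resources(GRU_CACHE_LINE_BYTES, &cb, &dsr))
		return MQE_BUG_NO_RESOURCES;

	/* Issue one GRU instruction and spin until it completes */
	gru_vload(cb, uv_gpa(src_word), gru_get_tri(dsr), XTYPE_DW, 1, 1, IMA);
	ret = (gru_wait(cb) == CBS_IDLE) ? 0 : -EIO;

	/* Unlock the kernel context & reenable preemption */
	gru_free_cpu_resources(cb, dsr);
	return ret;
}
#endif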

#define ASYNC_HAN_TO_BID(h)	((h) - 1)
#define ASYNC_BID_TO_HAN(b)	((b) + 1)
#define ASYNC_HAN_TO_BS(h)	gru_base[ASYNC_HAN_TO_BID(h)]

#define GRU_NUM_KERNEL_CBR	1
#define GRU_NUM_KERNEL_DSR_BYTES 256
#define GRU_NUM_KERNEL_DSR_CL	(GRU_NUM_KERNEL_DSR_BYTES /	\
					GRU_CACHE_LINE_BYTES)

/* GRU instruction attributes for all instructions */
#define IMA			IMA_CB_DELAY

/* GRU cacheline size is always 64 bytes - even on arches with 128 byte lines */
#define __gru_cacheline_aligned__	\
	__attribute__((__aligned__(GRU_CACHE_LINE_BYTES)))

#define MAGIC	0x1234567887654321UL

/* Default retry count for GRU errors on kernel instructions */
#define EXCEPTION_RETRY_LIMIT	3

/* Status of message queue sections */
#define MQS_EMPTY		0
#define MQS_FULL		1
#define MQS_NOOP		2

/*----------------- RESOURCE MANAGEMENT -------------------------------------*/
/* optimized for x86_64 */
struct message_queue {
	union gru_mesqhead	head __gru_cacheline_aligned__;	/* CL 0 */
	int			qlines;				/* DW 1 */
	long			hstatus[2];
	void			*next __gru_cacheline_aligned__;/* CL 1 */
	void			*limit;
	void			*start;
	void			*start2;
	char			data ____cacheline_aligned;	/* CL 2 */
};

/* First word in every message - used by mesq interface */
struct message_header {
	char	present;
	char	present2;
	char	lines;
	char	fill;
};

#define HSTATUS(mq, h)	((mq) + offsetof(struct message_queue, hstatus[h]))

/*
 * Reload the blade's kernel context into a GRU chiplet. Called holding
 * the bs_kgts_sema for READ. Will steal user contexts if necessary.
 */
static void gru_load_kernel_context(struct gru_blade_state *bs, int blade_id)
{
	struct gru_state *gru;
	struct gru_thread_state *kgts;
	void *vaddr;
	int ctxnum, ncpus;

	up_read(&bs->bs_kgts_sema);
	down_write(&bs->bs_kgts_sema);

	if (!bs->bs_kgts) {
		do {
			bs->bs_kgts = gru_alloc_gts(NULL, 0, 0, 0, 0, 0);
			if (!IS_ERR(bs->bs_kgts))
				break;
			msleep(1);
		} while (true);
		bs->bs_kgts->ts_user_blade_id = blade_id;
	}
	kgts = bs->bs_kgts;

	if (!kgts->ts_gru) {
		STAT(load_kernel_context);
		ncpus = uv_blade_nr_possible_cpus(blade_id);
		kgts->ts_cbr_au_count = GRU_CB_COUNT_TO_AU(
			GRU_NUM_KERNEL_CBR * ncpus + bs->bs_async_cbrs);
		kgts->ts_dsr_au_count = GRU_DS_BYTES_TO_AU(
			GRU_NUM_KERNEL_DSR_BYTES * ncpus +
				bs->bs_async_dsr_bytes);
		while (!gru_assign_gru_context(kgts)) {
			msleep(1);
			gru_steal_context(kgts);
		}
		gru_load_context(kgts);
		gru = bs->bs_kgts->ts_gru;
		vaddr = gru->gs_gru_base_vaddr;
		ctxnum = kgts->ts_ctxnum;
		bs->kernel_cb = get_gseg_base_address_cb(vaddr, ctxnum, 0);
		bs->kernel_dsr = get_gseg_base_address_ds(vaddr, ctxnum, 0);
	}
	downgrade_write(&bs->bs_kgts_sema);
}
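
/*
 * Illustrative sketch (not compiled): the lock-upgrade idiom used by
 * gru_load_kernel_context() above. up_read()/down_write() is not an atomic
 * upgrade, so another thread may have loaded the context in the window;
 * the state must therefore be rechecked after the write lock is acquired.
 * The structure and example_do_load() below are hypothetical stand-ins,
 * not part of this driver.
 */
#if 0
struct example_state {
	struct rw_semaphore sem;
	void *resource;			/* loaded on demand */
};

static void example_load_locked(struct example_state *s)
{
	/* caller holds s->sem for READ */
	up_read(&s->sem);
	down_write(&s->sem);
	if (!s->resource)		/* recheck - may have raced */
		s->resource = example_do_load(s);
	downgrade_write(&s->sem);	/* return holding READ again */
}
#endif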

/*
 * Free all kernel contexts that are not currently in use.
 *   Returns 0 if all freed, else number of in-use contexts.
 */
static int gru_free_kernel_contexts(void)
{
	struct gru_blade_state *bs;
	struct gru_thread_state *kgts;
	int bid, ret = 0;

	for (bid = 0; bid < GRU_MAX_BLADES; bid++) {
		bs = gru_base[bid];
		if (!bs)
			continue;

		/* Ignore busy contexts. Don't want to block here. */
		if (down_write_trylock(&bs->bs_kgts_sema)) {
			kgts = bs->bs_kgts;
			if (kgts && kgts->ts_gru)
				gru_unload_context(kgts, 0);
			bs->bs_kgts = NULL;
			up_write(&bs->bs_kgts_sema);
			kfree(kgts);
		} else {
			ret++;
		}
	}
	return ret;
}

/*
 * Lock & load the kernel context for the specified blade.
 */
static struct gru_blade_state *gru_lock_kernel_context(int blade_id)
{
	struct gru_blade_state *bs;
	int bid;

	STAT(lock_kernel_context);
again:
	bid = blade_id < 0 ? uv_numa_blade_id() : blade_id;
	bs = gru_base[bid];

	/* Handle the case where migration occurred while waiting for the sema */
	down_read(&bs->bs_kgts_sema);
	if (blade_id < 0 && bid != uv_numa_blade_id()) {
		up_read(&bs->bs_kgts_sema);
		goto again;
	}
	if (!bs->bs_kgts || !bs->bs_kgts->ts_gru)
		gru_load_kernel_context(bs, bid);
	return bs;
}

/*
 * Unlock the kernel context for the specified blade. Context is not
 * unloaded but may be stolen before next use.
 */
static void gru_unlock_kernel_context(int blade_id)
{
	struct gru_blade_state *bs;

	bs = gru_base[blade_id];
	up_read(&bs->bs_kgts_sema);
	STAT(unlock_kernel_context);
}

/*
 * Reserve & get pointers to the DSR/CBRs reserved for the current cpu.
 *	- returns with preemption disabled
 */
static int gru_get_cpu_resources(int dsr_bytes, void **cb, void **dsr)
{
	struct gru_blade_state *bs;
	int lcpu;

	BUG_ON(dsr_bytes > GRU_NUM_KERNEL_DSR_BYTES);
	preempt_disable();
	bs = gru_lock_kernel_context(-1);
	lcpu = uv_blade_processor_id();
	*cb = bs->kernel_cb + lcpu * GRU_HANDLE_STRIDE;
	*dsr = bs->kernel_dsr + lcpu * GRU_NUM_KERNEL_DSR_BYTES;
	return 0;
}

/*
 * Free the current cpu's reserved DSR/CBR resources.
 */
static void gru_free_cpu_resources(void *cb, void *dsr)
{
	gru_unlock_kernel_context(uv_numa_blade_id());
	preempt_enable();
}

/*
 * Reserve GRU resources to be used asynchronously.
 *   Note: currently supports only 1 reservation per blade.
 *
 *	input:
 *		blade_id  - blade on which resources should be reserved
 *		cbrs	  - number of CBRs
 *		dsr_bytes - number of DSR bytes needed
 *	output:
 *		handle to identify resource
 *		(0 = async resources already reserved)
 */
unsigned long gru_reserve_async_resources(int blade_id, int cbrs, int dsr_bytes,
			struct completion *cmp)
{
	struct gru_blade_state *bs;
	struct gru_thread_state *kgts;
	int ret = 0;

	bs = gru_base[blade_id];

	down_write(&bs->bs_kgts_sema);

	/* Verify no resources already reserved */
	if (bs->bs_async_dsr_bytes + bs->bs_async_cbrs)
		goto done;
	bs->bs_async_dsr_bytes = dsr_bytes;
	bs->bs_async_cbrs = cbrs;
	bs->bs_async_wq = cmp;
	kgts = bs->bs_kgts;

	/* Resources changed. Unload context if already loaded */
	if (kgts && kgts->ts_gru)
		gru_unload_context(kgts, 0);
	ret = ASYNC_BID_TO_HAN(blade_id);

done:
	up_write(&bs->bs_kgts_sema);
	return ret;
}
320 * 321 * input: 322 * han - handle to identify resources 323 */ 324 void gru_release_async_resources(unsigned long han) 325 { 326 struct gru_blade_state *bs = ASYNC_HAN_TO_BS(han); 327 328 down_write(&bs->bs_kgts_sema); 329 bs->bs_async_dsr_bytes = 0; 330 bs->bs_async_cbrs = 0; 331 bs->bs_async_wq = NULL; 332 up_write(&bs->bs_kgts_sema); 333 } 334 335 /* 336 * Wait for async GRU instructions to complete. 337 * 338 * input: 339 * han - handle to identify resources 340 */ 341 void gru_wait_async_cbr(unsigned long han) 342 { 343 struct gru_blade_state *bs = ASYNC_HAN_TO_BS(han); 344 345 wait_for_completion(bs->bs_async_wq); 346 mb(); 347 } 348 349 /* 350 * Lock previous reserved async GRU resources 351 * 352 * input: 353 * han - handle to identify resources 354 * output: 355 * cb - pointer to first CBR 356 * dsr - pointer to first DSR 357 */ 358 void gru_lock_async_resource(unsigned long han, void **cb, void **dsr) 359 { 360 struct gru_blade_state *bs = ASYNC_HAN_TO_BS(han); 361 int blade_id = ASYNC_HAN_TO_BID(han); 362 int ncpus; 363 364 gru_lock_kernel_context(blade_id); 365 ncpus = uv_blade_nr_possible_cpus(blade_id); 366 if (cb) 367 *cb = bs->kernel_cb + ncpus * GRU_HANDLE_STRIDE; 368 if (dsr) 369 *dsr = bs->kernel_dsr + ncpus * GRU_NUM_KERNEL_DSR_BYTES; 370 } 371 372 /* 373 * Unlock previous reserved async GRU resources 374 * 375 * input: 376 * han - handle to identify resources 377 */ 378 void gru_unlock_async_resource(unsigned long han) 379 { 380 int blade_id = ASYNC_HAN_TO_BID(han); 381 382 gru_unlock_kernel_context(blade_id); 383 } 384 385 /*----------------------------------------------------------------------*/ 386 int gru_get_cb_exception_detail(void *cb, 387 struct control_block_extended_exc_detail *excdet) 388 { 389 struct gru_control_block_extended *cbe; 390 struct gru_thread_state *kgts = NULL; 391 unsigned long off; 392 int cbrnum, bid; 393 394 /* 395 * Locate kgts for cb. This algorithm is SLOW but 396 * this function is rarely called (ie., almost never). 397 * Performance does not matter. 

/*----------------------------------------------------------------------*/
int gru_get_cb_exception_detail(void *cb,
		struct control_block_extended_exc_detail *excdet)
{
	struct gru_control_block_extended *cbe;
	struct gru_thread_state *kgts = NULL;
	unsigned long off;
	int cbrnum, bid;

	/*
	 * Locate kgts for cb. This algorithm is SLOW but
	 * this function is rarely called (i.e., almost never).
	 * Performance does not matter.
	 */
	for_each_possible_blade(bid) {
		if (!gru_base[bid])
			break;
		kgts = gru_base[bid]->bs_kgts;
		if (!kgts || !kgts->ts_gru)
			continue;
		off = cb - kgts->ts_gru->gs_gru_base_vaddr;
		if (off < GRU_SIZE)
			break;
		kgts = NULL;
	}
	BUG_ON(!kgts);
	cbrnum = thread_cbr_number(kgts, get_cb_number(cb));
	cbe = get_cbe(GRUBASE(cb), cbrnum);
	gru_flush_cache(cbe);	/* CBE not coherent */
	sync_core();
	excdet->opc = cbe->opccpy;
	excdet->exopc = cbe->exopccpy;
	excdet->ecause = cbe->ecause;
	excdet->exceptdet0 = cbe->idef1upd;
	excdet->exceptdet1 = cbe->idef3upd;
	gru_flush_cache(cbe);
	return 0;
}

static char *gru_get_cb_exception_detail_str(int ret, void *cb,
					     char *buf, int size)
{
	struct gru_control_block_status *gen = (void *)cb;
	struct control_block_extended_exc_detail excdet;

	if (ret > 0 && gen->istatus == CBS_EXCEPTION) {
		gru_get_cb_exception_detail(cb, &excdet);
		snprintf(buf, size,
			"GRU:%d exception: cb %p, opc %d, exopc %d, ecause 0x%x,"
			"excdet0 0x%lx, excdet1 0x%x", smp_processor_id(),
			gen, excdet.opc, excdet.exopc, excdet.ecause,
			excdet.exceptdet0, excdet.exceptdet1);
	} else {
		snprintf(buf, size, "No exception");
	}
	return buf;
}

static int gru_wait_idle_or_exception(struct gru_control_block_status *gen)
{
	while (gen->istatus >= CBS_ACTIVE) {
		cpu_relax();
		barrier();
	}
	return gen->istatus;
}

static int gru_retry_exception(void *cb)
{
	struct gru_control_block_status *gen = (void *)cb;
	struct control_block_extended_exc_detail excdet;
	int retry = EXCEPTION_RETRY_LIMIT;

	while (1) {
		if (gru_wait_idle_or_exception(gen) == CBS_IDLE)
			return CBS_IDLE;
		if (gru_get_cb_message_queue_substatus(cb))
			return CBS_EXCEPTION;
		gru_get_cb_exception_detail(cb, &excdet);
		if ((excdet.ecause & ~EXCEPTION_RETRY_BITS) ||
				(excdet.cbrexecstatus & CBR_EXS_ABORT_OCC))
			break;
		if (retry-- == 0)
			break;
		gen->icmd = 1;
		gru_flush_cache(gen);
	}
	return CBS_EXCEPTION;
}

int gru_check_status_proc(void *cb)
{
	struct gru_control_block_status *gen = (void *)cb;
	int ret;

	ret = gen->istatus;
	if (ret == CBS_EXCEPTION)
		ret = gru_retry_exception(cb);
	rmb();
	return ret;
}

int gru_wait_proc(void *cb)
{
	struct gru_control_block_status *gen = (void *)cb;
	int ret;

	ret = gru_wait_idle_or_exception(gen);
	if (ret == CBS_EXCEPTION)
		ret = gru_retry_exception(cb);
	rmb();
	return ret;
}

static void gru_abort(int ret, void *cb, char *str)
{
	char buf[GRU_EXC_STR_SIZE];

	panic("GRU FATAL ERROR: %s - %s\n", str,
	      gru_get_cb_exception_detail_str(ret, cb, buf, sizeof(buf)));
}

void gru_wait_abort_proc(void *cb)
{
	int ret;

	ret = gru_wait_proc(cb);
	if (ret)
		gru_abort(ret, cb, "gru_wait_abort");
}


/*------------------------------ MESSAGE QUEUES -----------------------------*/

/* Internal status. These are NOT returned to the user. */
#define MQIE_AGAIN		-1	/* try again */


/*
 * Save/restore the "present" flag that is in the second line of 2-line
 * messages
 */
static inline int get_present2(void *p)
{
	struct message_header *mhdr = p + GRU_CACHE_LINE_BYTES;
	return mhdr->present;
}

static inline void restore_present2(void *p, int val)
{
	struct message_header *mhdr = p + GRU_CACHE_LINE_BYTES;
	mhdr->present = val;
}

/*
 * Create a message queue.
 *	qlines - message queue size in cache lines. Includes 2-line header.
 */
int gru_create_message_queue(struct gru_message_queue_desc *mqd,
		void *p, unsigned int bytes, int nasid, int vector, int apicid)
{
	struct message_queue *mq = p;
	unsigned int qlines;

	qlines = bytes / GRU_CACHE_LINE_BYTES - 2;
	memset(mq, 0, bytes);
	mq->start = &mq->data;
	mq->start2 = &mq->data + (qlines / 2 - 1) * GRU_CACHE_LINE_BYTES;
	mq->next = &mq->data;
	mq->limit = &mq->data + (qlines - 2) * GRU_CACHE_LINE_BYTES;
	mq->qlines = qlines;
	mq->hstatus[0] = 0;
	mq->hstatus[1] = 1;
	mq->head = gru_mesq_head(2, qlines / 2 + 1);
	mqd->mq = mq;
	mqd->mq_gpa = uv_gpa(mq);
	mqd->qlines = qlines;
	mqd->interrupt_pnode = nasid >> 1;
	mqd->interrupt_vector = vector;
	mqd->interrupt_apicid = apicid;
	return 0;
}
EXPORT_SYMBOL_GPL(gru_create_message_queue);

/*
 * Send a NOOP message to a message queue
 *	Returns:
 *		 0 - if queue is full after the send. This is the normal case
 *		     but various races can change this.
 *		-1 - if mesq sent successfully but queue not full
 *		>0 - unexpected error. MQE_xxx returned
 */
static int send_noop_message(void *cb, struct gru_message_queue_desc *mqd,
				void *mesg)
{
	const struct message_header noop_header = {
					.present = MQS_NOOP, .lines = 1};
	unsigned long m;
	int substatus, ret;
	struct message_header save_mhdr, *mhdr = mesg;

	STAT(mesq_noop);
	save_mhdr = *mhdr;
	*mhdr = noop_header;
	gru_mesq(cb, mqd->mq_gpa, gru_get_tri(mhdr), 1, IMA);
	ret = gru_wait(cb);

	if (ret) {
		substatus = gru_get_cb_message_queue_substatus(cb);
		switch (substatus) {
		case CBSS_NO_ERROR:
			STAT(mesq_noop_unexpected_error);
			ret = MQE_UNEXPECTED_CB_ERR;
			break;
		case CBSS_LB_OVERFLOWED:
			STAT(mesq_noop_lb_overflow);
			ret = MQE_CONGESTION;
			break;
		case CBSS_QLIMIT_REACHED:
			STAT(mesq_noop_qlimit_reached);
			ret = 0;
			break;
		case CBSS_AMO_NACKED:
			STAT(mesq_noop_amo_nacked);
			ret = MQE_CONGESTION;
			break;
		case CBSS_PUT_NACKED:
			STAT(mesq_noop_put_nacked);
			m = mqd->mq_gpa + (gru_get_amo_value_head(cb) << 6);
			gru_vstore(cb, m, gru_get_tri(mesg), XTYPE_CL, 1, 1,
						IMA);
			if (gru_wait(cb) == CBS_IDLE)
				ret = MQIE_AGAIN;
			else
				ret = MQE_UNEXPECTED_CB_ERR;
			break;
		case CBSS_PAGE_OVERFLOW:
			STAT(mesq_noop_page_overflow);
			/* fall through */
		default:
			BUG();
		}
	}
	*mhdr = save_mhdr;
	return ret;
}

/*
 * Handle a gru_mesq full.
 */
static int send_message_queue_full(void *cb, struct gru_message_queue_desc *mqd,
				void *mesg, int lines)
{
	union gru_mesqhead mqh;
	unsigned int limit, head;
	unsigned long avalue;
	int half, qlines;

	/* Determine if switching to first/second half of q */
	avalue = gru_get_amo_value(cb);
	head = gru_get_amo_value_head(cb);
	limit = gru_get_amo_value_limit(cb);

	qlines = mqd->qlines;
	half = (limit != qlines);

	if (half)
		mqh = gru_mesq_head(qlines / 2 + 1, qlines);
	else
		mqh = gru_mesq_head(2, qlines / 2 + 1);

	/* Try to get lock for switching head pointer */
	gru_gamir(cb, EOP_IR_CLR, HSTATUS(mqd->mq_gpa, half), XTYPE_DW, IMA);
	if (gru_wait(cb) != CBS_IDLE)
		goto cberr;
	if (!gru_get_amo_value(cb)) {
		STAT(mesq_qf_locked);
		return MQE_QUEUE_FULL;
	}

	/* Got the lock. Send optional NOOP if queue not full. */
	if (head != limit) {
		if (send_noop_message(cb, mqd, mesg)) {
			gru_gamir(cb, EOP_IR_INC, HSTATUS(mqd->mq_gpa, half),
					XTYPE_DW, IMA);
			if (gru_wait(cb) != CBS_IDLE)
				goto cberr;
			STAT(mesq_qf_noop_not_full);
			return MQIE_AGAIN;
		}
		avalue++;
	}

	/* Then flip queuehead to other half of queue. */
	gru_gamer(cb, EOP_ERR_CSWAP, mqd->mq_gpa, XTYPE_DW, mqh.val, avalue,
							IMA);
	if (gru_wait(cb) != CBS_IDLE)
		goto cberr;

	/* If swapping the queue head was not successful, clear the hstatus lock */
	if (gru_get_amo_value(cb) != avalue) {
		STAT(mesq_qf_switch_head_failed);
		gru_gamir(cb, EOP_IR_INC, HSTATUS(mqd->mq_gpa, half), XTYPE_DW,
							IMA);
		if (gru_wait(cb) != CBS_IDLE)
			goto cberr;
	}
	return MQIE_AGAIN;
cberr:
	STAT(mesq_qf_unexpected_error);
	return MQE_UNEXPECTED_CB_ERR;
}

/*
 * Handle a PUT failure. Note: if the message was a 2-line message, one of
 * the lines might have been successfully written. Before sending the
 * message, "present" must be cleared in BOTH lines to prevent the receiver
 * from prematurely seeing the full message.
 */
static int send_message_put_nacked(void *cb, struct gru_message_queue_desc *mqd,
				void *mesg, int lines)
{
	unsigned long m;
	int ret, loops = 200;	/* experimentally determined */

	m = mqd->mq_gpa + (gru_get_amo_value_head(cb) << 6);
	if (lines == 2) {
		gru_vset(cb, m, 0, XTYPE_CL, lines, 1, IMA);
		if (gru_wait(cb) != CBS_IDLE)
			return MQE_UNEXPECTED_CB_ERR;
	}
	gru_vstore(cb, m, gru_get_tri(mesg), XTYPE_CL, lines, 1, IMA);
	if (gru_wait(cb) != CBS_IDLE)
		return MQE_UNEXPECTED_CB_ERR;

	if (!mqd->interrupt_vector)
		return MQE_OK;

	/*
	 * Send a noop message in order to deliver a cross-partition interrupt
	 * to the SSI that contains the target message queue. Normally, the
	 * interrupt is automatically delivered by hardware following mesq
	 * operations, but some error conditions require explicit delivery.
	 * The noop message will trigger delivery. Otherwise partition failures
	 * could cause unrecovered errors.
	 */
	do {
		ret = send_noop_message(cb, mqd, mesg);
	} while ((ret == MQIE_AGAIN || ret == MQE_CONGESTION) && (loops-- > 0));

	if (ret == MQIE_AGAIN || ret == MQE_CONGESTION) {
		/*
		 * Don't indicate to the app to resend the message, as it's
		 * already been successfully sent. We simply send an OK
		 * (rather than fail the send with MQE_UNEXPECTED_CB_ERR),
		 * assuming that the other side is receiving enough
		 * interrupts to get this message processed anyway.
		 */
		ret = MQE_OK;
	}
	return ret;
}

/*
 * Handle a gru_mesq failure. Some of these failures are software recoverable
 * or retryable.
 */
static int send_message_failure(void *cb, struct gru_message_queue_desc *mqd,
				void *mesg, int lines)
{
	int substatus, ret = 0;

	substatus = gru_get_cb_message_queue_substatus(cb);
	switch (substatus) {
	case CBSS_NO_ERROR:
		STAT(mesq_send_unexpected_error);
		ret = MQE_UNEXPECTED_CB_ERR;
		break;
	case CBSS_LB_OVERFLOWED:
		STAT(mesq_send_lb_overflow);
		ret = MQE_CONGESTION;
		break;
	case CBSS_QLIMIT_REACHED:
		STAT(mesq_send_qlimit_reached);
		ret = send_message_queue_full(cb, mqd, mesg, lines);
		break;
	case CBSS_AMO_NACKED:
		STAT(mesq_send_amo_nacked);
		ret = MQE_CONGESTION;
		break;
	case CBSS_PUT_NACKED:
		STAT(mesq_send_put_nacked);
		ret = send_message_put_nacked(cb, mqd, mesg, lines);
		break;
	case CBSS_PAGE_OVERFLOW:
		STAT(mesq_page_overflow);
		/* fall through */
	default:
		BUG();
	}
	return ret;
}

/*
 * Send a message to a message queue
 *	mqd	message queue descriptor
 *	mesg	message. Must be a vaddr within a GSEG
 *	bytes	message size (<= 2 CL)
 */
int gru_send_message_gpa(struct gru_message_queue_desc *mqd, void *mesg,
				unsigned int bytes)
{
	struct message_header *mhdr;
	void *cb;
	void *dsr;
	int istatus, clines, ret;

	STAT(mesq_send);
	BUG_ON(bytes < sizeof(int) || bytes > 2 * GRU_CACHE_LINE_BYTES);

	clines = DIV_ROUND_UP(bytes, GRU_CACHE_LINE_BYTES);
	if (gru_get_cpu_resources(bytes, &cb, &dsr))
		return MQE_BUG_NO_RESOURCES;
	memcpy(dsr, mesg, bytes);
	mhdr = dsr;
	mhdr->present = MQS_FULL;
	mhdr->lines = clines;
	if (clines == 2) {
		mhdr->present2 = get_present2(mhdr);
		restore_present2(mhdr, MQS_FULL);
	}

	do {
		ret = MQE_OK;
		gru_mesq(cb, mqd->mq_gpa, gru_get_tri(mhdr), clines, IMA);
		istatus = gru_wait(cb);
		if (istatus != CBS_IDLE)
			ret = send_message_failure(cb, mqd, dsr, clines);
	} while (ret == MQIE_AGAIN);
	gru_free_cpu_resources(cb, dsr);

	if (ret)
		STAT(mesq_send_failed);
	return ret;
}
EXPORT_SYMBOL_GPL(gru_send_message_gpa);

/*
 * Advance the receive pointer for the queue to the next message.
 */
void gru_free_message(struct gru_message_queue_desc *mqd, void *mesg)
{
	struct message_queue *mq = mqd->mq;
	struct message_header *mhdr = mq->next;
	void *next, *pnext;
	int half = -1;
	int lines = mhdr->lines;

	if (lines == 2)
		restore_present2(mhdr, MQS_EMPTY);
	mhdr->present = MQS_EMPTY;

	pnext = mq->next;
	next = pnext + GRU_CACHE_LINE_BYTES * lines;
	if (next == mq->limit) {
		next = mq->start;
		half = 1;
	} else if (pnext < mq->start2 && next >= mq->start2) {
		half = 0;
	}

	if (half >= 0)
		mq->hstatus[half] = 1;
	mq->next = next;
}
EXPORT_SYMBOL_GPL(gru_free_message);

/*
 * Get next message from message queue. Return NULL if no message
 * present. Callers must call gru_free_message() to move to the next message.
 *	mqd	message queue descriptor
 */
void *gru_get_next_message(struct gru_message_queue_desc *mqd)
{
	struct message_queue *mq = mqd->mq;
	struct message_header *mhdr = mq->next;
	int present = mhdr->present;

	/* skip NOOP messages */
	while (present == MQS_NOOP) {
		gru_free_message(mqd, mhdr);
		mhdr = mq->next;
		present = mhdr->present;
	}

	/* Wait for both halves of 2 line messages */
	if (present == MQS_FULL && mhdr->lines == 2 &&
				get_present2(mhdr) == MQS_EMPTY)
		present = MQS_EMPTY;

	if (!present) {
		STAT(mesq_receive_none);
		return NULL;
	}

	if (mhdr->lines == 2)
		restore_present2(mhdr, mhdr->present2);

	STAT(mesq_receive);
	return mhdr;
}
EXPORT_SYMBOL_GPL(gru_get_next_message);
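
/*
 * Illustrative sketch (not compiled): end-to-end use of the message queue
 * API exported above, mirroring quicktest1() below. The queue memory, the
 * payload byte and the zero nasid/vector/apicid (no interrupt delivery)
 * are placeholders; real callers pick values for their platform.
 */
#if 0
static int example_mesq_roundtrip(void *qmem /* cacheline aligned, >= 8 CLs */)
{
	struct gru_message_queue_desc mqd;
	char mesg[GRU_CACHE_LINE_BYTES] = { 0 };
	char *m;
	int ret;

	gru_create_message_queue(&mqd, qmem, 8 * GRU_CACHE_LINE_BYTES, 0, 0, 0);

	mesg[8] = 0x5a;				/* payload byte */
	do {
		ret = gru_send_message_gpa(&mqd, mesg, sizeof(mesg));
	} while (ret == MQE_CONGESTION);	/* transient - retry */
	if (ret)
		return -EIO;

	m = gru_get_next_message(&mqd);		/* NULL if queue empty */
	if (!m || m[8] != 0x5a)
		return -EIO;
	gru_free_message(&mqd, m);		/* advance receive pointer */
	return 0;
}
#endif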

/* ---------------------- GRU DATA COPY FUNCTIONS ---------------------------*/

/*
 * Load a DW from a global GPA. The GPA can be a memory or MMR address.
 */
int gru_read_gpa(unsigned long *value, unsigned long gpa)
{
	void *cb;
	void *dsr;
	int ret, iaa;

	STAT(read_gpa);
	if (gru_get_cpu_resources(GRU_NUM_KERNEL_DSR_BYTES, &cb, &dsr))
		return MQE_BUG_NO_RESOURCES;
	iaa = gpa >> 62;
	gru_vload_phys(cb, gpa, gru_get_tri(dsr), iaa, IMA);
	ret = gru_wait(cb);
	if (ret == CBS_IDLE)
		*value = *(unsigned long *)dsr;
	gru_free_cpu_resources(cb, dsr);
	return ret;
}
EXPORT_SYMBOL_GPL(gru_read_gpa);


/*
 * Copy a block of data using the GRU resources
 */
int gru_copy_gpa(unsigned long dest_gpa, unsigned long src_gpa,
				unsigned int bytes)
{
	void *cb;
	void *dsr;
	int ret;

	STAT(copy_gpa);
	if (gru_get_cpu_resources(GRU_NUM_KERNEL_DSR_BYTES, &cb, &dsr))
		return MQE_BUG_NO_RESOURCES;
	gru_bcopy(cb, src_gpa, dest_gpa, gru_get_tri(dsr),
		  XTYPE_B, bytes, GRU_NUM_KERNEL_DSR_CL, IMA);
	ret = gru_wait(cb);
	gru_free_cpu_resources(cb, dsr);
	return ret;
}
EXPORT_SYMBOL_GPL(gru_copy_gpa);
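
/*
 * Illustrative sketch (not compiled): copying between two kernel buffers
 * with gru_copy_gpa(), much like quicktest3() below. Global addresses are
 * obtained with uv_gpa(); anything other than CBS_IDLE indicates the copy
 * did not complete cleanly. The function name and parameters are placeholders.
 */
#if 0
static int example_gru_copy(void *dst, void *src, unsigned int bytes)
{
	if (gru_copy_gpa(uv_gpa(dst), uv_gpa(src), bytes) != CBS_IDLE)
		return -EIO;	/* CB exception or no kernel resources */
	return 0;
}
#endif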

/* ------------------- KERNEL QUICKTESTS RUN AT STARTUP ----------------*/
/* Temp - will delete after we gain confidence in the GRU */

static int quicktest0(unsigned long arg)
{
	unsigned long word0;
	unsigned long word1;
	void *cb;
	void *dsr;
	unsigned long *p;
	int ret = -EIO;

	if (gru_get_cpu_resources(GRU_CACHE_LINE_BYTES, &cb, &dsr))
		return MQE_BUG_NO_RESOURCES;
	p = dsr;
	word0 = MAGIC;
	word1 = 0;

	gru_vload(cb, uv_gpa(&word0), gru_get_tri(dsr), XTYPE_DW, 1, 1, IMA);
	if (gru_wait(cb) != CBS_IDLE) {
		printk(KERN_DEBUG "GRU:%d quicktest0: CBR failure 1\n", smp_processor_id());
		goto done;
	}

	if (*p != MAGIC) {
		printk(KERN_DEBUG "GRU:%d quicktest0 bad magic 0x%lx\n", smp_processor_id(), *p);
		goto done;
	}
	gru_vstore(cb, uv_gpa(&word1), gru_get_tri(dsr), XTYPE_DW, 1, 1, IMA);
	if (gru_wait(cb) != CBS_IDLE) {
		printk(KERN_DEBUG "GRU:%d quicktest0: CBR failure 2\n", smp_processor_id());
		goto done;
	}

	if (word0 != word1 || word1 != MAGIC) {
		printk(KERN_DEBUG
		       "GRU:%d quicktest0 err: found 0x%lx, expected 0x%lx\n",
		       smp_processor_id(), word1, MAGIC);
		goto done;
	}
	ret = 0;

done:
	gru_free_cpu_resources(cb, dsr);
	return ret;
}

#define ALIGNUP(p, q)	((void *)(((unsigned long)(p) + (q) - 1) & ~(q - 1)))

static int quicktest1(unsigned long arg)
{
	struct gru_message_queue_desc mqd;
	void *p, *mq;
	int i, ret = -EIO;
	char mes[GRU_CACHE_LINE_BYTES], *m;

	/* Need a 1K cacheline-aligned buffer that does not cross a page boundary */
	p = kmalloc(4096, 0);
	if (p == NULL)
		return -ENOMEM;
	mq = ALIGNUP(p, 1024);
	memset(mes, 0xee, sizeof(mes));

	gru_create_message_queue(&mqd, mq, 8 * GRU_CACHE_LINE_BYTES, 0, 0, 0);
	for (i = 0; i < 6; i++) {
		mes[8] = i;
		do {
			ret = gru_send_message_gpa(&mqd, mes, sizeof(mes));
		} while (ret == MQE_CONGESTION);
		if (ret)
			break;
	}
	if (ret != MQE_QUEUE_FULL || i != 4) {
		printk(KERN_DEBUG "GRU:%d quicktest1: unexpected status %d, i %d\n",
		       smp_processor_id(), ret, i);
		goto done;
	}

	for (i = 0; i < 6; i++) {
		m = gru_get_next_message(&mqd);
		if (!m || m[8] != i)
			break;
		gru_free_message(&mqd, m);
	}
	if (i != 4) {
		printk(KERN_DEBUG "GRU:%d quicktest1: bad message, i %d, m %p, m8 %d\n",
			smp_processor_id(), i, m, m ? m[8] : -1);
		goto done;
	}
	ret = 0;

done:
	kfree(p);
	return ret;
}

static int quicktest2(unsigned long arg)
{
	static DECLARE_COMPLETION(cmp);
	unsigned long han;
	int blade_id = 0;
	int numcb = 4;
	int ret = 0;
	unsigned long *buf;
	void *cb0, *cb;
	struct gru_control_block_status *gen;
	int i, k, istatus, bytes;

	bytes = numcb * 4 * 8;
	buf = kmalloc(bytes, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	ret = -EBUSY;
	han = gru_reserve_async_resources(blade_id, numcb, 0, &cmp);
	if (!han)
		goto done;

	gru_lock_async_resource(han, &cb0, NULL);
	memset(buf, 0xee, bytes);
	for (i = 0; i < numcb; i++)
		gru_vset(cb0 + i * GRU_HANDLE_STRIDE, uv_gpa(&buf[i * 4]), 0,
				XTYPE_DW, 4, 1, IMA_INTERRUPT);

	ret = 0;
	k = numcb;
	do {
		gru_wait_async_cbr(han);
		for (i = 0; i < numcb; i++) {
			cb = cb0 + i * GRU_HANDLE_STRIDE;
			istatus = gru_check_status(cb);
			if (istatus != CBS_ACTIVE && istatus != CBS_CALL_OS)
				break;
		}
		if (i == numcb)
			continue;
		if (istatus != CBS_IDLE) {
			printk(KERN_DEBUG "GRU:%d quicktest2: cb %d, exception\n", smp_processor_id(), i);
			ret = -EFAULT;
		} else if (buf[4 * i] || buf[4 * i + 1] || buf[4 * i + 2] ||
				buf[4 * i + 3]) {
			printk(KERN_DEBUG "GRU:%d quicktest2:cb %d, buf 0x%lx, 0x%lx, 0x%lx, 0x%lx\n",
			       smp_processor_id(), i, buf[4 * i], buf[4 * i + 1], buf[4 * i + 2], buf[4 * i + 3]);
			ret = -EIO;
		}
		k--;
		gen = cb;
		gen->istatus = CBS_CALL_OS; /* don't handle this CBR again */
	} while (k);
	BUG_ON(cmp.done);

	gru_unlock_async_resource(han);
	gru_release_async_resources(han);
done:
	kfree(buf);
	return ret;
}

#define BUFSIZE 200
static int quicktest3(unsigned long arg)
{
	char buf1[BUFSIZE], buf2[BUFSIZE];
	int ret = 0;

	memset(buf2, 0, sizeof(buf2));
	memset(buf1, get_cycles() & 255, sizeof(buf1));
	gru_copy_gpa(uv_gpa(buf2), uv_gpa(buf1), BUFSIZE);
	if (memcmp(buf1, buf2, BUFSIZE)) {
		printk(KERN_DEBUG "GRU:%d quicktest3 error\n", smp_processor_id());
		ret = -EIO;
	}
	return ret;
}

/*
 * Debugging only. User hook for various kernel tests
 * of driver & gru.
 */
int gru_ktest(unsigned long arg)
{
	int ret = -EINVAL;

	switch (arg & 0xff) {
	case 0:
		ret = quicktest0(arg);
		break;
	case 1:
		ret = quicktest1(arg);
		break;
	case 2:
		ret = quicktest2(arg);
		break;
	case 3:
		ret = quicktest3(arg);
		break;
	case 99:
		ret = gru_free_kernel_contexts();
		break;
	}
	return ret;
}

int gru_kservices_init(void)
{
	return 0;
}

void gru_kservices_exit(void)
{
	if (gru_free_kernel_contexts())
		BUG();
}