1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * SN Platform GRU Driver 4 * 5 * KERNEL SERVICES THAT USE THE GRU 6 * 7 * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. 8 */ 9 10 #include <linux/kernel.h> 11 #include <linux/errno.h> 12 #include <linux/slab.h> 13 #include <linux/mm.h> 14 #include <linux/spinlock.h> 15 #include <linux/device.h> 16 #include <linux/miscdevice.h> 17 #include <linux/proc_fs.h> 18 #include <linux/interrupt.h> 19 #include <linux/sync_core.h> 20 #include <linux/uaccess.h> 21 #include <linux/delay.h> 22 #include <linux/export.h> 23 #include <asm/io_apic.h> 24 #include "gru.h" 25 #include "grulib.h" 26 #include "grutables.h" 27 #include "grukservices.h" 28 #include "gru_instructions.h" 29 #include <asm/uv/uv_hub.h> 30 31 /* 32 * Kernel GRU Usage 33 * 34 * The following is an interim algorithm for management of kernel GRU 35 * resources. This will likely be replaced when we better understand the 36 * kernel/user requirements. 37 * 38 * Blade percpu resources reserved for kernel use. These resources are 39 * reserved whenever the kernel context for the blade is loaded. Note 40 * that the kernel context is not guaranteed to be always available. It is 41 * loaded on demand & can be stolen by a user if the user demand exceeds the 42 * kernel demand. The kernel can always reload the kernel context but 43 * a SLEEP may be required!!!. 44 * 45 * Async Overview: 46 * 47 * Each blade has one "kernel context" that owns GRU kernel resources 48 * located on the blade. Kernel drivers use GRU resources in this context 49 * for sending messages, zeroing memory, etc. 50 * 51 * The kernel context is dynamically loaded on demand. If it is not in 52 * use by the kernel, the kernel context can be unloaded & given to a user. 53 * The kernel context will be reloaded when needed. This may require that 54 * a context be stolen from a user. 55 * NOTE: frequent unloading/reloading of the kernel context is 56 * expensive. We are depending on batch schedulers, cpusets, sane 57 * drivers or some other mechanism to prevent the need for frequent 58 * stealing/reloading. 59 * 60 * The kernel context consists of two parts: 61 * - 1 CB & a few DSRs that are reserved for each cpu on the blade. 62 * Each cpu has it's own private resources & does not share them 63 * with other cpus. These resources are used serially, ie, 64 * locked, used & unlocked on each call to a function in 65 * grukservices. 66 * (Now that we have dynamic loading of kernel contexts, I 67 * may rethink this & allow sharing between cpus....) 68 * 69 * - Additional resources can be reserved long term & used directly 70 * by UV drivers located in the kernel. Drivers using these GRU 71 * resources can use asynchronous GRU instructions that send 72 * interrupts on completion. 73 * - these resources must be explicitly locked/unlocked 74 * - locked resources prevent (obviously) the kernel 75 * context from being unloaded. 76 * - drivers using these resource directly issue their own 77 * GRU instruction and must wait/check completion. 78 * 79 * When these resources are reserved, the caller can optionally 80 * associate a wait_queue with the resources and use asynchronous 81 * GRU instructions. When an async GRU instruction completes, the 82 * driver will do a wakeup on the event. 83 * 84 */ 85 86 87 #define ASYNC_HAN_TO_BID(h) ((h) - 1) 88 #define ASYNC_BID_TO_HAN(b) ((b) + 1) 89 #define ASYNC_HAN_TO_BS(h) gru_base[ASYNC_HAN_TO_BID(h)] 90 91 #define GRU_NUM_KERNEL_CBR 1 92 #define GRU_NUM_KERNEL_DSR_BYTES 256 93 #define GRU_NUM_KERNEL_DSR_CL (GRU_NUM_KERNEL_DSR_BYTES / \ 94 GRU_CACHE_LINE_BYTES) 95 96 /* GRU instruction attributes for all instructions */ 97 #define IMA IMA_CB_DELAY 98 99 /* GRU cacheline size is always 64 bytes - even on arches with 128 byte lines */ 100 #define __gru_cacheline_aligned__ \ 101 __attribute__((__aligned__(GRU_CACHE_LINE_BYTES))) 102 103 #define MAGIC 0x1234567887654321UL 104 105 /* Default retry count for GRU errors on kernel instructions */ 106 #define EXCEPTION_RETRY_LIMIT 3 107 108 /* Status of message queue sections */ 109 #define MQS_EMPTY 0 110 #define MQS_FULL 1 111 #define MQS_NOOP 2 112 113 /*----------------- RESOURCE MANAGEMENT -------------------------------------*/ 114 /* optimized for x86_64 */ 115 struct message_queue { 116 union gru_mesqhead head __gru_cacheline_aligned__; /* CL 0 */ 117 int qlines; /* DW 1 */ 118 long hstatus[2]; 119 void *next __gru_cacheline_aligned__;/* CL 1 */ 120 void *limit; 121 void *start; 122 void *start2; 123 char data ____cacheline_aligned; /* CL 2 */ 124 }; 125 126 /* First word in every message - used by mesq interface */ 127 struct message_header { 128 char present; 129 char present2; 130 char lines; 131 char fill; 132 }; 133 134 #define HSTATUS(mq, h) ((mq) + offsetof(struct message_queue, hstatus[h])) 135 136 /* 137 * Reload the blade's kernel context into a GRU chiplet. Called holding 138 * the bs_kgts_sema for READ. Will steal user contexts if necessary. 139 */ 140 static void gru_load_kernel_context(struct gru_blade_state *bs, int blade_id) 141 { 142 struct gru_state *gru; 143 struct gru_thread_state *kgts; 144 void *vaddr; 145 int ctxnum, ncpus; 146 147 up_read(&bs->bs_kgts_sema); 148 down_write(&bs->bs_kgts_sema); 149 150 if (!bs->bs_kgts) { 151 do { 152 bs->bs_kgts = gru_alloc_gts(NULL, 0, 0, 0, 0, 0); 153 if (!IS_ERR(bs->bs_kgts)) 154 break; 155 msleep(1); 156 } while (true); 157 bs->bs_kgts->ts_user_blade_id = blade_id; 158 } 159 kgts = bs->bs_kgts; 160 161 if (!kgts->ts_gru) { 162 STAT(load_kernel_context); 163 ncpus = uv_blade_nr_possible_cpus(blade_id); 164 kgts->ts_cbr_au_count = GRU_CB_COUNT_TO_AU( 165 GRU_NUM_KERNEL_CBR * ncpus + bs->bs_async_cbrs); 166 kgts->ts_dsr_au_count = GRU_DS_BYTES_TO_AU( 167 GRU_NUM_KERNEL_DSR_BYTES * ncpus + 168 bs->bs_async_dsr_bytes); 169 while (!gru_assign_gru_context(kgts)) { 170 msleep(1); 171 gru_steal_context(kgts); 172 } 173 gru_load_context(kgts); 174 gru = bs->bs_kgts->ts_gru; 175 vaddr = gru->gs_gru_base_vaddr; 176 ctxnum = kgts->ts_ctxnum; 177 bs->kernel_cb = get_gseg_base_address_cb(vaddr, ctxnum, 0); 178 bs->kernel_dsr = get_gseg_base_address_ds(vaddr, ctxnum, 0); 179 } 180 downgrade_write(&bs->bs_kgts_sema); 181 } 182 183 /* 184 * Free all kernel contexts that are not currently in use. 185 * Returns 0 if all freed, else number of inuse context. 186 */ 187 static int gru_free_kernel_contexts(void) 188 { 189 struct gru_blade_state *bs; 190 struct gru_thread_state *kgts; 191 int bid, ret = 0; 192 193 for (bid = 0; bid < GRU_MAX_BLADES; bid++) { 194 bs = gru_base[bid]; 195 if (!bs) 196 continue; 197 198 /* Ignore busy contexts. Don't want to block here. */ 199 if (down_write_trylock(&bs->bs_kgts_sema)) { 200 kgts = bs->bs_kgts; 201 if (kgts && kgts->ts_gru) 202 gru_unload_context(kgts, 0); 203 bs->bs_kgts = NULL; 204 up_write(&bs->bs_kgts_sema); 205 kfree(kgts); 206 } else { 207 ret++; 208 } 209 } 210 return ret; 211 } 212 213 /* 214 * Lock & load the kernel context for the specified blade. 215 */ 216 static struct gru_blade_state *gru_lock_kernel_context(int blade_id) 217 { 218 struct gru_blade_state *bs; 219 int bid; 220 221 STAT(lock_kernel_context); 222 again: 223 bid = blade_id < 0 ? uv_numa_blade_id() : blade_id; 224 bs = gru_base[bid]; 225 226 /* Handle the case where migration occurred while waiting for the sema */ 227 down_read(&bs->bs_kgts_sema); 228 if (blade_id < 0 && bid != uv_numa_blade_id()) { 229 up_read(&bs->bs_kgts_sema); 230 goto again; 231 } 232 if (!bs->bs_kgts || !bs->bs_kgts->ts_gru) 233 gru_load_kernel_context(bs, bid); 234 return bs; 235 236 } 237 238 /* 239 * Unlock the kernel context for the specified blade. Context is not 240 * unloaded but may be stolen before next use. 241 */ 242 static void gru_unlock_kernel_context(int blade_id) 243 { 244 struct gru_blade_state *bs; 245 246 bs = gru_base[blade_id]; 247 up_read(&bs->bs_kgts_sema); 248 STAT(unlock_kernel_context); 249 } 250 251 /* 252 * Reserve & get pointers to the DSR/CBRs reserved for the current cpu. 253 * - returns with preemption disabled 254 */ 255 static int gru_get_cpu_resources(int dsr_bytes, void **cb, void **dsr) 256 { 257 struct gru_blade_state *bs; 258 int lcpu; 259 260 BUG_ON(dsr_bytes > GRU_NUM_KERNEL_DSR_BYTES); 261 bs = gru_lock_kernel_context(-1); 262 lcpu = uv_blade_processor_id(); 263 *cb = bs->kernel_cb + lcpu * GRU_HANDLE_STRIDE; 264 *dsr = bs->kernel_dsr + lcpu * GRU_NUM_KERNEL_DSR_BYTES; 265 return 0; 266 } 267 268 /* 269 * Free the current cpus reserved DSR/CBR resources. 270 */ 271 static void gru_free_cpu_resources(void *cb, void *dsr) 272 { 273 gru_unlock_kernel_context(uv_numa_blade_id()); 274 } 275 276 /* 277 * Reserve GRU resources to be used asynchronously. 278 * Note: currently supports only 1 reservation per blade. 279 * 280 * input: 281 * blade_id - blade on which resources should be reserved 282 * cbrs - number of CBRs 283 * dsr_bytes - number of DSR bytes needed 284 * output: 285 * handle to identify resource 286 * (0 = async resources already reserved) 287 */ 288 unsigned long gru_reserve_async_resources(int blade_id, int cbrs, int dsr_bytes, 289 struct completion *cmp) 290 { 291 struct gru_blade_state *bs; 292 struct gru_thread_state *kgts; 293 int ret = 0; 294 295 bs = gru_base[blade_id]; 296 297 down_write(&bs->bs_kgts_sema); 298 299 /* Verify no resources already reserved */ 300 if (bs->bs_async_dsr_bytes + bs->bs_async_cbrs) 301 goto done; 302 bs->bs_async_dsr_bytes = dsr_bytes; 303 bs->bs_async_cbrs = cbrs; 304 bs->bs_async_wq = cmp; 305 kgts = bs->bs_kgts; 306 307 /* Resources changed. Unload context if already loaded */ 308 if (kgts && kgts->ts_gru) 309 gru_unload_context(kgts, 0); 310 ret = ASYNC_BID_TO_HAN(blade_id); 311 312 done: 313 up_write(&bs->bs_kgts_sema); 314 return ret; 315 } 316 317 /* 318 * Release async resources previously reserved. 319 * 320 * input: 321 * han - handle to identify resources 322 */ 323 void gru_release_async_resources(unsigned long han) 324 { 325 struct gru_blade_state *bs = ASYNC_HAN_TO_BS(han); 326 327 down_write(&bs->bs_kgts_sema); 328 bs->bs_async_dsr_bytes = 0; 329 bs->bs_async_cbrs = 0; 330 bs->bs_async_wq = NULL; 331 up_write(&bs->bs_kgts_sema); 332 } 333 334 /* 335 * Wait for async GRU instructions to complete. 336 * 337 * input: 338 * han - handle to identify resources 339 */ 340 void gru_wait_async_cbr(unsigned long han) 341 { 342 struct gru_blade_state *bs = ASYNC_HAN_TO_BS(han); 343 344 wait_for_completion(bs->bs_async_wq); 345 mb(); 346 } 347 348 /* 349 * Lock previous reserved async GRU resources 350 * 351 * input: 352 * han - handle to identify resources 353 * output: 354 * cb - pointer to first CBR 355 * dsr - pointer to first DSR 356 */ 357 void gru_lock_async_resource(unsigned long han, void **cb, void **dsr) 358 { 359 struct gru_blade_state *bs = ASYNC_HAN_TO_BS(han); 360 int blade_id = ASYNC_HAN_TO_BID(han); 361 int ncpus; 362 363 gru_lock_kernel_context(blade_id); 364 ncpus = uv_blade_nr_possible_cpus(blade_id); 365 if (cb) 366 *cb = bs->kernel_cb + ncpus * GRU_HANDLE_STRIDE; 367 if (dsr) 368 *dsr = bs->kernel_dsr + ncpus * GRU_NUM_KERNEL_DSR_BYTES; 369 } 370 371 /* 372 * Unlock previous reserved async GRU resources 373 * 374 * input: 375 * han - handle to identify resources 376 */ 377 void gru_unlock_async_resource(unsigned long han) 378 { 379 int blade_id = ASYNC_HAN_TO_BID(han); 380 381 gru_unlock_kernel_context(blade_id); 382 } 383 384 /*----------------------------------------------------------------------*/ 385 int gru_get_cb_exception_detail(void *cb, 386 struct control_block_extended_exc_detail *excdet) 387 { 388 struct gru_control_block_extended *cbe; 389 struct gru_thread_state *kgts = NULL; 390 unsigned long off; 391 int cbrnum, bid; 392 393 /* 394 * Locate kgts for cb. This algorithm is SLOW but 395 * this function is rarely called (ie., almost never). 396 * Performance does not matter. 397 */ 398 for_each_possible_blade(bid) { 399 if (!gru_base[bid]) 400 break; 401 kgts = gru_base[bid]->bs_kgts; 402 if (!kgts || !kgts->ts_gru) 403 continue; 404 off = cb - kgts->ts_gru->gs_gru_base_vaddr; 405 if (off < GRU_SIZE) 406 break; 407 kgts = NULL; 408 } 409 BUG_ON(!kgts); 410 cbrnum = thread_cbr_number(kgts, get_cb_number(cb)); 411 cbe = get_cbe(GRUBASE(cb), cbrnum); 412 gru_flush_cache(cbe); /* CBE not coherent */ 413 sync_core(); 414 excdet->opc = cbe->opccpy; 415 excdet->exopc = cbe->exopccpy; 416 excdet->ecause = cbe->ecause; 417 excdet->exceptdet0 = cbe->idef1upd; 418 excdet->exceptdet1 = cbe->idef3upd; 419 gru_flush_cache(cbe); 420 return 0; 421 } 422 423 static char *gru_get_cb_exception_detail_str(int ret, void *cb, 424 char *buf, int size) 425 { 426 struct gru_control_block_status *gen = cb; 427 struct control_block_extended_exc_detail excdet; 428 429 if (ret > 0 && gen->istatus == CBS_EXCEPTION) { 430 gru_get_cb_exception_detail(cb, &excdet); 431 snprintf(buf, size, 432 "GRU:%d exception: cb %p, opc %d, exopc %d, ecause 0x%x," 433 "excdet0 0x%lx, excdet1 0x%x", smp_processor_id(), 434 gen, excdet.opc, excdet.exopc, excdet.ecause, 435 excdet.exceptdet0, excdet.exceptdet1); 436 } else { 437 snprintf(buf, size, "No exception"); 438 } 439 return buf; 440 } 441 442 static int gru_wait_idle_or_exception(struct gru_control_block_status *gen) 443 { 444 while (gen->istatus >= CBS_ACTIVE) { 445 cpu_relax(); 446 barrier(); 447 } 448 return gen->istatus; 449 } 450 451 static int gru_retry_exception(void *cb) 452 { 453 struct gru_control_block_status *gen = cb; 454 struct control_block_extended_exc_detail excdet; 455 int retry = EXCEPTION_RETRY_LIMIT; 456 457 while (1) { 458 if (gru_wait_idle_or_exception(gen) == CBS_IDLE) 459 return CBS_IDLE; 460 if (gru_get_cb_message_queue_substatus(cb)) 461 return CBS_EXCEPTION; 462 gru_get_cb_exception_detail(cb, &excdet); 463 if ((excdet.ecause & ~EXCEPTION_RETRY_BITS) || 464 (excdet.cbrexecstatus & CBR_EXS_ABORT_OCC)) 465 break; 466 if (retry-- == 0) 467 break; 468 gen->icmd = 1; 469 gru_flush_cache(gen); 470 } 471 return CBS_EXCEPTION; 472 } 473 474 int gru_check_status_proc(void *cb) 475 { 476 struct gru_control_block_status *gen = cb; 477 int ret; 478 479 ret = gen->istatus; 480 if (ret == CBS_EXCEPTION) 481 ret = gru_retry_exception(cb); 482 rmb(); 483 return ret; 484 485 } 486 487 int gru_wait_proc(void *cb) 488 { 489 struct gru_control_block_status *gen = cb; 490 int ret; 491 492 ret = gru_wait_idle_or_exception(gen); 493 if (ret == CBS_EXCEPTION) 494 ret = gru_retry_exception(cb); 495 rmb(); 496 return ret; 497 } 498 499 static void gru_abort(int ret, void *cb, char *str) 500 { 501 char buf[GRU_EXC_STR_SIZE]; 502 503 panic("GRU FATAL ERROR: %s - %s\n", str, 504 gru_get_cb_exception_detail_str(ret, cb, buf, sizeof(buf))); 505 } 506 507 void gru_wait_abort_proc(void *cb) 508 { 509 int ret; 510 511 ret = gru_wait_proc(cb); 512 if (ret) 513 gru_abort(ret, cb, "gru_wait_abort"); 514 } 515 516 517 /*------------------------------ MESSAGE QUEUES -----------------------------*/ 518 519 /* Internal status . These are NOT returned to the user. */ 520 #define MQIE_AGAIN -1 /* try again */ 521 522 523 /* 524 * Save/restore the "present" flag that is in the second line of 2-line 525 * messages 526 */ 527 static inline int get_present2(void *p) 528 { 529 struct message_header *mhdr = p + GRU_CACHE_LINE_BYTES; 530 return mhdr->present; 531 } 532 533 static inline void restore_present2(void *p, int val) 534 { 535 struct message_header *mhdr = p + GRU_CACHE_LINE_BYTES; 536 mhdr->present = val; 537 } 538 539 /* 540 * Create a message queue. 541 * qlines - message queue size in cache lines. Includes 2-line header. 542 */ 543 int gru_create_message_queue(struct gru_message_queue_desc *mqd, 544 void *p, unsigned int bytes, int nasid, int vector, int apicid) 545 { 546 struct message_queue *mq = p; 547 unsigned int qlines; 548 549 qlines = bytes / GRU_CACHE_LINE_BYTES - 2; 550 memset(mq, 0, bytes); 551 mq->start = &mq->data; 552 mq->start2 = &mq->data + (qlines / 2 - 1) * GRU_CACHE_LINE_BYTES; 553 mq->next = &mq->data; 554 mq->limit = &mq->data + (qlines - 2) * GRU_CACHE_LINE_BYTES; 555 mq->qlines = qlines; 556 mq->hstatus[0] = 0; 557 mq->hstatus[1] = 1; 558 mq->head = gru_mesq_head(2, qlines / 2 + 1); 559 mqd->mq = mq; 560 mqd->mq_gpa = uv_gpa(mq); 561 mqd->qlines = qlines; 562 mqd->interrupt_pnode = nasid >> 1; 563 mqd->interrupt_vector = vector; 564 mqd->interrupt_apicid = apicid; 565 return 0; 566 } 567 EXPORT_SYMBOL_GPL(gru_create_message_queue); 568 569 /* 570 * Send a NOOP message to a message queue 571 * Returns: 572 * 0 - if queue is full after the send. This is the normal case 573 * but various races can change this. 574 * -1 - if mesq sent successfully but queue not full 575 * >0 - unexpected error. MQE_xxx returned 576 */ 577 static int send_noop_message(void *cb, struct gru_message_queue_desc *mqd, 578 void *mesg) 579 { 580 const struct message_header noop_header = { 581 .present = MQS_NOOP, .lines = 1}; 582 unsigned long m; 583 int substatus, ret; 584 struct message_header save_mhdr, *mhdr = mesg; 585 586 STAT(mesq_noop); 587 save_mhdr = *mhdr; 588 *mhdr = noop_header; 589 gru_mesq(cb, mqd->mq_gpa, gru_get_tri(mhdr), 1, IMA); 590 ret = gru_wait(cb); 591 592 if (ret) { 593 substatus = gru_get_cb_message_queue_substatus(cb); 594 switch (substatus) { 595 case CBSS_NO_ERROR: 596 STAT(mesq_noop_unexpected_error); 597 ret = MQE_UNEXPECTED_CB_ERR; 598 break; 599 case CBSS_LB_OVERFLOWED: 600 STAT(mesq_noop_lb_overflow); 601 ret = MQE_CONGESTION; 602 break; 603 case CBSS_QLIMIT_REACHED: 604 STAT(mesq_noop_qlimit_reached); 605 ret = 0; 606 break; 607 case CBSS_AMO_NACKED: 608 STAT(mesq_noop_amo_nacked); 609 ret = MQE_CONGESTION; 610 break; 611 case CBSS_PUT_NACKED: 612 STAT(mesq_noop_put_nacked); 613 m = mqd->mq_gpa + (gru_get_amo_value_head(cb) << 6); 614 gru_vstore(cb, m, gru_get_tri(mesg), XTYPE_CL, 1, 1, 615 IMA); 616 if (gru_wait(cb) == CBS_IDLE) 617 ret = MQIE_AGAIN; 618 else 619 ret = MQE_UNEXPECTED_CB_ERR; 620 break; 621 case CBSS_PAGE_OVERFLOW: 622 STAT(mesq_noop_page_overflow); 623 fallthrough; 624 default: 625 BUG(); 626 } 627 } 628 *mhdr = save_mhdr; 629 return ret; 630 } 631 632 /* 633 * Handle a gru_mesq full. 634 */ 635 static int send_message_queue_full(void *cb, struct gru_message_queue_desc *mqd, 636 void *mesg, int lines) 637 { 638 union gru_mesqhead mqh; 639 unsigned int limit, head; 640 unsigned long avalue; 641 int half, qlines; 642 643 /* Determine if switching to first/second half of q */ 644 avalue = gru_get_amo_value(cb); 645 head = gru_get_amo_value_head(cb); 646 limit = gru_get_amo_value_limit(cb); 647 648 qlines = mqd->qlines; 649 half = (limit != qlines); 650 651 if (half) 652 mqh = gru_mesq_head(qlines / 2 + 1, qlines); 653 else 654 mqh = gru_mesq_head(2, qlines / 2 + 1); 655 656 /* Try to get lock for switching head pointer */ 657 gru_gamir(cb, EOP_IR_CLR, HSTATUS(mqd->mq_gpa, half), XTYPE_DW, IMA); 658 if (gru_wait(cb) != CBS_IDLE) 659 goto cberr; 660 if (!gru_get_amo_value(cb)) { 661 STAT(mesq_qf_locked); 662 return MQE_QUEUE_FULL; 663 } 664 665 /* Got the lock. Send optional NOP if queue not full, */ 666 if (head != limit) { 667 if (send_noop_message(cb, mqd, mesg)) { 668 gru_gamir(cb, EOP_IR_INC, HSTATUS(mqd->mq_gpa, half), 669 XTYPE_DW, IMA); 670 if (gru_wait(cb) != CBS_IDLE) 671 goto cberr; 672 STAT(mesq_qf_noop_not_full); 673 return MQIE_AGAIN; 674 } 675 avalue++; 676 } 677 678 /* Then flip queuehead to other half of queue. */ 679 gru_gamer(cb, EOP_ERR_CSWAP, mqd->mq_gpa, XTYPE_DW, mqh.val, avalue, 680 IMA); 681 if (gru_wait(cb) != CBS_IDLE) 682 goto cberr; 683 684 /* If not successfully in swapping queue head, clear the hstatus lock */ 685 if (gru_get_amo_value(cb) != avalue) { 686 STAT(mesq_qf_switch_head_failed); 687 gru_gamir(cb, EOP_IR_INC, HSTATUS(mqd->mq_gpa, half), XTYPE_DW, 688 IMA); 689 if (gru_wait(cb) != CBS_IDLE) 690 goto cberr; 691 } 692 return MQIE_AGAIN; 693 cberr: 694 STAT(mesq_qf_unexpected_error); 695 return MQE_UNEXPECTED_CB_ERR; 696 } 697 698 /* 699 * Handle a PUT failure. Note: if message was a 2-line message, one of the 700 * lines might have successfully have been written. Before sending the 701 * message, "present" must be cleared in BOTH lines to prevent the receiver 702 * from prematurely seeing the full message. 703 */ 704 static int send_message_put_nacked(void *cb, struct gru_message_queue_desc *mqd, 705 void *mesg, int lines) 706 { 707 unsigned long m; 708 int ret, loops = 200; /* experimentally determined */ 709 710 m = mqd->mq_gpa + (gru_get_amo_value_head(cb) << 6); 711 if (lines == 2) { 712 gru_vset(cb, m, 0, XTYPE_CL, lines, 1, IMA); 713 if (gru_wait(cb) != CBS_IDLE) 714 return MQE_UNEXPECTED_CB_ERR; 715 } 716 gru_vstore(cb, m, gru_get_tri(mesg), XTYPE_CL, lines, 1, IMA); 717 if (gru_wait(cb) != CBS_IDLE) 718 return MQE_UNEXPECTED_CB_ERR; 719 720 if (!mqd->interrupt_vector) 721 return MQE_OK; 722 723 /* 724 * Send a noop message in order to deliver a cross-partition interrupt 725 * to the SSI that contains the target message queue. Normally, the 726 * interrupt is automatically delivered by hardware following mesq 727 * operations, but some error conditions require explicit delivery. 728 * The noop message will trigger delivery. Otherwise partition failures 729 * could cause unrecovered errors. 730 */ 731 do { 732 ret = send_noop_message(cb, mqd, mesg); 733 } while ((ret == MQIE_AGAIN || ret == MQE_CONGESTION) && (loops-- > 0)); 734 735 if (ret == MQIE_AGAIN || ret == MQE_CONGESTION) { 736 /* 737 * Don't indicate to the app to resend the message, as it's 738 * already been successfully sent. We simply send an OK 739 * (rather than fail the send with MQE_UNEXPECTED_CB_ERR), 740 * assuming that the other side is receiving enough 741 * interrupts to get this message processed anyway. 742 */ 743 ret = MQE_OK; 744 } 745 return ret; 746 } 747 748 /* 749 * Handle a gru_mesq failure. Some of these failures are software recoverable 750 * or retryable. 751 */ 752 static int send_message_failure(void *cb, struct gru_message_queue_desc *mqd, 753 void *mesg, int lines) 754 { 755 int substatus, ret = 0; 756 757 substatus = gru_get_cb_message_queue_substatus(cb); 758 switch (substatus) { 759 case CBSS_NO_ERROR: 760 STAT(mesq_send_unexpected_error); 761 ret = MQE_UNEXPECTED_CB_ERR; 762 break; 763 case CBSS_LB_OVERFLOWED: 764 STAT(mesq_send_lb_overflow); 765 ret = MQE_CONGESTION; 766 break; 767 case CBSS_QLIMIT_REACHED: 768 STAT(mesq_send_qlimit_reached); 769 ret = send_message_queue_full(cb, mqd, mesg, lines); 770 break; 771 case CBSS_AMO_NACKED: 772 STAT(mesq_send_amo_nacked); 773 ret = MQE_CONGESTION; 774 break; 775 case CBSS_PUT_NACKED: 776 STAT(mesq_send_put_nacked); 777 ret = send_message_put_nacked(cb, mqd, mesg, lines); 778 break; 779 case CBSS_PAGE_OVERFLOW: 780 STAT(mesq_page_overflow); 781 fallthrough; 782 default: 783 BUG(); 784 } 785 return ret; 786 } 787 788 /* 789 * Send a message to a message queue 790 * mqd message queue descriptor 791 * mesg message. ust be vaddr within a GSEG 792 * bytes message size (<= 2 CL) 793 */ 794 int gru_send_message_gpa(struct gru_message_queue_desc *mqd, void *mesg, 795 unsigned int bytes) 796 { 797 struct message_header *mhdr; 798 void *cb; 799 void *dsr; 800 int istatus, clines, ret; 801 802 STAT(mesq_send); 803 BUG_ON(bytes < sizeof(int) || bytes > 2 * GRU_CACHE_LINE_BYTES); 804 805 clines = DIV_ROUND_UP(bytes, GRU_CACHE_LINE_BYTES); 806 if (gru_get_cpu_resources(bytes, &cb, &dsr)) 807 return MQE_BUG_NO_RESOURCES; 808 memcpy(dsr, mesg, bytes); 809 mhdr = dsr; 810 mhdr->present = MQS_FULL; 811 mhdr->lines = clines; 812 if (clines == 2) { 813 mhdr->present2 = get_present2(mhdr); 814 restore_present2(mhdr, MQS_FULL); 815 } 816 817 do { 818 ret = MQE_OK; 819 gru_mesq(cb, mqd->mq_gpa, gru_get_tri(mhdr), clines, IMA); 820 istatus = gru_wait(cb); 821 if (istatus != CBS_IDLE) 822 ret = send_message_failure(cb, mqd, dsr, clines); 823 } while (ret == MQIE_AGAIN); 824 gru_free_cpu_resources(cb, dsr); 825 826 if (ret) 827 STAT(mesq_send_failed); 828 return ret; 829 } 830 EXPORT_SYMBOL_GPL(gru_send_message_gpa); 831 832 /* 833 * Advance the receive pointer for the queue to the next message. 834 */ 835 void gru_free_message(struct gru_message_queue_desc *mqd, void *mesg) 836 { 837 struct message_queue *mq = mqd->mq; 838 struct message_header *mhdr = mq->next; 839 void *next, *pnext; 840 int half = -1; 841 int lines = mhdr->lines; 842 843 if (lines == 2) 844 restore_present2(mhdr, MQS_EMPTY); 845 mhdr->present = MQS_EMPTY; 846 847 pnext = mq->next; 848 next = pnext + GRU_CACHE_LINE_BYTES * lines; 849 if (next == mq->limit) { 850 next = mq->start; 851 half = 1; 852 } else if (pnext < mq->start2 && next >= mq->start2) { 853 half = 0; 854 } 855 856 if (half >= 0) 857 mq->hstatus[half] = 1; 858 mq->next = next; 859 } 860 EXPORT_SYMBOL_GPL(gru_free_message); 861 862 /* 863 * Get next message from message queue. Return NULL if no message 864 * present. User must call next_message() to move to next message. 865 * rmq message queue 866 */ 867 void *gru_get_next_message(struct gru_message_queue_desc *mqd) 868 { 869 struct message_queue *mq = mqd->mq; 870 struct message_header *mhdr = mq->next; 871 int present = mhdr->present; 872 873 /* skip NOOP messages */ 874 while (present == MQS_NOOP) { 875 gru_free_message(mqd, mhdr); 876 mhdr = mq->next; 877 present = mhdr->present; 878 } 879 880 /* Wait for both halves of 2 line messages */ 881 if (present == MQS_FULL && mhdr->lines == 2 && 882 get_present2(mhdr) == MQS_EMPTY) 883 present = MQS_EMPTY; 884 885 if (!present) { 886 STAT(mesq_receive_none); 887 return NULL; 888 } 889 890 if (mhdr->lines == 2) 891 restore_present2(mhdr, mhdr->present2); 892 893 STAT(mesq_receive); 894 return mhdr; 895 } 896 EXPORT_SYMBOL_GPL(gru_get_next_message); 897 898 /* ---------------------- GRU DATA COPY FUNCTIONS ---------------------------*/ 899 900 /* 901 * Load a DW from a global GPA. The GPA can be a memory or MMR address. 902 */ 903 int gru_read_gpa(unsigned long *value, unsigned long gpa) 904 { 905 void *cb; 906 void *dsr; 907 int ret, iaa; 908 909 STAT(read_gpa); 910 if (gru_get_cpu_resources(GRU_NUM_KERNEL_DSR_BYTES, &cb, &dsr)) 911 return MQE_BUG_NO_RESOURCES; 912 iaa = gpa >> 62; 913 gru_vload_phys(cb, gpa, gru_get_tri(dsr), iaa, IMA); 914 ret = gru_wait(cb); 915 if (ret == CBS_IDLE) 916 *value = *(unsigned long *)dsr; 917 gru_free_cpu_resources(cb, dsr); 918 return ret; 919 } 920 EXPORT_SYMBOL_GPL(gru_read_gpa); 921 922 923 /* 924 * Copy a block of data using the GRU resources 925 */ 926 int gru_copy_gpa(unsigned long dest_gpa, unsigned long src_gpa, 927 unsigned int bytes) 928 { 929 void *cb; 930 void *dsr; 931 int ret; 932 933 STAT(copy_gpa); 934 if (gru_get_cpu_resources(GRU_NUM_KERNEL_DSR_BYTES, &cb, &dsr)) 935 return MQE_BUG_NO_RESOURCES; 936 gru_bcopy(cb, src_gpa, dest_gpa, gru_get_tri(dsr), 937 XTYPE_B, bytes, GRU_NUM_KERNEL_DSR_CL, IMA); 938 ret = gru_wait(cb); 939 gru_free_cpu_resources(cb, dsr); 940 return ret; 941 } 942 EXPORT_SYMBOL_GPL(gru_copy_gpa); 943 944 /* ------------------- KERNEL QUICKTESTS RUN AT STARTUP ----------------*/ 945 /* Temp - will delete after we gain confidence in the GRU */ 946 947 static int quicktest0(unsigned long arg) 948 { 949 unsigned long word0; 950 unsigned long word1; 951 void *cb; 952 void *dsr; 953 unsigned long *p; 954 int ret = -EIO; 955 956 if (gru_get_cpu_resources(GRU_CACHE_LINE_BYTES, &cb, &dsr)) 957 return MQE_BUG_NO_RESOURCES; 958 p = dsr; 959 word0 = MAGIC; 960 word1 = 0; 961 962 gru_vload(cb, uv_gpa(&word0), gru_get_tri(dsr), XTYPE_DW, 1, 1, IMA); 963 if (gru_wait(cb) != CBS_IDLE) { 964 printk(KERN_DEBUG "GRU:%d quicktest0: CBR failure 1\n", smp_processor_id()); 965 goto done; 966 } 967 968 if (*p != MAGIC) { 969 printk(KERN_DEBUG "GRU:%d quicktest0 bad magic 0x%lx\n", smp_processor_id(), *p); 970 goto done; 971 } 972 gru_vstore(cb, uv_gpa(&word1), gru_get_tri(dsr), XTYPE_DW, 1, 1, IMA); 973 if (gru_wait(cb) != CBS_IDLE) { 974 printk(KERN_DEBUG "GRU:%d quicktest0: CBR failure 2\n", smp_processor_id()); 975 goto done; 976 } 977 978 if (word0 != word1 || word1 != MAGIC) { 979 printk(KERN_DEBUG 980 "GRU:%d quicktest0 err: found 0x%lx, expected 0x%lx\n", 981 smp_processor_id(), word1, MAGIC); 982 goto done; 983 } 984 ret = 0; 985 986 done: 987 gru_free_cpu_resources(cb, dsr); 988 return ret; 989 } 990 991 #define ALIGNUP(p, q) ((void *)(((unsigned long)(p) + (q) - 1) & ~(q - 1))) 992 993 static int quicktest1(unsigned long arg) 994 { 995 struct gru_message_queue_desc mqd; 996 void *p, *mq; 997 int i, ret = -EIO; 998 char mes[GRU_CACHE_LINE_BYTES], *m; 999 1000 /* Need 1K cacheline aligned that does not cross page boundary */ 1001 p = kmalloc(4096, 0); 1002 if (p == NULL) 1003 return -ENOMEM; 1004 mq = ALIGNUP(p, 1024); 1005 memset(mes, 0xee, sizeof(mes)); 1006 1007 gru_create_message_queue(&mqd, mq, 8 * GRU_CACHE_LINE_BYTES, 0, 0, 0); 1008 for (i = 0; i < 6; i++) { 1009 mes[8] = i; 1010 do { 1011 ret = gru_send_message_gpa(&mqd, mes, sizeof(mes)); 1012 } while (ret == MQE_CONGESTION); 1013 if (ret) 1014 break; 1015 } 1016 if (ret != MQE_QUEUE_FULL || i != 4) { 1017 printk(KERN_DEBUG "GRU:%d quicktest1: unexpected status %d, i %d\n", 1018 smp_processor_id(), ret, i); 1019 goto done; 1020 } 1021 1022 for (i = 0; i < 6; i++) { 1023 m = gru_get_next_message(&mqd); 1024 if (!m || m[8] != i) 1025 break; 1026 gru_free_message(&mqd, m); 1027 } 1028 if (i != 4) { 1029 printk(KERN_DEBUG "GRU:%d quicktest2: bad message, i %d, m %p, m8 %d\n", 1030 smp_processor_id(), i, m, m ? m[8] : -1); 1031 goto done; 1032 } 1033 ret = 0; 1034 1035 done: 1036 kfree(p); 1037 return ret; 1038 } 1039 1040 static int quicktest2(unsigned long arg) 1041 { 1042 static DECLARE_COMPLETION(cmp); 1043 unsigned long han; 1044 int blade_id = 0; 1045 int numcb = 4; 1046 int ret = 0; 1047 unsigned long *buf; 1048 void *cb0, *cb; 1049 struct gru_control_block_status *gen; 1050 int i, k, istatus, bytes; 1051 1052 bytes = numcb * 4 * 8; 1053 buf = kmalloc(bytes, GFP_KERNEL); 1054 if (!buf) 1055 return -ENOMEM; 1056 1057 ret = -EBUSY; 1058 han = gru_reserve_async_resources(blade_id, numcb, 0, &cmp); 1059 if (!han) 1060 goto done; 1061 1062 gru_lock_async_resource(han, &cb0, NULL); 1063 memset(buf, 0xee, bytes); 1064 for (i = 0; i < numcb; i++) 1065 gru_vset(cb0 + i * GRU_HANDLE_STRIDE, uv_gpa(&buf[i * 4]), 0, 1066 XTYPE_DW, 4, 1, IMA_INTERRUPT); 1067 1068 ret = 0; 1069 k = numcb; 1070 do { 1071 gru_wait_async_cbr(han); 1072 for (i = 0; i < numcb; i++) { 1073 cb = cb0 + i * GRU_HANDLE_STRIDE; 1074 istatus = gru_check_status(cb); 1075 if (istatus != CBS_ACTIVE && istatus != CBS_CALL_OS) 1076 break; 1077 } 1078 if (i == numcb) 1079 continue; 1080 if (istatus != CBS_IDLE) { 1081 printk(KERN_DEBUG "GRU:%d quicktest2: cb %d, exception\n", smp_processor_id(), i); 1082 ret = -EFAULT; 1083 } else if (buf[4 * i] || buf[4 * i + 1] || buf[4 * i + 2] || 1084 buf[4 * i + 3]) { 1085 printk(KERN_DEBUG "GRU:%d quicktest2:cb %d, buf 0x%lx, 0x%lx, 0x%lx, 0x%lx\n", 1086 smp_processor_id(), i, buf[4 * i], buf[4 * i + 1], buf[4 * i + 2], buf[4 * i + 3]); 1087 ret = -EIO; 1088 } 1089 k--; 1090 gen = cb; 1091 gen->istatus = CBS_CALL_OS; /* don't handle this CBR again */ 1092 } while (k); 1093 BUG_ON(cmp.done); 1094 1095 gru_unlock_async_resource(han); 1096 gru_release_async_resources(han); 1097 done: 1098 kfree(buf); 1099 return ret; 1100 } 1101 1102 #define BUFSIZE 200 1103 static int quicktest3(unsigned long arg) 1104 { 1105 char buf1[BUFSIZE], buf2[BUFSIZE]; 1106 int ret = 0; 1107 1108 memset(buf2, 0, sizeof(buf2)); 1109 memset(buf1, get_cycles() & 255, sizeof(buf1)); 1110 gru_copy_gpa(uv_gpa(buf2), uv_gpa(buf1), BUFSIZE); 1111 if (memcmp(buf1, buf2, BUFSIZE)) { 1112 printk(KERN_DEBUG "GRU:%d quicktest3 error\n", smp_processor_id()); 1113 ret = -EIO; 1114 } 1115 return ret; 1116 } 1117 1118 /* 1119 * Debugging only. User hook for various kernel tests 1120 * of driver & gru. 1121 */ 1122 int gru_ktest(unsigned long arg) 1123 { 1124 int ret = -EINVAL; 1125 1126 switch (arg & 0xff) { 1127 case 0: 1128 ret = quicktest0(arg); 1129 break; 1130 case 1: 1131 ret = quicktest1(arg); 1132 break; 1133 case 2: 1134 ret = quicktest2(arg); 1135 break; 1136 case 3: 1137 ret = quicktest3(arg); 1138 break; 1139 case 99: 1140 ret = gru_free_kernel_contexts(); 1141 break; 1142 } 1143 return ret; 1144 1145 } 1146 1147 int gru_kservices_init(void) 1148 { 1149 return 0; 1150 } 1151 1152 void gru_kservices_exit(void) 1153 { 1154 if (gru_free_kernel_contexts()) 1155 BUG(); 1156 } 1157 1158