1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * SN Platform GRU Driver 4 * 5 * KERNEL SERVICES THAT USE THE GRU 6 * 7 * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. 8 */ 9 10 #include <linux/kernel.h> 11 #include <linux/errno.h> 12 #include <linux/slab.h> 13 #include <linux/mm.h> 14 #include <linux/spinlock.h> 15 #include <linux/device.h> 16 #include <linux/miscdevice.h> 17 #include <linux/proc_fs.h> 18 #include <linux/interrupt.h> 19 #include <linux/sync_core.h> 20 #include <linux/uaccess.h> 21 #include <linux/delay.h> 22 #include <linux/export.h> 23 #include <asm/io_apic.h> 24 #include "gru.h" 25 #include "grulib.h" 26 #include "grutables.h" 27 #include "grukservices.h" 28 #include "gru_instructions.h" 29 #include <asm/uv/uv_hub.h> 30 31 /* 32 * Kernel GRU Usage 33 * 34 * The following is an interim algorithm for management of kernel GRU 35 * resources. This will likely be replaced when we better understand the 36 * kernel/user requirements. 37 * 38 * Blade percpu resources reserved for kernel use. These resources are 39 * reserved whenever the kernel context for the blade is loaded. Note 40 * that the kernel context is not guaranteed to be always available. It is 41 * loaded on demand & can be stolen by a user if the user demand exceeds the 42 * kernel demand. The kernel can always reload the kernel context but 43 * a SLEEP may be required!!!. 44 * 45 * Async Overview: 46 * 47 * Each blade has one "kernel context" that owns GRU kernel resources 48 * located on the blade. Kernel drivers use GRU resources in this context 49 * for sending messages, zeroing memory, etc. 50 * 51 * The kernel context is dynamically loaded on demand. If it is not in 52 * use by the kernel, the kernel context can be unloaded & given to a user. 53 * The kernel context will be reloaded when needed. This may require that 54 * a context be stolen from a user. 55 * NOTE: frequent unloading/reloading of the kernel context is 56 * expensive. 
We are depending on batch schedulers, cpusets, sane 57 * drivers or some other mechanism to prevent the need for frequent 58 * stealing/reloading. 59 * 60 * The kernel context consists of two parts: 61 * - 1 CB & a few DSRs that are reserved for each cpu on the blade. 62 * Each cpu has its own private resources & does not share them 63 * with other cpus. These resources are used serially, i.e., 64 * locked, used & unlocked on each call to a function in 65 * grukservices. 66 * (Now that we have dynamic loading of kernel contexts, I 67 * may rethink this & allow sharing between cpus....) 68 * 69 * - Additional resources can be reserved long term & used directly 70 * by UV drivers located in the kernel. Drivers using these GRU 71 * resources can use asynchronous GRU instructions that send 72 * interrupts on completion. 73 * - these resources must be explicitly locked/unlocked 74 * - locked resources prevent (obviously) the kernel 75 * context from being unloaded. 76 * - drivers using these resources directly issue their own 77 * GRU instruction and must wait/check completion. 78 * 79 * When these resources are reserved, the caller can optionally 80 * associate a wait_queue with the resources and use asynchronous 81 * GRU instructions. When an async GRU instruction completes, the 82 * driver will do a wakeup on the event. 
83 * 84 */ 85 86 87 #define ASYNC_HAN_TO_BID(h) ((h) - 1) 88 #define ASYNC_BID_TO_HAN(b) ((b) + 1) 89 #define ASYNC_HAN_TO_BS(h) gru_base[ASYNC_HAN_TO_BID(h)] 90 91 #define GRU_NUM_KERNEL_CBR 1 92 #define GRU_NUM_KERNEL_DSR_BYTES 256 93 #define GRU_NUM_KERNEL_DSR_CL (GRU_NUM_KERNEL_DSR_BYTES / \ 94 GRU_CACHE_LINE_BYTES) 95 96 /* GRU instruction attributes for all instructions */ 97 #define IMA IMA_CB_DELAY 98 99 /* GRU cacheline size is always 64 bytes - even on arches with 128 byte lines */ 100 #define __gru_cacheline_aligned__ \ 101 __attribute__((__aligned__(GRU_CACHE_LINE_BYTES))) 102 103 #define MAGIC 0x1234567887654321UL 104 105 /* Default retry count for GRU errors on kernel instructions */ 106 #define EXCEPTION_RETRY_LIMIT 3 107 108 /* Status of message queue sections */ 109 #define MQS_EMPTY 0 110 #define MQS_FULL 1 111 #define MQS_NOOP 2 112 113 /*----------------- RESOURCE MANAGEMENT -------------------------------------*/ 114 /* optimized for x86_64 */ 115 struct message_queue { 116 union gru_mesqhead head __gru_cacheline_aligned__; /* CL 0 */ 117 int qlines; /* DW 1 */ 118 long hstatus[2]; 119 void *next __gru_cacheline_aligned__;/* CL 1 */ 120 void *limit; 121 void *start; 122 void *start2; 123 char data ____cacheline_aligned; /* CL 2 */ 124 }; 125 126 /* First word in every message - used by mesq interface */ 127 struct message_header { 128 char present; 129 char present2; 130 char lines; 131 char fill; 132 }; 133 134 #define HSTATUS(mq, h) ((mq) + offsetof(struct message_queue, hstatus[h])) 135 136 /* 137 * Reload the blade's kernel context into a GRU chiplet. Called holding 138 * the bs_kgts_sema for READ. Will steal user contexts if necessary. 
139 */ 140 static void gru_load_kernel_context(struct gru_blade_state *bs, int blade_id) 141 { 142 struct gru_state *gru; 143 struct gru_thread_state *kgts; 144 void *vaddr; 145 int ctxnum, ncpus; 146 147 up_read(&bs->bs_kgts_sema); 148 down_write(&bs->bs_kgts_sema); 149 150 if (!bs->bs_kgts) { 151 do { 152 bs->bs_kgts = gru_alloc_gts(NULL, 0, 0, 0, 0, 0); 153 if (!IS_ERR(bs->bs_kgts)) 154 break; 155 msleep(1); 156 } while (true); 157 bs->bs_kgts->ts_user_blade_id = blade_id; 158 } 159 kgts = bs->bs_kgts; 160 161 if (!kgts->ts_gru) { 162 STAT(load_kernel_context); 163 ncpus = uv_blade_nr_possible_cpus(blade_id); 164 kgts->ts_cbr_au_count = GRU_CB_COUNT_TO_AU( 165 GRU_NUM_KERNEL_CBR * ncpus + bs->bs_async_cbrs); 166 kgts->ts_dsr_au_count = GRU_DS_BYTES_TO_AU( 167 GRU_NUM_KERNEL_DSR_BYTES * ncpus + 168 bs->bs_async_dsr_bytes); 169 while (!gru_assign_gru_context(kgts)) { 170 msleep(1); 171 gru_steal_context(kgts); 172 } 173 gru_load_context(kgts); 174 gru = bs->bs_kgts->ts_gru; 175 vaddr = gru->gs_gru_base_vaddr; 176 ctxnum = kgts->ts_ctxnum; 177 bs->kernel_cb = get_gseg_base_address_cb(vaddr, ctxnum, 0); 178 bs->kernel_dsr = get_gseg_base_address_ds(vaddr, ctxnum, 0); 179 } 180 downgrade_write(&bs->bs_kgts_sema); 181 } 182 183 /* 184 * Free all kernel contexts that are not currently in use. 185 * Returns 0 if all freed, else number of inuse context. 186 */ 187 static int gru_free_kernel_contexts(void) 188 { 189 struct gru_blade_state *bs; 190 struct gru_thread_state *kgts; 191 int bid, ret = 0; 192 193 for (bid = 0; bid < GRU_MAX_BLADES; bid++) { 194 bs = gru_base[bid]; 195 if (!bs) 196 continue; 197 198 /* Ignore busy contexts. Don't want to block here. 
*/ 199 if (down_write_trylock(&bs->bs_kgts_sema)) { 200 kgts = bs->bs_kgts; 201 if (kgts && kgts->ts_gru) 202 gru_unload_context(kgts, 0); 203 bs->bs_kgts = NULL; 204 up_write(&bs->bs_kgts_sema); 205 kfree(kgts); 206 } else { 207 ret++; 208 } 209 } 210 return ret; 211 } 212 213 /* 214 * Lock & load the kernel context for the specified blade. 215 */ 216 static struct gru_blade_state *gru_lock_kernel_context(int blade_id) 217 { 218 struct gru_blade_state *bs; 219 int bid; 220 221 STAT(lock_kernel_context); 222 again: 223 bid = blade_id < 0 ? uv_numa_blade_id() : blade_id; 224 bs = gru_base[bid]; 225 226 /* Handle the case where migration occurred while waiting for the sema */ 227 down_read(&bs->bs_kgts_sema); 228 if (blade_id < 0 && bid != uv_numa_blade_id()) { 229 up_read(&bs->bs_kgts_sema); 230 goto again; 231 } 232 if (!bs->bs_kgts || !bs->bs_kgts->ts_gru) 233 gru_load_kernel_context(bs, bid); 234 return bs; 235 236 } 237 238 /* 239 * Unlock the kernel context for the specified blade. Context is not 240 * unloaded but may be stolen before next use. 241 */ 242 static void gru_unlock_kernel_context(int blade_id) 243 { 244 struct gru_blade_state *bs; 245 246 bs = gru_base[blade_id]; 247 up_read(&bs->bs_kgts_sema); 248 STAT(unlock_kernel_context); 249 } 250 251 /* 252 * Reserve & get pointers to the DSR/CBRs reserved for the current cpu. 253 * - returns with preemption disabled 254 */ 255 static int gru_get_cpu_resources(int dsr_bytes, void **cb, void **dsr) 256 { 257 struct gru_blade_state *bs; 258 int lcpu; 259 260 BUG_ON(dsr_bytes > GRU_NUM_KERNEL_DSR_BYTES); 261 preempt_disable(); 262 bs = gru_lock_kernel_context(-1); 263 lcpu = uv_blade_processor_id(); 264 *cb = bs->kernel_cb + lcpu * GRU_HANDLE_STRIDE; 265 *dsr = bs->kernel_dsr + lcpu * GRU_NUM_KERNEL_DSR_BYTES; 266 return 0; 267 } 268 269 /* 270 * Free the current cpus reserved DSR/CBR resources. 
271 */ 272 static void gru_free_cpu_resources(void *cb, void *dsr) 273 { 274 gru_unlock_kernel_context(uv_numa_blade_id()); 275 preempt_enable(); 276 } 277 278 /* 279 * Reserve GRU resources to be used asynchronously. 280 * Note: currently supports only 1 reservation per blade. 281 * 282 * input: 283 * blade_id - blade on which resources should be reserved 284 * cbrs - number of CBRs 285 * dsr_bytes - number of DSR bytes needed 286 * output: 287 * handle to identify resource 288 * (0 = async resources already reserved) 289 */ 290 unsigned long gru_reserve_async_resources(int blade_id, int cbrs, int dsr_bytes, 291 struct completion *cmp) 292 { 293 struct gru_blade_state *bs; 294 struct gru_thread_state *kgts; 295 int ret = 0; 296 297 bs = gru_base[blade_id]; 298 299 down_write(&bs->bs_kgts_sema); 300 301 /* Verify no resources already reserved */ 302 if (bs->bs_async_dsr_bytes + bs->bs_async_cbrs) 303 goto done; 304 bs->bs_async_dsr_bytes = dsr_bytes; 305 bs->bs_async_cbrs = cbrs; 306 bs->bs_async_wq = cmp; 307 kgts = bs->bs_kgts; 308 309 /* Resources changed. Unload context if already loaded */ 310 if (kgts && kgts->ts_gru) 311 gru_unload_context(kgts, 0); 312 ret = ASYNC_BID_TO_HAN(blade_id); 313 314 done: 315 up_write(&bs->bs_kgts_sema); 316 return ret; 317 } 318 319 /* 320 * Release async resources previously reserved. 321 * 322 * input: 323 * han - handle to identify resources 324 */ 325 void gru_release_async_resources(unsigned long han) 326 { 327 struct gru_blade_state *bs = ASYNC_HAN_TO_BS(han); 328 329 down_write(&bs->bs_kgts_sema); 330 bs->bs_async_dsr_bytes = 0; 331 bs->bs_async_cbrs = 0; 332 bs->bs_async_wq = NULL; 333 up_write(&bs->bs_kgts_sema); 334 } 335 336 /* 337 * Wait for async GRU instructions to complete. 
338 * 339 * input: 340 * han - handle to identify resources 341 */ 342 void gru_wait_async_cbr(unsigned long han) 343 { 344 struct gru_blade_state *bs = ASYNC_HAN_TO_BS(han); 345 346 wait_for_completion(bs->bs_async_wq); 347 mb(); 348 } 349 350 /* 351 * Lock previous reserved async GRU resources 352 * 353 * input: 354 * han - handle to identify resources 355 * output: 356 * cb - pointer to first CBR 357 * dsr - pointer to first DSR 358 */ 359 void gru_lock_async_resource(unsigned long han, void **cb, void **dsr) 360 { 361 struct gru_blade_state *bs = ASYNC_HAN_TO_BS(han); 362 int blade_id = ASYNC_HAN_TO_BID(han); 363 int ncpus; 364 365 gru_lock_kernel_context(blade_id); 366 ncpus = uv_blade_nr_possible_cpus(blade_id); 367 if (cb) 368 *cb = bs->kernel_cb + ncpus * GRU_HANDLE_STRIDE; 369 if (dsr) 370 *dsr = bs->kernel_dsr + ncpus * GRU_NUM_KERNEL_DSR_BYTES; 371 } 372 373 /* 374 * Unlock previous reserved async GRU resources 375 * 376 * input: 377 * han - handle to identify resources 378 */ 379 void gru_unlock_async_resource(unsigned long han) 380 { 381 int blade_id = ASYNC_HAN_TO_BID(han); 382 383 gru_unlock_kernel_context(blade_id); 384 } 385 386 /*----------------------------------------------------------------------*/ 387 int gru_get_cb_exception_detail(void *cb, 388 struct control_block_extended_exc_detail *excdet) 389 { 390 struct gru_control_block_extended *cbe; 391 struct gru_thread_state *kgts = NULL; 392 unsigned long off; 393 int cbrnum, bid; 394 395 /* 396 * Locate kgts for cb. This algorithm is SLOW but 397 * this function is rarely called (ie., almost never). 398 * Performance does not matter. 
399 */ 400 for_each_possible_blade(bid) { 401 if (!gru_base[bid]) 402 break; 403 kgts = gru_base[bid]->bs_kgts; 404 if (!kgts || !kgts->ts_gru) 405 continue; 406 off = cb - kgts->ts_gru->gs_gru_base_vaddr; 407 if (off < GRU_SIZE) 408 break; 409 kgts = NULL; 410 } 411 BUG_ON(!kgts); 412 cbrnum = thread_cbr_number(kgts, get_cb_number(cb)); 413 cbe = get_cbe(GRUBASE(cb), cbrnum); 414 gru_flush_cache(cbe); /* CBE not coherent */ 415 sync_core(); 416 excdet->opc = cbe->opccpy; 417 excdet->exopc = cbe->exopccpy; 418 excdet->ecause = cbe->ecause; 419 excdet->exceptdet0 = cbe->idef1upd; 420 excdet->exceptdet1 = cbe->idef3upd; 421 gru_flush_cache(cbe); 422 return 0; 423 } 424 425 static char *gru_get_cb_exception_detail_str(int ret, void *cb, 426 char *buf, int size) 427 { 428 struct gru_control_block_status *gen = (void *)cb; 429 struct control_block_extended_exc_detail excdet; 430 431 if (ret > 0 && gen->istatus == CBS_EXCEPTION) { 432 gru_get_cb_exception_detail(cb, &excdet); 433 snprintf(buf, size, 434 "GRU:%d exception: cb %p, opc %d, exopc %d, ecause 0x%x," 435 "excdet0 0x%lx, excdet1 0x%x", smp_processor_id(), 436 gen, excdet.opc, excdet.exopc, excdet.ecause, 437 excdet.exceptdet0, excdet.exceptdet1); 438 } else { 439 snprintf(buf, size, "No exception"); 440 } 441 return buf; 442 } 443 444 static int gru_wait_idle_or_exception(struct gru_control_block_status *gen) 445 { 446 while (gen->istatus >= CBS_ACTIVE) { 447 cpu_relax(); 448 barrier(); 449 } 450 return gen->istatus; 451 } 452 453 static int gru_retry_exception(void *cb) 454 { 455 struct gru_control_block_status *gen = (void *)cb; 456 struct control_block_extended_exc_detail excdet; 457 int retry = EXCEPTION_RETRY_LIMIT; 458 459 while (1) { 460 if (gru_wait_idle_or_exception(gen) == CBS_IDLE) 461 return CBS_IDLE; 462 if (gru_get_cb_message_queue_substatus(cb)) 463 return CBS_EXCEPTION; 464 gru_get_cb_exception_detail(cb, &excdet); 465 if ((excdet.ecause & ~EXCEPTION_RETRY_BITS) || 466 (excdet.cbrexecstatus & 
CBR_EXS_ABORT_OCC)) 467 break; 468 if (retry-- == 0) 469 break; 470 gen->icmd = 1; 471 gru_flush_cache(gen); 472 } 473 return CBS_EXCEPTION; 474 } 475 476 int gru_check_status_proc(void *cb) 477 { 478 struct gru_control_block_status *gen = (void *)cb; 479 int ret; 480 481 ret = gen->istatus; 482 if (ret == CBS_EXCEPTION) 483 ret = gru_retry_exception(cb); 484 rmb(); 485 return ret; 486 487 } 488 489 int gru_wait_proc(void *cb) 490 { 491 struct gru_control_block_status *gen = (void *)cb; 492 int ret; 493 494 ret = gru_wait_idle_or_exception(gen); 495 if (ret == CBS_EXCEPTION) 496 ret = gru_retry_exception(cb); 497 rmb(); 498 return ret; 499 } 500 501 static void gru_abort(int ret, void *cb, char *str) 502 { 503 char buf[GRU_EXC_STR_SIZE]; 504 505 panic("GRU FATAL ERROR: %s - %s\n", str, 506 gru_get_cb_exception_detail_str(ret, cb, buf, sizeof(buf))); 507 } 508 509 void gru_wait_abort_proc(void *cb) 510 { 511 int ret; 512 513 ret = gru_wait_proc(cb); 514 if (ret) 515 gru_abort(ret, cb, "gru_wait_abort"); 516 } 517 518 519 /*------------------------------ MESSAGE QUEUES -----------------------------*/ 520 521 /* Internal status . These are NOT returned to the user. */ 522 #define MQIE_AGAIN -1 /* try again */ 523 524 525 /* 526 * Save/restore the "present" flag that is in the second line of 2-line 527 * messages 528 */ 529 static inline int get_present2(void *p) 530 { 531 struct message_header *mhdr = p + GRU_CACHE_LINE_BYTES; 532 return mhdr->present; 533 } 534 535 static inline void restore_present2(void *p, int val) 536 { 537 struct message_header *mhdr = p + GRU_CACHE_LINE_BYTES; 538 mhdr->present = val; 539 } 540 541 /* 542 * Create a message queue. 543 * qlines - message queue size in cache lines. Includes 2-line header. 
544 */ 545 int gru_create_message_queue(struct gru_message_queue_desc *mqd, 546 void *p, unsigned int bytes, int nasid, int vector, int apicid) 547 { 548 struct message_queue *mq = p; 549 unsigned int qlines; 550 551 qlines = bytes / GRU_CACHE_LINE_BYTES - 2; 552 memset(mq, 0, bytes); 553 mq->start = &mq->data; 554 mq->start2 = &mq->data + (qlines / 2 - 1) * GRU_CACHE_LINE_BYTES; 555 mq->next = &mq->data; 556 mq->limit = &mq->data + (qlines - 2) * GRU_CACHE_LINE_BYTES; 557 mq->qlines = qlines; 558 mq->hstatus[0] = 0; 559 mq->hstatus[1] = 1; 560 mq->head = gru_mesq_head(2, qlines / 2 + 1); 561 mqd->mq = mq; 562 mqd->mq_gpa = uv_gpa(mq); 563 mqd->qlines = qlines; 564 mqd->interrupt_pnode = nasid >> 1; 565 mqd->interrupt_vector = vector; 566 mqd->interrupt_apicid = apicid; 567 return 0; 568 } 569 EXPORT_SYMBOL_GPL(gru_create_message_queue); 570 571 /* 572 * Send a NOOP message to a message queue 573 * Returns: 574 * 0 - if queue is full after the send. This is the normal case 575 * but various races can change this. 576 * -1 - if mesq sent successfully but queue not full 577 * >0 - unexpected error. 
MQE_xxx returned 578 */ 579 static int send_noop_message(void *cb, struct gru_message_queue_desc *mqd, 580 void *mesg) 581 { 582 const struct message_header noop_header = { 583 .present = MQS_NOOP, .lines = 1}; 584 unsigned long m; 585 int substatus, ret; 586 struct message_header save_mhdr, *mhdr = mesg; 587 588 STAT(mesq_noop); 589 save_mhdr = *mhdr; 590 *mhdr = noop_header; 591 gru_mesq(cb, mqd->mq_gpa, gru_get_tri(mhdr), 1, IMA); 592 ret = gru_wait(cb); 593 594 if (ret) { 595 substatus = gru_get_cb_message_queue_substatus(cb); 596 switch (substatus) { 597 case CBSS_NO_ERROR: 598 STAT(mesq_noop_unexpected_error); 599 ret = MQE_UNEXPECTED_CB_ERR; 600 break; 601 case CBSS_LB_OVERFLOWED: 602 STAT(mesq_noop_lb_overflow); 603 ret = MQE_CONGESTION; 604 break; 605 case CBSS_QLIMIT_REACHED: 606 STAT(mesq_noop_qlimit_reached); 607 ret = 0; 608 break; 609 case CBSS_AMO_NACKED: 610 STAT(mesq_noop_amo_nacked); 611 ret = MQE_CONGESTION; 612 break; 613 case CBSS_PUT_NACKED: 614 STAT(mesq_noop_put_nacked); 615 m = mqd->mq_gpa + (gru_get_amo_value_head(cb) << 6); 616 gru_vstore(cb, m, gru_get_tri(mesg), XTYPE_CL, 1, 1, 617 IMA); 618 if (gru_wait(cb) == CBS_IDLE) 619 ret = MQIE_AGAIN; 620 else 621 ret = MQE_UNEXPECTED_CB_ERR; 622 break; 623 case CBSS_PAGE_OVERFLOW: 624 STAT(mesq_noop_page_overflow); 625 fallthrough; 626 default: 627 BUG(); 628 } 629 } 630 *mhdr = save_mhdr; 631 return ret; 632 } 633 634 /* 635 * Handle a gru_mesq full. 
636 */ 637 static int send_message_queue_full(void *cb, struct gru_message_queue_desc *mqd, 638 void *mesg, int lines) 639 { 640 union gru_mesqhead mqh; 641 unsigned int limit, head; 642 unsigned long avalue; 643 int half, qlines; 644 645 /* Determine if switching to first/second half of q */ 646 avalue = gru_get_amo_value(cb); 647 head = gru_get_amo_value_head(cb); 648 limit = gru_get_amo_value_limit(cb); 649 650 qlines = mqd->qlines; 651 half = (limit != qlines); 652 653 if (half) 654 mqh = gru_mesq_head(qlines / 2 + 1, qlines); 655 else 656 mqh = gru_mesq_head(2, qlines / 2 + 1); 657 658 /* Try to get lock for switching head pointer */ 659 gru_gamir(cb, EOP_IR_CLR, HSTATUS(mqd->mq_gpa, half), XTYPE_DW, IMA); 660 if (gru_wait(cb) != CBS_IDLE) 661 goto cberr; 662 if (!gru_get_amo_value(cb)) { 663 STAT(mesq_qf_locked); 664 return MQE_QUEUE_FULL; 665 } 666 667 /* Got the lock. Send optional NOP if queue not full, */ 668 if (head != limit) { 669 if (send_noop_message(cb, mqd, mesg)) { 670 gru_gamir(cb, EOP_IR_INC, HSTATUS(mqd->mq_gpa, half), 671 XTYPE_DW, IMA); 672 if (gru_wait(cb) != CBS_IDLE) 673 goto cberr; 674 STAT(mesq_qf_noop_not_full); 675 return MQIE_AGAIN; 676 } 677 avalue++; 678 } 679 680 /* Then flip queuehead to other half of queue. */ 681 gru_gamer(cb, EOP_ERR_CSWAP, mqd->mq_gpa, XTYPE_DW, mqh.val, avalue, 682 IMA); 683 if (gru_wait(cb) != CBS_IDLE) 684 goto cberr; 685 686 /* If not successfully in swapping queue head, clear the hstatus lock */ 687 if (gru_get_amo_value(cb) != avalue) { 688 STAT(mesq_qf_switch_head_failed); 689 gru_gamir(cb, EOP_IR_INC, HSTATUS(mqd->mq_gpa, half), XTYPE_DW, 690 IMA); 691 if (gru_wait(cb) != CBS_IDLE) 692 goto cberr; 693 } 694 return MQIE_AGAIN; 695 cberr: 696 STAT(mesq_qf_unexpected_error); 697 return MQE_UNEXPECTED_CB_ERR; 698 } 699 700 /* 701 * Handle a PUT failure. Note: if message was a 2-line message, one of the 702 * lines might have successfully have been written. 
Before sending the 703 * message, "present" must be cleared in BOTH lines to prevent the receiver 704 * from prematurely seeing the full message. 705 */ 706 static int send_message_put_nacked(void *cb, struct gru_message_queue_desc *mqd, 707 void *mesg, int lines) 708 { 709 unsigned long m; 710 int ret, loops = 200; /* experimentally determined */ 711 712 m = mqd->mq_gpa + (gru_get_amo_value_head(cb) << 6); 713 if (lines == 2) { 714 gru_vset(cb, m, 0, XTYPE_CL, lines, 1, IMA); 715 if (gru_wait(cb) != CBS_IDLE) 716 return MQE_UNEXPECTED_CB_ERR; 717 } 718 gru_vstore(cb, m, gru_get_tri(mesg), XTYPE_CL, lines, 1, IMA); 719 if (gru_wait(cb) != CBS_IDLE) 720 return MQE_UNEXPECTED_CB_ERR; 721 722 if (!mqd->interrupt_vector) 723 return MQE_OK; 724 725 /* 726 * Send a noop message in order to deliver a cross-partition interrupt 727 * to the SSI that contains the target message queue. Normally, the 728 * interrupt is automatically delivered by hardware following mesq 729 * operations, but some error conditions require explicit delivery. 730 * The noop message will trigger delivery. Otherwise partition failures 731 * could cause unrecovered errors. 732 */ 733 do { 734 ret = send_noop_message(cb, mqd, mesg); 735 } while ((ret == MQIE_AGAIN || ret == MQE_CONGESTION) && (loops-- > 0)); 736 737 if (ret == MQIE_AGAIN || ret == MQE_CONGESTION) { 738 /* 739 * Don't indicate to the app to resend the message, as it's 740 * already been successfully sent. We simply send an OK 741 * (rather than fail the send with MQE_UNEXPECTED_CB_ERR), 742 * assuming that the other side is receiving enough 743 * interrupts to get this message processed anyway. 744 */ 745 ret = MQE_OK; 746 } 747 return ret; 748 } 749 750 /* 751 * Handle a gru_mesq failure. Some of these failures are software recoverable 752 * or retryable. 
753 */ 754 static int send_message_failure(void *cb, struct gru_message_queue_desc *mqd, 755 void *mesg, int lines) 756 { 757 int substatus, ret = 0; 758 759 substatus = gru_get_cb_message_queue_substatus(cb); 760 switch (substatus) { 761 case CBSS_NO_ERROR: 762 STAT(mesq_send_unexpected_error); 763 ret = MQE_UNEXPECTED_CB_ERR; 764 break; 765 case CBSS_LB_OVERFLOWED: 766 STAT(mesq_send_lb_overflow); 767 ret = MQE_CONGESTION; 768 break; 769 case CBSS_QLIMIT_REACHED: 770 STAT(mesq_send_qlimit_reached); 771 ret = send_message_queue_full(cb, mqd, mesg, lines); 772 break; 773 case CBSS_AMO_NACKED: 774 STAT(mesq_send_amo_nacked); 775 ret = MQE_CONGESTION; 776 break; 777 case CBSS_PUT_NACKED: 778 STAT(mesq_send_put_nacked); 779 ret = send_message_put_nacked(cb, mqd, mesg, lines); 780 break; 781 case CBSS_PAGE_OVERFLOW: 782 STAT(mesq_page_overflow); 783 fallthrough; 784 default: 785 BUG(); 786 } 787 return ret; 788 } 789 790 /* 791 * Send a message to a message queue 792 * mqd message queue descriptor 793 * mesg message. 
ust be vaddr within a GSEG 794 * bytes message size (<= 2 CL) 795 */ 796 int gru_send_message_gpa(struct gru_message_queue_desc *mqd, void *mesg, 797 unsigned int bytes) 798 { 799 struct message_header *mhdr; 800 void *cb; 801 void *dsr; 802 int istatus, clines, ret; 803 804 STAT(mesq_send); 805 BUG_ON(bytes < sizeof(int) || bytes > 2 * GRU_CACHE_LINE_BYTES); 806 807 clines = DIV_ROUND_UP(bytes, GRU_CACHE_LINE_BYTES); 808 if (gru_get_cpu_resources(bytes, &cb, &dsr)) 809 return MQE_BUG_NO_RESOURCES; 810 memcpy(dsr, mesg, bytes); 811 mhdr = dsr; 812 mhdr->present = MQS_FULL; 813 mhdr->lines = clines; 814 if (clines == 2) { 815 mhdr->present2 = get_present2(mhdr); 816 restore_present2(mhdr, MQS_FULL); 817 } 818 819 do { 820 ret = MQE_OK; 821 gru_mesq(cb, mqd->mq_gpa, gru_get_tri(mhdr), clines, IMA); 822 istatus = gru_wait(cb); 823 if (istatus != CBS_IDLE) 824 ret = send_message_failure(cb, mqd, dsr, clines); 825 } while (ret == MQIE_AGAIN); 826 gru_free_cpu_resources(cb, dsr); 827 828 if (ret) 829 STAT(mesq_send_failed); 830 return ret; 831 } 832 EXPORT_SYMBOL_GPL(gru_send_message_gpa); 833 834 /* 835 * Advance the receive pointer for the queue to the next message. 836 */ 837 void gru_free_message(struct gru_message_queue_desc *mqd, void *mesg) 838 { 839 struct message_queue *mq = mqd->mq; 840 struct message_header *mhdr = mq->next; 841 void *next, *pnext; 842 int half = -1; 843 int lines = mhdr->lines; 844 845 if (lines == 2) 846 restore_present2(mhdr, MQS_EMPTY); 847 mhdr->present = MQS_EMPTY; 848 849 pnext = mq->next; 850 next = pnext + GRU_CACHE_LINE_BYTES * lines; 851 if (next == mq->limit) { 852 next = mq->start; 853 half = 1; 854 } else if (pnext < mq->start2 && next >= mq->start2) { 855 half = 0; 856 } 857 858 if (half >= 0) 859 mq->hstatus[half] = 1; 860 mq->next = next; 861 } 862 EXPORT_SYMBOL_GPL(gru_free_message); 863 864 /* 865 * Get next message from message queue. Return NULL if no message 866 * present. 
User must call next_message() to move to next message. 867 * rmq message queue 868 */ 869 void *gru_get_next_message(struct gru_message_queue_desc *mqd) 870 { 871 struct message_queue *mq = mqd->mq; 872 struct message_header *mhdr = mq->next; 873 int present = mhdr->present; 874 875 /* skip NOOP messages */ 876 while (present == MQS_NOOP) { 877 gru_free_message(mqd, mhdr); 878 mhdr = mq->next; 879 present = mhdr->present; 880 } 881 882 /* Wait for both halves of 2 line messages */ 883 if (present == MQS_FULL && mhdr->lines == 2 && 884 get_present2(mhdr) == MQS_EMPTY) 885 present = MQS_EMPTY; 886 887 if (!present) { 888 STAT(mesq_receive_none); 889 return NULL; 890 } 891 892 if (mhdr->lines == 2) 893 restore_present2(mhdr, mhdr->present2); 894 895 STAT(mesq_receive); 896 return mhdr; 897 } 898 EXPORT_SYMBOL_GPL(gru_get_next_message); 899 900 /* ---------------------- GRU DATA COPY FUNCTIONS ---------------------------*/ 901 902 /* 903 * Load a DW from a global GPA. The GPA can be a memory or MMR address. 
904 */ 905 int gru_read_gpa(unsigned long *value, unsigned long gpa) 906 { 907 void *cb; 908 void *dsr; 909 int ret, iaa; 910 911 STAT(read_gpa); 912 if (gru_get_cpu_resources(GRU_NUM_KERNEL_DSR_BYTES, &cb, &dsr)) 913 return MQE_BUG_NO_RESOURCES; 914 iaa = gpa >> 62; 915 gru_vload_phys(cb, gpa, gru_get_tri(dsr), iaa, IMA); 916 ret = gru_wait(cb); 917 if (ret == CBS_IDLE) 918 *value = *(unsigned long *)dsr; 919 gru_free_cpu_resources(cb, dsr); 920 return ret; 921 } 922 EXPORT_SYMBOL_GPL(gru_read_gpa); 923 924 925 /* 926 * Copy a block of data using the GRU resources 927 */ 928 int gru_copy_gpa(unsigned long dest_gpa, unsigned long src_gpa, 929 unsigned int bytes) 930 { 931 void *cb; 932 void *dsr; 933 int ret; 934 935 STAT(copy_gpa); 936 if (gru_get_cpu_resources(GRU_NUM_KERNEL_DSR_BYTES, &cb, &dsr)) 937 return MQE_BUG_NO_RESOURCES; 938 gru_bcopy(cb, src_gpa, dest_gpa, gru_get_tri(dsr), 939 XTYPE_B, bytes, GRU_NUM_KERNEL_DSR_CL, IMA); 940 ret = gru_wait(cb); 941 gru_free_cpu_resources(cb, dsr); 942 return ret; 943 } 944 EXPORT_SYMBOL_GPL(gru_copy_gpa); 945 946 /* ------------------- KERNEL QUICKTESTS RUN AT STARTUP ----------------*/ 947 /* Temp - will delete after we gain confidence in the GRU */ 948 949 static int quicktest0(unsigned long arg) 950 { 951 unsigned long word0; 952 unsigned long word1; 953 void *cb; 954 void *dsr; 955 unsigned long *p; 956 int ret = -EIO; 957 958 if (gru_get_cpu_resources(GRU_CACHE_LINE_BYTES, &cb, &dsr)) 959 return MQE_BUG_NO_RESOURCES; 960 p = dsr; 961 word0 = MAGIC; 962 word1 = 0; 963 964 gru_vload(cb, uv_gpa(&word0), gru_get_tri(dsr), XTYPE_DW, 1, 1, IMA); 965 if (gru_wait(cb) != CBS_IDLE) { 966 printk(KERN_DEBUG "GRU:%d quicktest0: CBR failure 1\n", smp_processor_id()); 967 goto done; 968 } 969 970 if (*p != MAGIC) { 971 printk(KERN_DEBUG "GRU:%d quicktest0 bad magic 0x%lx\n", smp_processor_id(), *p); 972 goto done; 973 } 974 gru_vstore(cb, uv_gpa(&word1), gru_get_tri(dsr), XTYPE_DW, 1, 1, IMA); 975 if (gru_wait(cb) != CBS_IDLE) 
{ 976 printk(KERN_DEBUG "GRU:%d quicktest0: CBR failure 2\n", smp_processor_id()); 977 goto done; 978 } 979 980 if (word0 != word1 || word1 != MAGIC) { 981 printk(KERN_DEBUG 982 "GRU:%d quicktest0 err: found 0x%lx, expected 0x%lx\n", 983 smp_processor_id(), word1, MAGIC); 984 goto done; 985 } 986 ret = 0; 987 988 done: 989 gru_free_cpu_resources(cb, dsr); 990 return ret; 991 } 992 993 #define ALIGNUP(p, q) ((void *)(((unsigned long)(p) + (q) - 1) & ~(q - 1))) 994 995 static int quicktest1(unsigned long arg) 996 { 997 struct gru_message_queue_desc mqd; 998 void *p, *mq; 999 int i, ret = -EIO; 1000 char mes[GRU_CACHE_LINE_BYTES], *m; 1001 1002 /* Need 1K cacheline aligned that does not cross page boundary */ 1003 p = kmalloc(4096, 0); 1004 if (p == NULL) 1005 return -ENOMEM; 1006 mq = ALIGNUP(p, 1024); 1007 memset(mes, 0xee, sizeof(mes)); 1008 1009 gru_create_message_queue(&mqd, mq, 8 * GRU_CACHE_LINE_BYTES, 0, 0, 0); 1010 for (i = 0; i < 6; i++) { 1011 mes[8] = i; 1012 do { 1013 ret = gru_send_message_gpa(&mqd, mes, sizeof(mes)); 1014 } while (ret == MQE_CONGESTION); 1015 if (ret) 1016 break; 1017 } 1018 if (ret != MQE_QUEUE_FULL || i != 4) { 1019 printk(KERN_DEBUG "GRU:%d quicktest1: unexpected status %d, i %d\n", 1020 smp_processor_id(), ret, i); 1021 goto done; 1022 } 1023 1024 for (i = 0; i < 6; i++) { 1025 m = gru_get_next_message(&mqd); 1026 if (!m || m[8] != i) 1027 break; 1028 gru_free_message(&mqd, m); 1029 } 1030 if (i != 4) { 1031 printk(KERN_DEBUG "GRU:%d quicktest2: bad message, i %d, m %p, m8 %d\n", 1032 smp_processor_id(), i, m, m ? 
m[8] : -1); 1033 goto done; 1034 } 1035 ret = 0; 1036 1037 done: 1038 kfree(p); 1039 return ret; 1040 } 1041 1042 static int quicktest2(unsigned long arg) 1043 { 1044 static DECLARE_COMPLETION(cmp); 1045 unsigned long han; 1046 int blade_id = 0; 1047 int numcb = 4; 1048 int ret = 0; 1049 unsigned long *buf; 1050 void *cb0, *cb; 1051 struct gru_control_block_status *gen; 1052 int i, k, istatus, bytes; 1053 1054 bytes = numcb * 4 * 8; 1055 buf = kmalloc(bytes, GFP_KERNEL); 1056 if (!buf) 1057 return -ENOMEM; 1058 1059 ret = -EBUSY; 1060 han = gru_reserve_async_resources(blade_id, numcb, 0, &cmp); 1061 if (!han) 1062 goto done; 1063 1064 gru_lock_async_resource(han, &cb0, NULL); 1065 memset(buf, 0xee, bytes); 1066 for (i = 0; i < numcb; i++) 1067 gru_vset(cb0 + i * GRU_HANDLE_STRIDE, uv_gpa(&buf[i * 4]), 0, 1068 XTYPE_DW, 4, 1, IMA_INTERRUPT); 1069 1070 ret = 0; 1071 k = numcb; 1072 do { 1073 gru_wait_async_cbr(han); 1074 for (i = 0; i < numcb; i++) { 1075 cb = cb0 + i * GRU_HANDLE_STRIDE; 1076 istatus = gru_check_status(cb); 1077 if (istatus != CBS_ACTIVE && istatus != CBS_CALL_OS) 1078 break; 1079 } 1080 if (i == numcb) 1081 continue; 1082 if (istatus != CBS_IDLE) { 1083 printk(KERN_DEBUG "GRU:%d quicktest2: cb %d, exception\n", smp_processor_id(), i); 1084 ret = -EFAULT; 1085 } else if (buf[4 * i] || buf[4 * i + 1] || buf[4 * i + 2] || 1086 buf[4 * i + 3]) { 1087 printk(KERN_DEBUG "GRU:%d quicktest2:cb %d, buf 0x%lx, 0x%lx, 0x%lx, 0x%lx\n", 1088 smp_processor_id(), i, buf[4 * i], buf[4 * i + 1], buf[4 * i + 2], buf[4 * i + 3]); 1089 ret = -EIO; 1090 } 1091 k--; 1092 gen = cb; 1093 gen->istatus = CBS_CALL_OS; /* don't handle this CBR again */ 1094 } while (k); 1095 BUG_ON(cmp.done); 1096 1097 gru_unlock_async_resource(han); 1098 gru_release_async_resources(han); 1099 done: 1100 kfree(buf); 1101 return ret; 1102 } 1103 1104 #define BUFSIZE 200 1105 static int quicktest3(unsigned long arg) 1106 { 1107 char buf1[BUFSIZE], buf2[BUFSIZE]; 1108 int ret = 0; 1109 1110 
memset(buf2, 0, sizeof(buf2)); 1111 memset(buf1, get_cycles() & 255, sizeof(buf1)); 1112 gru_copy_gpa(uv_gpa(buf2), uv_gpa(buf1), BUFSIZE); 1113 if (memcmp(buf1, buf2, BUFSIZE)) { 1114 printk(KERN_DEBUG "GRU:%d quicktest3 error\n", smp_processor_id()); 1115 ret = -EIO; 1116 } 1117 return ret; 1118 } 1119 1120 /* 1121 * Debugging only. User hook for various kernel tests 1122 * of driver & gru. 1123 */ 1124 int gru_ktest(unsigned long arg) 1125 { 1126 int ret = -EINVAL; 1127 1128 switch (arg & 0xff) { 1129 case 0: 1130 ret = quicktest0(arg); 1131 break; 1132 case 1: 1133 ret = quicktest1(arg); 1134 break; 1135 case 2: 1136 ret = quicktest2(arg); 1137 break; 1138 case 3: 1139 ret = quicktest3(arg); 1140 break; 1141 case 99: 1142 ret = gru_free_kernel_contexts(); 1143 break; 1144 } 1145 return ret; 1146 1147 } 1148 1149 int gru_kservices_init(void) 1150 { 1151 return 0; 1152 } 1153 1154 void gru_kservices_exit(void) 1155 { 1156 if (gru_free_kernel_contexts()) 1157 BUG(); 1158 } 1159 1160