// SPDX-License-Identifier: GPL-2.0
/* ldc.c: Logical Domain Channel link-layer protocol driver.
 *
 * Copyright (C) 2007, 2008 David S. Miller <davem@davemloft.net>
 */

#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/delay.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/scatterlist.h>
#include <linux/interrupt.h>
#include <linux/list.h>
#include <linux/init.h>
#include <linux/bitmap.h>
#include <asm/iommu-common.h>

#include <asm/hypervisor.h>
#include <asm/iommu.h>
#include <asm/page.h>
#include <asm/ldc.h>
#include <asm/mdesc.h>

#define DRV_MODULE_NAME		"ldc"
#define PFX DRV_MODULE_NAME	": "
#define DRV_MODULE_VERSION	"1.1"
#define DRV_MODULE_RELDATE	"July 22, 2008"

#define COOKIE_PGSZ_CODE	0xf000000000000000ULL
#define COOKIE_PGSZ_CODE_SHIFT	60ULL


static char version[] =
	DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n";

/* Packet header layout for unreliable and reliable mode frames.
 * When in RAW mode, packets are simply straight 64-byte payloads
 * with no headers.
 */
struct ldc_packet {
	u8	type;
#define LDC_CTRL	0x01
#define LDC_DATA	0x02
#define LDC_ERR		0x10

	u8	stype;
#define LDC_INFO	0x01
#define LDC_ACK		0x02
#define LDC_NACK	0x04

	u8	ctrl;
#define LDC_VERS	0x01 /* Link Version		*/
#define LDC_RTS		0x02 /* Request To Send		*/
#define LDC_RTR		0x03 /* Ready To Receive	*/
#define LDC_RDX		0x04 /* Ready for Data eXchange	*/
#define LDC_CTRL_MSK	0x0f

	u8	env;
#define LDC_LEN		0x3f
#define LDC_FRAG_MASK	0xc0
#define LDC_START	0x40
#define LDC_STOP	0x80

	u32	seqid;

	union {
		u8	u_data[LDC_PACKET_SIZE - 8];
		struct {
			u32	pad;
			u32	ackid;
			u8	r_data[LDC_PACKET_SIZE - 8 - 8];
		} r;
	} u;
};
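
/* For reference, the per-mode frame layouts this implies (LDC_PACKET_SIZE
 * is the 64-byte frame mentioned above, matching the mss values chosen in
 * ldc_alloc()):
 *
 *	RAW:		64 bytes of raw payload, no header
 *	UNRELIABLE:	8-byte header (type/stype/ctrl/env/seqid)
 *			+ 56 bytes of u_data
 *	STREAM:		8-byte header + 8 bytes of pad/ackid
 *			+ 48 bytes of r_data
 */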

struct ldc_version {
	u16 major;
	u16 minor;
};

/* Ordered from largest major to lowest. */
static struct ldc_version ver_arr[] = {
	{ .major = 1, .minor = 0 },
};

#define LDC_DEFAULT_MTU		(4 * LDC_PACKET_SIZE)
#define LDC_DEFAULT_NUM_ENTRIES	(PAGE_SIZE / LDC_PACKET_SIZE)

struct ldc_channel;

struct ldc_mode_ops {
	int (*write)(struct ldc_channel *, const void *, unsigned int);
	int (*read)(struct ldc_channel *, void *, unsigned int);
};

static const struct ldc_mode_ops raw_ops;
static const struct ldc_mode_ops nonraw_ops;
static const struct ldc_mode_ops stream_ops;

int ldom_domaining_enabled;

struct ldc_iommu {
	/* Protects ldc_unmap. */
	spinlock_t		lock;
	struct ldc_mtable_entry	*page_table;
	struct iommu_map_table	iommu_map_table;
};

struct ldc_channel {
	/* Protects all operations that depend upon channel state. */
	spinlock_t		lock;

	unsigned long		id;

	u8			*mssbuf;
	u32			mssbuf_len;
	u32			mssbuf_off;

	struct ldc_packet	*tx_base;
	unsigned long		tx_head;
	unsigned long		tx_tail;
	unsigned long		tx_num_entries;
	unsigned long		tx_ra;

	unsigned long		tx_acked;

	struct ldc_packet	*rx_base;
	unsigned long		rx_head;
	unsigned long		rx_tail;
	unsigned long		rx_num_entries;
	unsigned long		rx_ra;

	u32			rcv_nxt;
	u32			snd_nxt;

	unsigned long		chan_state;

	struct ldc_channel_config	cfg;
	void			*event_arg;

	const struct ldc_mode_ops	*mops;

	struct ldc_iommu	iommu;

	struct ldc_version	ver;

	u8			hs_state;
#define LDC_HS_CLOSED		0x00
#define LDC_HS_OPEN		0x01
#define LDC_HS_GOTVERS		0x02
#define LDC_HS_SENTRTR		0x03
#define LDC_HS_GOTRTR		0x04
#define LDC_HS_COMPLETE		0x10

	u8			flags;
#define LDC_FLAG_ALLOCED_QUEUES		0x01
#define LDC_FLAG_REGISTERED_QUEUES	0x02
#define LDC_FLAG_REGISTERED_IRQS	0x04
#define LDC_FLAG_RESET			0x10

	u8			mss;
	u8			state;

#define LDC_IRQ_NAME_MAX	32
	char			rx_irq_name[LDC_IRQ_NAME_MAX];
	char			tx_irq_name[LDC_IRQ_NAME_MAX];

	struct hlist_head	mh_list;

	struct hlist_node	list;
};

#define ldcdbg(TYPE, f, a...) \
do {	if (lp->cfg.debug & LDC_DEBUG_##TYPE) \
		printk(KERN_INFO PFX "ID[%lu] " f, lp->id, ## a); \
} while (0)

#define	LDC_ABORT(lp)	ldc_abort((lp), __func__)

static const char *state_to_str(u8 state)
{
	switch (state) {
	case LDC_STATE_INVALID:
		return "INVALID";
	case LDC_STATE_INIT:
		return "INIT";
	case LDC_STATE_BOUND:
		return "BOUND";
	case LDC_STATE_READY:
		return "READY";
	case LDC_STATE_CONNECTED:
		return "CONNECTED";
	default:
		return "<UNKNOWN>";
	}
}

static unsigned long __advance(unsigned long off, unsigned long num_entries)
{
	off += LDC_PACKET_SIZE;
	if (off == (num_entries * LDC_PACKET_SIZE))
		off = 0;

	return off;
}

static unsigned long rx_advance(struct ldc_channel *lp, unsigned long off)
{
	return __advance(off, lp->rx_num_entries);
}

static unsigned long tx_advance(struct ldc_channel *lp, unsigned long off)
{
	return __advance(off, lp->tx_num_entries);
}

static struct ldc_packet *handshake_get_tx_packet(struct ldc_channel *lp,
						  unsigned long *new_tail)
{
	struct ldc_packet *p;
	unsigned long t;

	t = tx_advance(lp, lp->tx_tail);
	if (t == lp->tx_head)
		return NULL;

	*new_tail = t;

	p = lp->tx_base;
	return p + (lp->tx_tail / LDC_PACKET_SIZE);
}

/* When we are in reliable or stream mode, we have to track the next packet
 * we haven't gotten an ACK for in the TX queue using tx_acked.  We have
 * to be careful not to stomp over the queue past that point.  During
 * the handshake, we don't have TX data packets pending in the queue
 * and that's why handshake_get_tx_packet() need not be mindful of
 * lp->tx_acked.
 */
static unsigned long head_for_data(struct ldc_channel *lp)
{
	if (lp->cfg.mode == LDC_MODE_STREAM)
		return lp->tx_acked;
	return lp->tx_head;
}

static int tx_has_space_for(struct ldc_channel *lp, unsigned int size)
{
	unsigned long limit, tail, new_tail, diff;
	unsigned int mss;

	limit = head_for_data(lp);
	tail = lp->tx_tail;
	new_tail = tx_advance(lp, tail);
	if (new_tail == limit)
		return 0;

	if (limit > new_tail)
		diff = limit - new_tail;
	else
		diff = (limit +
			((lp->tx_num_entries * LDC_PACKET_SIZE) - new_tail));
	diff /= LDC_PACKET_SIZE;
	mss = lp->mss;

	if (diff * mss < size)
		return 0;

	return 1;
}
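
/* Worked example for tx_has_space_for(): with the default 128-entry TX
 * queue (PAGE_SIZE / LDC_PACKET_SIZE on an 8K-page kernel), head at
 * entry 0 and tail at entry 1, one slot is always kept unused, so the
 * wrap-around math above yields 126 free entries; in UNRELIABLE mode
 * (mss = 56) that allows a write of up to 126 * 56 = 7056 bytes.
 */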

static struct ldc_packet *data_get_tx_packet(struct ldc_channel *lp,
					     unsigned long *new_tail)
{
	struct ldc_packet *p;
	unsigned long h, t;

	h = head_for_data(lp);
	t = tx_advance(lp, lp->tx_tail);
	if (t == h)
		return NULL;

	*new_tail = t;

	p = lp->tx_base;
	return p + (lp->tx_tail / LDC_PACKET_SIZE);
}

static int set_tx_tail(struct ldc_channel *lp, unsigned long tail)
{
	unsigned long orig_tail = lp->tx_tail;
	int limit = 1000;

	lp->tx_tail = tail;
	while (limit-- > 0) {
		unsigned long err;

		err = sun4v_ldc_tx_set_qtail(lp->id, tail);
		if (!err)
			return 0;

		if (err != HV_EWOULDBLOCK) {
			lp->tx_tail = orig_tail;
			return -EINVAL;
		}
		udelay(1);
	}

	lp->tx_tail = orig_tail;
	return -EBUSY;
}

/* This just updates the head value in the hypervisor using
 * a polling loop with a timeout.  The caller takes care of
 * updating software state representing the head change, if any.
 */
static int __set_rx_head(struct ldc_channel *lp, unsigned long head)
{
	int limit = 1000;

	while (limit-- > 0) {
		unsigned long err;

		err = sun4v_ldc_rx_set_qhead(lp->id, head);
		if (!err)
			return 0;

		if (err != HV_EWOULDBLOCK)
			return -EINVAL;

		udelay(1);
	}

	return -EBUSY;
}

static int send_tx_packet(struct ldc_channel *lp,
			  struct ldc_packet *p,
			  unsigned long new_tail)
{
	BUG_ON(p != (lp->tx_base + (lp->tx_tail / LDC_PACKET_SIZE)));

	return set_tx_tail(lp, new_tail);
}

static struct ldc_packet *handshake_compose_ctrl(struct ldc_channel *lp,
						 u8 stype, u8 ctrl,
						 void *data, int dlen,
						 unsigned long *new_tail)
{
	struct ldc_packet *p = handshake_get_tx_packet(lp, new_tail);

	if (p) {
		memset(p, 0, sizeof(*p));
		p->type = LDC_CTRL;
		p->stype = stype;
		p->ctrl = ctrl;
		if (data)
			memcpy(p->u.u_data, data, dlen);
	}
	return p;
}

static int start_handshake(struct ldc_channel *lp)
{
	struct ldc_packet *p;
	struct ldc_version *ver;
	unsigned long new_tail;

	ver = &ver_arr[0];

	ldcdbg(HS, "SEND VER INFO maj[%u] min[%u]\n",
	       ver->major, ver->minor);

	p = handshake_compose_ctrl(lp, LDC_INFO, LDC_VERS,
				   ver, sizeof(*ver), &new_tail);
	if (p) {
		int err = send_tx_packet(lp, p, new_tail);
		if (!err)
			lp->flags &= ~LDC_FLAG_RESET;
		return err;
	}
	return -EBUSY;
}

static int send_version_nack(struct ldc_channel *lp,
			     u16 major, u16 minor)
{
	struct ldc_packet *p;
	struct ldc_version ver;
	unsigned long new_tail;

	ver.major = major;
	ver.minor = minor;

	p = handshake_compose_ctrl(lp, LDC_NACK, LDC_VERS,
				   &ver, sizeof(ver), &new_tail);
	if (p) {
		ldcdbg(HS, "SEND VER NACK maj[%u] min[%u]\n",
		       ver.major, ver.minor);

		return send_tx_packet(lp, p, new_tail);
	}
	return -EBUSY;
}

static int send_version_ack(struct ldc_channel *lp,
			    struct ldc_version *vp)
{
	struct ldc_packet *p;
	unsigned long new_tail;

	p = handshake_compose_ctrl(lp, LDC_ACK, LDC_VERS,
				   vp, sizeof(*vp), &new_tail);
	if (p) {
		ldcdbg(HS, "SEND VER ACK maj[%u] min[%u]\n",
		       vp->major, vp->minor);

		return send_tx_packet(lp, p, new_tail);
	}
	return -EBUSY;
}

static int send_rts(struct ldc_channel *lp)
{
	struct ldc_packet *p;
	unsigned long new_tail;

	p = handshake_compose_ctrl(lp, LDC_INFO, LDC_RTS, NULL, 0,
				   &new_tail);
	if (p) {
		p->env = lp->cfg.mode;
		p->seqid = 0;
		lp->rcv_nxt = 0;

		ldcdbg(HS, "SEND RTS env[0x%x] seqid[0x%x]\n",
		       p->env, p->seqid);

		return send_tx_packet(lp, p, new_tail);
	}
	return -EBUSY;
}

static int send_rtr(struct ldc_channel *lp)
{
	struct ldc_packet *p;
	unsigned long new_tail;

	p = handshake_compose_ctrl(lp, LDC_INFO, LDC_RTR, NULL, 0,
				   &new_tail);
	if (p) {
		p->env = lp->cfg.mode;
		p->seqid = 0;

		ldcdbg(HS, "SEND RTR env[0x%x] seqid[0x%x]\n",
		       p->env, p->seqid);

		return send_tx_packet(lp, p, new_tail);
	}
	return -EBUSY;
}

static int send_rdx(struct ldc_channel *lp)
{
	struct ldc_packet *p;
	unsigned long new_tail;

	p = handshake_compose_ctrl(lp, LDC_INFO, LDC_RDX, NULL, 0,
				   &new_tail);
	if (p) {
		p->env = 0;
		p->seqid = ++lp->snd_nxt;
		p->u.r.ackid = lp->rcv_nxt;

		ldcdbg(HS, "SEND RDX env[0x%x] seqid[0x%x] ackid[0x%x]\n",
		       p->env, p->seqid, p->u.r.ackid);

		return send_tx_packet(lp, p, new_tail);
	}
	return -EBUSY;
}

static int send_data_nack(struct ldc_channel *lp, struct ldc_packet *data_pkt)
{
	struct ldc_packet *p;
	unsigned long new_tail;
	int err;

	p = data_get_tx_packet(lp, &new_tail);
	if (!p)
		return -EBUSY;
	memset(p, 0, sizeof(*p));
	p->type = data_pkt->type;
	p->stype = LDC_NACK;
	p->ctrl = data_pkt->ctrl & LDC_CTRL_MSK;
	p->seqid = lp->snd_nxt + 1;
	p->u.r.ackid = lp->rcv_nxt;

	ldcdbg(HS, "SEND DATA NACK type[0x%x] ctl[0x%x] seq[0x%x] ack[0x%x]\n",
	       p->type, p->ctrl, p->seqid, p->u.r.ackid);

	err = send_tx_packet(lp, p, new_tail);
	if (!err)
		lp->snd_nxt++;

	return err;
}

static int ldc_abort(struct ldc_channel *lp, const char *msg)
{
	unsigned long hv_err;

	ldcdbg(STATE, "ABORT[%s]\n", msg);
	ldc_print(lp);

	/* We report but do not act upon the hypervisor errors because
	 * there really isn't much we can do if they fail at this point.
	 */
	hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries);
	if (hv_err)
		printk(KERN_ERR PFX "ldc_abort: "
		       "sun4v_ldc_tx_qconf(%lx,%lx,%lx) failed, err=%lu\n",
		       lp->id, lp->tx_ra, lp->tx_num_entries, hv_err);

	hv_err = sun4v_ldc_tx_get_state(lp->id,
					&lp->tx_head,
					&lp->tx_tail,
					&lp->chan_state);
	if (hv_err)
		printk(KERN_ERR PFX "ldc_abort: "
		       "sun4v_ldc_tx_get_state(%lx,...) failed, err=%lu\n",
		       lp->id, hv_err);

	hv_err = sun4v_ldc_rx_qconf(lp->id, lp->rx_ra, lp->rx_num_entries);
	if (hv_err)
		printk(KERN_ERR PFX "ldc_abort: "
		       "sun4v_ldc_rx_qconf(%lx,%lx,%lx) failed, err=%lu\n",
		       lp->id, lp->rx_ra, lp->rx_num_entries, hv_err);

	/* Refetch the RX queue state as well, because we could be invoked
	 * here in the queue processing context.
	 */
	hv_err = sun4v_ldc_rx_get_state(lp->id,
					&lp->rx_head,
					&lp->rx_tail,
					&lp->chan_state);
	if (hv_err)
		printk(KERN_ERR PFX "ldc_abort: "
		       "sun4v_ldc_rx_get_state(%lx,...) failed, err=%lu\n",
		       lp->id, hv_err);

	return -ECONNRESET;
}

static struct ldc_version *find_by_major(u16 major)
{
	struct ldc_version *ret = NULL;
	int i;

	for (i = 0; i < ARRAY_SIZE(ver_arr); i++) {
		struct ldc_version *v = &ver_arr[i];
		if (v->major <= major) {
			ret = v;
			break;
		}
	}
	return ret;
}

static int process_ver_info(struct ldc_channel *lp, struct ldc_version *vp)
{
	struct ldc_version *vap;
	int err;

	ldcdbg(HS, "GOT VERSION INFO major[%x] minor[%x]\n",
	       vp->major, vp->minor);

	if (lp->hs_state == LDC_HS_GOTVERS) {
		lp->hs_state = LDC_HS_OPEN;
		memset(&lp->ver, 0, sizeof(lp->ver));
	}

	vap = find_by_major(vp->major);
	if (!vap) {
		err = send_version_nack(lp, 0, 0);
	} else if (vap->major != vp->major) {
		err = send_version_nack(lp, vap->major, vap->minor);
	} else {
		struct ldc_version ver = *vp;
		if (ver.minor > vap->minor)
			ver.minor = vap->minor;
		err = send_version_ack(lp, &ver);
		if (!err) {
			lp->ver = ver;
			lp->hs_state = LDC_HS_GOTVERS;
		}
	}
	if (err)
		return LDC_ABORT(lp);

	return 0;
}

static int process_ver_ack(struct ldc_channel *lp, struct ldc_version *vp)
{
	ldcdbg(HS, "GOT VERSION ACK major[%x] minor[%x]\n",
	       vp->major, vp->minor);

	if (lp->hs_state == LDC_HS_GOTVERS) {
		if (lp->ver.major != vp->major ||
		    lp->ver.minor != vp->minor)
			return LDC_ABORT(lp);
	} else {
		lp->ver = *vp;
		lp->hs_state = LDC_HS_GOTVERS;
	}
	if (send_rts(lp))
		return LDC_ABORT(lp);
	return 0;
}

static int process_ver_nack(struct ldc_channel *lp, struct ldc_version *vp)
{
	struct ldc_version *vap;
	struct ldc_packet *p;
	unsigned long new_tail;

	if (vp->major == 0 && vp->minor == 0)
		return LDC_ABORT(lp);

	vap = find_by_major(vp->major);
	if (!vap)
		return LDC_ABORT(lp);

	p = handshake_compose_ctrl(lp, LDC_INFO, LDC_VERS,
				   vap, sizeof(*vap),
				   &new_tail);
	if (!p)
		return LDC_ABORT(lp);

	return send_tx_packet(lp, p, new_tail);
}

static int process_version(struct ldc_channel *lp,
			   struct ldc_packet *p)
{
	struct ldc_version *vp;

	vp = (struct ldc_version *) p->u.u_data;

	switch (p->stype) {
	case LDC_INFO:
		return process_ver_info(lp, vp);

	case LDC_ACK:
		return process_ver_ack(lp, vp);

	case LDC_NACK:
		return process_ver_nack(lp, vp);

	default:
		return LDC_ABORT(lp);
	}
}

static int process_rts(struct ldc_channel *lp,
		       struct ldc_packet *p)
{
	ldcdbg(HS, "GOT RTS stype[%x] seqid[%x] env[%x]\n",
	       p->stype, p->seqid, p->env);

	if (p->stype != LDC_INFO ||
	    lp->hs_state != LDC_HS_GOTVERS ||
	    p->env != lp->cfg.mode)
		return LDC_ABORT(lp);

	lp->snd_nxt = p->seqid;
	lp->rcv_nxt = p->seqid;
	lp->hs_state = LDC_HS_SENTRTR;
	if (send_rtr(lp))
		return LDC_ABORT(lp);

	return 0;
}

static int process_rtr(struct ldc_channel *lp,
		       struct ldc_packet *p)
{
	ldcdbg(HS, "GOT RTR stype[%x] seqid[%x] env[%x]\n",
	       p->stype, p->seqid, p->env);

	if (p->stype != LDC_INFO ||
	    p->env != lp->cfg.mode)
		return LDC_ABORT(lp);

	lp->snd_nxt = p->seqid;
	lp->hs_state = LDC_HS_COMPLETE;
	ldc_set_state(lp, LDC_STATE_CONNECTED);
	send_rdx(lp);

	return LDC_EVENT_UP;
}

static int rx_seq_ok(struct ldc_channel *lp, u32 seqid)
{
	return lp->rcv_nxt + 1 == seqid;
}

static int process_rdx(struct ldc_channel *lp,
		       struct ldc_packet *p)
{
	ldcdbg(HS, "GOT RDX stype[%x] seqid[%x] env[%x] ackid[%x]\n",
	       p->stype, p->seqid, p->env, p->u.r.ackid);

	if (p->stype != LDC_INFO ||
	    !(rx_seq_ok(lp, p->seqid)))
		return LDC_ABORT(lp);

	lp->rcv_nxt = p->seqid;

	lp->hs_state = LDC_HS_COMPLETE;
	ldc_set_state(lp, LDC_STATE_CONNECTED);

	return LDC_EVENT_UP;
}
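
/* Control-frame handshake, as implemented by the handlers above (both
 * sides start in LDC_HS_OPEN after ldc_bind()):
 *
 *	1. ldc_connect() sends VER INFO (start_handshake).
 *	2. process_ver_info() replies with VER ACK for a common version
 *	   (clamping the minor) or VER NACK with the best it can offer.
 *	3. The side receiving VER ACK sends RTS (process_ver_ack).
 *	4. process_rts() records the initial sequence numbers and answers
 *	   with RTR, moving to LDC_HS_SENTRTR.
 *	5. process_rtr() marks the channel LDC_HS_COMPLETE / CONNECTED,
 *	   sends RDX and reports LDC_EVENT_UP; process_rdx() does the same
 *	   on the peer when the RDX arrives.
 */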

static int process_control_frame(struct ldc_channel *lp,
				 struct ldc_packet *p)
{
	switch (p->ctrl) {
	case LDC_VERS:
		return process_version(lp, p);

	case LDC_RTS:
		return process_rts(lp, p);

	case LDC_RTR:
		return process_rtr(lp, p);

	case LDC_RDX:
		return process_rdx(lp, p);

	default:
		return LDC_ABORT(lp);
	}
}

static int process_error_frame(struct ldc_channel *lp,
			       struct ldc_packet *p)
{
	return LDC_ABORT(lp);
}

static int process_data_ack(struct ldc_channel *lp,
			    struct ldc_packet *ack)
{
	unsigned long head = lp->tx_acked;
	u32 ackid = ack->u.r.ackid;

	while (1) {
		struct ldc_packet *p = lp->tx_base + (head / LDC_PACKET_SIZE);

		head = tx_advance(lp, head);

		if (p->seqid == ackid) {
			lp->tx_acked = head;
			return 0;
		}
		if (head == lp->tx_tail)
			return LDC_ABORT(lp);
	}

	return 0;
}

static void send_events(struct ldc_channel *lp, unsigned int event_mask)
{
	if (event_mask & LDC_EVENT_RESET)
		lp->cfg.event(lp->event_arg, LDC_EVENT_RESET);
	if (event_mask & LDC_EVENT_UP)
		lp->cfg.event(lp->event_arg, LDC_EVENT_UP);
	if (event_mask & LDC_EVENT_DATA_READY)
		lp->cfg.event(lp->event_arg, LDC_EVENT_DATA_READY);
}

static irqreturn_t ldc_rx(int irq, void *dev_id)
{
	struct ldc_channel *lp = dev_id;
	unsigned long orig_state, flags;
	unsigned int event_mask;

	spin_lock_irqsave(&lp->lock, flags);

	orig_state = lp->chan_state;

	/* We should probably check for hypervisor errors here and
	 * reset the LDC channel if we get one.
	 */
	sun4v_ldc_rx_get_state(lp->id,
			       &lp->rx_head,
			       &lp->rx_tail,
			       &lp->chan_state);

	ldcdbg(RX, "RX state[0x%02lx:0x%02lx] head[0x%04lx] tail[0x%04lx]\n",
	       orig_state, lp->chan_state, lp->rx_head, lp->rx_tail);

	event_mask = 0;

	if (lp->cfg.mode == LDC_MODE_RAW &&
	    lp->chan_state == LDC_CHANNEL_UP) {
		lp->hs_state = LDC_HS_COMPLETE;
		ldc_set_state(lp, LDC_STATE_CONNECTED);

		/*
		 * Generate an LDC_EVENT_UP event if the channel
		 * was not already up.
		 */
		if (orig_state != LDC_CHANNEL_UP) {
			event_mask |= LDC_EVENT_UP;
			orig_state = lp->chan_state;
		}
	}

	/* If we are in reset state, flush the RX queue and ignore
	 * everything.
	 */
	if (lp->flags & LDC_FLAG_RESET) {
		(void) ldc_rx_reset(lp);
		goto out;
	}

	/* Once we finish the handshake, we let the ldc_read()
	 * paths do all of the control frame and state management.
	 * Just trigger the callback.
	 */
	if (lp->hs_state == LDC_HS_COMPLETE) {
handshake_complete:
		if (lp->chan_state != orig_state) {
			unsigned int event = LDC_EVENT_RESET;

			if (lp->chan_state == LDC_CHANNEL_UP)
				event = LDC_EVENT_UP;

			event_mask |= event;
		}
		if (lp->rx_head != lp->rx_tail)
			event_mask |= LDC_EVENT_DATA_READY;

		goto out;
	}

	if (lp->chan_state != orig_state)
		goto out;

	while (lp->rx_head != lp->rx_tail) {
		struct ldc_packet *p;
		unsigned long new;
		int err;

		p = lp->rx_base + (lp->rx_head / LDC_PACKET_SIZE);

		switch (p->type) {
		case LDC_CTRL:
			err = process_control_frame(lp, p);
			if (err > 0)
				event_mask |= err;
			break;

		case LDC_DATA:
			event_mask |= LDC_EVENT_DATA_READY;
			err = 0;
			break;

		case LDC_ERR:
			err = process_error_frame(lp, p);
			break;

		default:
			err = LDC_ABORT(lp);
			break;
		}

		if (err < 0)
			break;

		new = lp->rx_head;
		new += LDC_PACKET_SIZE;
		if (new == (lp->rx_num_entries * LDC_PACKET_SIZE))
			new = 0;
		lp->rx_head = new;

		err = __set_rx_head(lp, new);
		if (err < 0) {
			(void) LDC_ABORT(lp);
			break;
		}
		if (lp->hs_state == LDC_HS_COMPLETE)
			goto handshake_complete;
	}

out:
	spin_unlock_irqrestore(&lp->lock, flags);

	send_events(lp, event_mask);

	return IRQ_HANDLED;
}

static irqreturn_t ldc_tx(int irq, void *dev_id)
{
	struct ldc_channel *lp = dev_id;
	unsigned long flags, orig_state;
	unsigned int event_mask = 0;

	spin_lock_irqsave(&lp->lock, flags);

	orig_state = lp->chan_state;

	/* We should probably check for hypervisor errors here and
	 * reset the LDC channel if we get one.
	 */
	sun4v_ldc_tx_get_state(lp->id,
			       &lp->tx_head,
			       &lp->tx_tail,
			       &lp->chan_state);

	ldcdbg(TX, " TX state[0x%02lx:0x%02lx] head[0x%04lx] tail[0x%04lx]\n",
	       orig_state, lp->chan_state, lp->tx_head, lp->tx_tail);

	if (lp->cfg.mode == LDC_MODE_RAW &&
	    lp->chan_state == LDC_CHANNEL_UP) {
		lp->hs_state = LDC_HS_COMPLETE;
		ldc_set_state(lp, LDC_STATE_CONNECTED);

		/*
		 * Generate an LDC_EVENT_UP event if the channel
		 * was not already up.
		 */
		if (orig_state != LDC_CHANNEL_UP) {
			event_mask |= LDC_EVENT_UP;
			orig_state = lp->chan_state;
		}
	}

	spin_unlock_irqrestore(&lp->lock, flags);

	send_events(lp, event_mask);

	return IRQ_HANDLED;
}

/* XXX ldc_alloc() and ldc_free() need to run under a mutex so
 * XXX that addition and removal from the ldc_channel_list has
 * XXX atomicity, otherwise the __ldc_channel_exists() check is
 * XXX totally pointless as another thread can slip into ldc_alloc()
 * XXX and add a channel with the same ID.  There also needs to be
 * XXX a spinlock for ldc_channel_list.
 */
static HLIST_HEAD(ldc_channel_list);

static int __ldc_channel_exists(unsigned long id)
{
	struct ldc_channel *lp;

	hlist_for_each_entry(lp, &ldc_channel_list, list) {
		if (lp->id == id)
			return 1;
	}
	return 0;
}

static int alloc_queue(const char *name, unsigned long num_entries,
		       struct ldc_packet **base, unsigned long *ra)
{
	unsigned long size, order;
	void *q;

	size = num_entries * LDC_PACKET_SIZE;
	order = get_order(size);

	q = (void *) __get_free_pages(GFP_KERNEL, order);
	if (!q) {
		printk(KERN_ERR PFX "Alloc of %s queue failed with "
		       "size=%lu order=%lu\n", name, size, order);
		return -ENOMEM;
	}

	memset(q, 0, PAGE_SIZE << order);

	*base = q;
	*ra = __pa(q);

	return 0;
}

static void free_queue(unsigned long num_entries, struct ldc_packet *q)
{
	unsigned long size, order;

	if (!q)
		return;

	size = num_entries * LDC_PACKET_SIZE;
	order = get_order(size);

	free_pages((unsigned long)q, order);
}

static unsigned long ldc_cookie_to_index(u64 cookie, void *arg)
{
	u64 szcode = cookie >> COOKIE_PGSZ_CODE_SHIFT;
	/* struct ldc_iommu *ldc_iommu = (struct ldc_iommu *)arg; */

	cookie &= ~COOKIE_PGSZ_CODE;

	return (cookie >> (13ULL + (szcode * 3ULL)));
}

static void ldc_demap(struct ldc_iommu *iommu, unsigned long id, u64 cookie,
		      unsigned long entry, unsigned long npages)
{
	struct ldc_mtable_entry *base;
	unsigned long i, shift;

	shift = (cookie >> COOKIE_PGSZ_CODE_SHIFT) * 3;
	base = iommu->page_table + entry;
	for (i = 0; i < npages; i++) {
		if (base->cookie)
			sun4v_ldc_revoke(id, cookie + (i << shift),
					 base->cookie);
		base->mte = 0;
	}
}

/* XXX Make this configurable... XXX */
#define LDC_IOTABLE_SIZE	(8 * 1024)

static int ldc_iommu_init(const char *name, struct ldc_channel *lp)
{
	unsigned long sz, num_tsb_entries, tsbsize, order;
	struct ldc_iommu *ldc_iommu = &lp->iommu;
	struct iommu_map_table *iommu = &ldc_iommu->iommu_map_table;
	struct ldc_mtable_entry *table;
	unsigned long hv_err;
	int err;

	num_tsb_entries = LDC_IOTABLE_SIZE;
	tsbsize = num_tsb_entries * sizeof(struct ldc_mtable_entry);
	spin_lock_init(&ldc_iommu->lock);

	sz = num_tsb_entries / 8;
	sz = (sz + 7UL) & ~7UL;
	iommu->map = kzalloc(sz, GFP_KERNEL);
	if (!iommu->map) {
		printk(KERN_ERR PFX "Alloc of arena map failed, sz=%lu\n", sz);
		return -ENOMEM;
	}
	iommu_tbl_pool_init(iommu, num_tsb_entries, PAGE_SHIFT,
			    NULL, false /* no large pool */,
			    1 /* npools */,
			    true /* skip span boundary check */);

	order = get_order(tsbsize);

	table = (struct ldc_mtable_entry *)
		__get_free_pages(GFP_KERNEL, order);
	err = -ENOMEM;
	if (!table) {
		printk(KERN_ERR PFX "Alloc of MTE table failed, "
		       "size=%lu order=%lu\n", tsbsize, order);
		goto out_free_map;
	}

	memset(table, 0, PAGE_SIZE << order);

	ldc_iommu->page_table = table;

	hv_err = sun4v_ldc_set_map_table(lp->id, __pa(table),
					 num_tsb_entries);
	err = -EINVAL;
	if (hv_err)
		goto out_free_table;

	return 0;

out_free_table:
	free_pages((unsigned long) table, order);
	ldc_iommu->page_table = NULL;

out_free_map:
	kfree(iommu->map);
	iommu->map = NULL;

	return err;
}

static void ldc_iommu_release(struct ldc_channel *lp)
{
	struct ldc_iommu *ldc_iommu = &lp->iommu;
	struct iommu_map_table *iommu = &ldc_iommu->iommu_map_table;
	unsigned long num_tsb_entries, tsbsize, order;

	(void) sun4v_ldc_set_map_table(lp->id, 0, 0);

	num_tsb_entries = iommu->poolsize * iommu->nr_pools;
	tsbsize = num_tsb_entries * sizeof(struct ldc_mtable_entry);
	order = get_order(tsbsize);

	free_pages((unsigned long) ldc_iommu->page_table, order);
	ldc_iommu->page_table = NULL;

	kfree(iommu->map);
	iommu->map = NULL;
}

struct ldc_channel *ldc_alloc(unsigned long id,
			      const struct ldc_channel_config *cfgp,
			      void *event_arg,
			      const char *name)
{
	struct ldc_channel *lp;
	const struct ldc_mode_ops *mops;
	unsigned long dummy1, dummy2, hv_err;
	u8 mss, *mssbuf;
	int err;

	err = -ENODEV;
	if (!ldom_domaining_enabled)
		goto out_err;

	err = -EINVAL;
	if (!cfgp)
		goto out_err;
	if (!name)
		goto out_err;

	switch (cfgp->mode) {
	case LDC_MODE_RAW:
		mops = &raw_ops;
		mss = LDC_PACKET_SIZE;
		break;

	case LDC_MODE_UNRELIABLE:
		mops = &nonraw_ops;
		mss = LDC_PACKET_SIZE - 8;
		break;

	case LDC_MODE_STREAM:
		mops = &stream_ops;
		mss = LDC_PACKET_SIZE - 8 - 8;
		break;

	default:
		goto out_err;
	}

	if (!cfgp->event || !event_arg || !cfgp->rx_irq || !cfgp->tx_irq)
		goto out_err;

	hv_err = sun4v_ldc_tx_qinfo(id, &dummy1, &dummy2);
	err = -ENODEV;
	if (hv_err == HV_ECHANNEL)
		goto out_err;

	err = -EEXIST;
	if (__ldc_channel_exists(id))
		goto out_err;

	mssbuf = NULL;

	lp = kzalloc(sizeof(*lp), GFP_KERNEL);
	err = -ENOMEM;
	if (!lp)
		goto out_err;

	spin_lock_init(&lp->lock);

	lp->id = id;

	err = ldc_iommu_init(name, lp);
	if (err)
		goto out_free_ldc;

	lp->mops = mops;
	lp->mss = mss;

	lp->cfg = *cfgp;
	if (!lp->cfg.mtu)
		lp->cfg.mtu = LDC_DEFAULT_MTU;

	if (lp->cfg.mode == LDC_MODE_STREAM) {
		mssbuf = kzalloc(lp->cfg.mtu, GFP_KERNEL);
		if (!mssbuf) {
			err = -ENOMEM;
			goto out_free_iommu;
		}
		lp->mssbuf = mssbuf;
	}

	lp->event_arg = event_arg;

	/* XXX allow setting via ldc_channel_config to override defaults
	 * XXX or use some formula based upon mtu
	 */
	lp->tx_num_entries = LDC_DEFAULT_NUM_ENTRIES;
	lp->rx_num_entries = LDC_DEFAULT_NUM_ENTRIES;

	err = alloc_queue("TX", lp->tx_num_entries,
			  &lp->tx_base, &lp->tx_ra);
	if (err)
		goto out_free_mssbuf;

	err = alloc_queue("RX", lp->rx_num_entries,
			  &lp->rx_base, &lp->rx_ra);
	if (err)
		goto out_free_txq;

	lp->flags |= LDC_FLAG_ALLOCED_QUEUES;

	lp->hs_state = LDC_HS_CLOSED;
	ldc_set_state(lp, LDC_STATE_INIT);

	INIT_HLIST_NODE(&lp->list);
	hlist_add_head(&lp->list, &ldc_channel_list);

	INIT_HLIST_HEAD(&lp->mh_list);

	snprintf(lp->rx_irq_name, LDC_IRQ_NAME_MAX, "%s RX", name);
	snprintf(lp->tx_irq_name, LDC_IRQ_NAME_MAX, "%s TX", name);

	err = request_irq(lp->cfg.rx_irq, ldc_rx, 0,
			  lp->rx_irq_name, lp);
	if (err)
		goto out_free_txq;

	err = request_irq(lp->cfg.tx_irq, ldc_tx, 0,
			  lp->tx_irq_name, lp);
	if (err) {
		free_irq(lp->cfg.rx_irq, lp);
		goto out_free_txq;
	}

	return lp;

out_free_txq:
	free_queue(lp->tx_num_entries, lp->tx_base);

out_free_mssbuf:
	kfree(mssbuf);

out_free_iommu:
	ldc_iommu_release(lp);

out_free_ldc:
	kfree(lp);

out_err:
	return ERR_PTR(err);
}
EXPORT_SYMBOL(ldc_alloc);

void ldc_unbind(struct ldc_channel *lp)
{
	if (lp->flags & LDC_FLAG_REGISTERED_IRQS) {
		free_irq(lp->cfg.rx_irq, lp);
		free_irq(lp->cfg.tx_irq, lp);
		lp->flags &= ~LDC_FLAG_REGISTERED_IRQS;
	}

	if (lp->flags & LDC_FLAG_REGISTERED_QUEUES) {
		sun4v_ldc_tx_qconf(lp->id, 0, 0);
		sun4v_ldc_rx_qconf(lp->id, 0, 0);
		lp->flags &= ~LDC_FLAG_REGISTERED_QUEUES;
	}
	if (lp->flags & LDC_FLAG_ALLOCED_QUEUES) {
		free_queue(lp->tx_num_entries, lp->tx_base);
		free_queue(lp->rx_num_entries, lp->rx_base);
		lp->flags &= ~LDC_FLAG_ALLOCED_QUEUES;
	}

	ldc_set_state(lp, LDC_STATE_INIT);
}
EXPORT_SYMBOL(ldc_unbind);

void ldc_free(struct ldc_channel *lp)
{
	ldc_unbind(lp);
	hlist_del(&lp->list);
	kfree(lp->mssbuf);
	ldc_iommu_release(lp);

	kfree(lp);
}
EXPORT_SYMBOL(ldc_free);

/* Bind the channel.  This registers the LDC queues with
 * the hypervisor and puts the channel into a pseudo-listening
 * state.  This does not initiate a handshake, ldc_connect() does
 * that.
 */
1303 */ 1304 int ldc_bind(struct ldc_channel *lp) 1305 { 1306 unsigned long hv_err, flags; 1307 int err = -EINVAL; 1308 1309 if (lp->state != LDC_STATE_INIT) 1310 return -EINVAL; 1311 1312 spin_lock_irqsave(&lp->lock, flags); 1313 1314 enable_irq(lp->cfg.rx_irq); 1315 enable_irq(lp->cfg.tx_irq); 1316 1317 lp->flags |= LDC_FLAG_REGISTERED_IRQS; 1318 1319 err = -ENODEV; 1320 hv_err = sun4v_ldc_tx_qconf(lp->id, 0, 0); 1321 if (hv_err) 1322 goto out_free_irqs; 1323 1324 hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries); 1325 if (hv_err) 1326 goto out_free_irqs; 1327 1328 hv_err = sun4v_ldc_rx_qconf(lp->id, 0, 0); 1329 if (hv_err) 1330 goto out_unmap_tx; 1331 1332 hv_err = sun4v_ldc_rx_qconf(lp->id, lp->rx_ra, lp->rx_num_entries); 1333 if (hv_err) 1334 goto out_unmap_tx; 1335 1336 lp->flags |= LDC_FLAG_REGISTERED_QUEUES; 1337 1338 hv_err = sun4v_ldc_tx_get_state(lp->id, 1339 &lp->tx_head, 1340 &lp->tx_tail, 1341 &lp->chan_state); 1342 err = -EBUSY; 1343 if (hv_err) 1344 goto out_unmap_rx; 1345 1346 lp->tx_acked = lp->tx_head; 1347 1348 lp->hs_state = LDC_HS_OPEN; 1349 ldc_set_state(lp, LDC_STATE_BOUND); 1350 1351 if (lp->cfg.mode == LDC_MODE_RAW) { 1352 /* 1353 * There is no handshake in RAW mode, so handshake 1354 * is completed. 1355 */ 1356 lp->hs_state = LDC_HS_COMPLETE; 1357 } 1358 1359 spin_unlock_irqrestore(&lp->lock, flags); 1360 1361 return 0; 1362 1363 out_unmap_rx: 1364 lp->flags &= ~LDC_FLAG_REGISTERED_QUEUES; 1365 sun4v_ldc_rx_qconf(lp->id, 0, 0); 1366 1367 out_unmap_tx: 1368 sun4v_ldc_tx_qconf(lp->id, 0, 0); 1369 1370 out_free_irqs: 1371 lp->flags &= ~LDC_FLAG_REGISTERED_IRQS; 1372 free_irq(lp->cfg.tx_irq, lp); 1373 free_irq(lp->cfg.rx_irq, lp); 1374 1375 spin_unlock_irqrestore(&lp->lock, flags); 1376 1377 return err; 1378 } 1379 EXPORT_SYMBOL(ldc_bind); 1380 1381 int ldc_connect(struct ldc_channel *lp) 1382 { 1383 unsigned long flags; 1384 int err; 1385 1386 if (lp->cfg.mode == LDC_MODE_RAW) 1387 return -EINVAL; 1388 1389 spin_lock_irqsave(&lp->lock, flags); 1390 1391 if (!(lp->flags & LDC_FLAG_ALLOCED_QUEUES) || 1392 !(lp->flags & LDC_FLAG_REGISTERED_QUEUES) || 1393 lp->hs_state != LDC_HS_OPEN) 1394 err = ((lp->hs_state > LDC_HS_OPEN) ? 

int ldc_disconnect(struct ldc_channel *lp)
{
	unsigned long hv_err, flags;
	int err;

	if (lp->cfg.mode == LDC_MODE_RAW)
		return -EINVAL;

	if (!(lp->flags & LDC_FLAG_ALLOCED_QUEUES) ||
	    !(lp->flags & LDC_FLAG_REGISTERED_QUEUES))
		return -EINVAL;

	spin_lock_irqsave(&lp->lock, flags);

	err = -ENODEV;
	hv_err = sun4v_ldc_tx_qconf(lp->id, 0, 0);
	if (hv_err)
		goto out_err;

	hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries);
	if (hv_err)
		goto out_err;

	hv_err = sun4v_ldc_rx_qconf(lp->id, 0, 0);
	if (hv_err)
		goto out_err;

	hv_err = sun4v_ldc_rx_qconf(lp->id, lp->rx_ra, lp->rx_num_entries);
	if (hv_err)
		goto out_err;

	ldc_set_state(lp, LDC_STATE_BOUND);
	lp->hs_state = LDC_HS_OPEN;
	lp->flags |= LDC_FLAG_RESET;

	spin_unlock_irqrestore(&lp->lock, flags);

	return 0;

out_err:
	sun4v_ldc_tx_qconf(lp->id, 0, 0);
	sun4v_ldc_rx_qconf(lp->id, 0, 0);
	free_irq(lp->cfg.tx_irq, lp);
	free_irq(lp->cfg.rx_irq, lp);
	lp->flags &= ~(LDC_FLAG_REGISTERED_IRQS |
		       LDC_FLAG_REGISTERED_QUEUES);
	ldc_set_state(lp, LDC_STATE_INIT);

	spin_unlock_irqrestore(&lp->lock, flags);

	return err;
}
EXPORT_SYMBOL(ldc_disconnect);

int ldc_state(struct ldc_channel *lp)
{
	return lp->state;
}
EXPORT_SYMBOL(ldc_state);

void ldc_set_state(struct ldc_channel *lp, u8 state)
{
	ldcdbg(STATE, "STATE (%s) --> (%s)\n",
	       state_to_str(lp->state),
	       state_to_str(state));

	lp->state = state;
}
EXPORT_SYMBOL(ldc_set_state);

int ldc_mode(struct ldc_channel *lp)
{
	return lp->cfg.mode;
}
EXPORT_SYMBOL(ldc_mode);

int ldc_rx_reset(struct ldc_channel *lp)
{
	return __set_rx_head(lp, lp->rx_tail);
}
EXPORT_SYMBOL(ldc_rx_reset);

void __ldc_print(struct ldc_channel *lp, const char *caller)
{
	pr_info("%s: id=0x%lx flags=0x%x state=%s cstate=0x%lx hsstate=0x%x\n"
		"\trx_h=0x%lx rx_t=0x%lx rx_n=%ld\n"
		"\ttx_h=0x%lx tx_t=0x%lx tx_n=%ld\n"
		"\trcv_nxt=%u snd_nxt=%u\n",
		caller, lp->id, lp->flags, state_to_str(lp->state),
		lp->chan_state, lp->hs_state,
		lp->rx_head, lp->rx_tail, lp->rx_num_entries,
		lp->tx_head, lp->tx_tail, lp->tx_num_entries,
		lp->rcv_nxt, lp->snd_nxt);
}
EXPORT_SYMBOL(__ldc_print);

static int write_raw(struct ldc_channel *lp, const void *buf, unsigned int size)
{
	struct ldc_packet *p;
	unsigned long new_tail, hv_err;
	int err;

	hv_err = sun4v_ldc_tx_get_state(lp->id, &lp->tx_head, &lp->tx_tail,
					&lp->chan_state);
	if (unlikely(hv_err))
		return -EBUSY;

	if (unlikely(lp->chan_state != LDC_CHANNEL_UP))
		return LDC_ABORT(lp);

	if (size > LDC_PACKET_SIZE)
		return -EMSGSIZE;

	p = data_get_tx_packet(lp, &new_tail);
	if (!p)
		return -EAGAIN;

	memcpy(p, buf, size);

	err = send_tx_packet(lp, p, new_tail);
	if (!err)
		err = size;

	return err;
}

static int read_raw(struct ldc_channel *lp, void *buf, unsigned int size)
{
	struct ldc_packet *p;
	unsigned long hv_err, new;
	int err;

	if (size < LDC_PACKET_SIZE)
		return -EINVAL;

	hv_err = sun4v_ldc_rx_get_state(lp->id,
					&lp->rx_head,
					&lp->rx_tail,
					&lp->chan_state);
	if (hv_err)
		return LDC_ABORT(lp);

	if (lp->chan_state == LDC_CHANNEL_DOWN ||
	    lp->chan_state == LDC_CHANNEL_RESETTING)
		return -ECONNRESET;

	if (lp->rx_head == lp->rx_tail)
		return 0;

	p = lp->rx_base + (lp->rx_head / LDC_PACKET_SIZE);
	memcpy(buf, p, LDC_PACKET_SIZE);

	new = rx_advance(lp, lp->rx_head);
	lp->rx_head = new;

	err = __set_rx_head(lp, new);
	if (err < 0)
		err = -ECONNRESET;
	else
		err = LDC_PACKET_SIZE;

	return err;
}

static const struct ldc_mode_ops raw_ops = {
	.write	= write_raw,
	.read	= read_raw,
};

static int write_nonraw(struct ldc_channel *lp, const void *buf,
			unsigned int size)
{
	unsigned long hv_err, tail;
	unsigned int copied;
	u32 seq;
	int err;

	hv_err = sun4v_ldc_tx_get_state(lp->id, &lp->tx_head, &lp->tx_tail,
					&lp->chan_state);
	if (unlikely(hv_err))
		return -EBUSY;

	if (unlikely(lp->chan_state != LDC_CHANNEL_UP))
		return LDC_ABORT(lp);

	if (!tx_has_space_for(lp, size))
		return -EAGAIN;

	seq = lp->snd_nxt;
	copied = 0;
	tail = lp->tx_tail;
	while (copied < size) {
		struct ldc_packet *p = lp->tx_base + (tail / LDC_PACKET_SIZE);
		u8 *data = ((lp->cfg.mode == LDC_MODE_UNRELIABLE) ?
			    p->u.u_data :
			    p->u.r.r_data);
		int data_len;

		p->type = LDC_DATA;
		p->stype = LDC_INFO;
		p->ctrl = 0;

		data_len = size - copied;
		if (data_len > lp->mss)
			data_len = lp->mss;

		BUG_ON(data_len > LDC_LEN);

		p->env = (data_len |
			  (copied == 0 ? LDC_START : 0) |
			  (data_len == size - copied ? LDC_STOP : 0));

		p->seqid = ++seq;

		ldcdbg(DATA, "SENT DATA [%02x:%02x:%02x:%02x:%08x]\n",
		       p->type,
		       p->stype,
		       p->ctrl,
		       p->env,
		       p->seqid);

		memcpy(data, buf, data_len);
		buf += data_len;
		copied += data_len;

		tail = tx_advance(lp, tail);
	}

	err = set_tx_tail(lp, tail);
	if (!err) {
		lp->snd_nxt = seq;
		err = size;
	}

	return err;
}
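
/* Fragmentation example: in UNRELIABLE mode mss is LDC_PACKET_SIZE - 8
 * (56 bytes of payload per frame), so a 100-byte write_nonraw() emits
 * two frames:
 *
 *	frame 1: env = 56 | LDC_START		(first 56 bytes)
 *	frame 2: env = 44 | LDC_STOP		(remaining 44 bytes)
 *
 * A write that fits in a single frame carries both LDC_START and LDC_STOP.
 */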

static int rx_bad_seq(struct ldc_channel *lp, struct ldc_packet *p,
		      struct ldc_packet *first_frag)
{
	int err;

	if (first_frag)
		lp->rcv_nxt = first_frag->seqid - 1;

	err = send_data_nack(lp, p);
	if (err)
		return err;

	err = ldc_rx_reset(lp);
	if (err < 0)
		return LDC_ABORT(lp);

	return 0;
}

static int data_ack_nack(struct ldc_channel *lp, struct ldc_packet *p)
{
	if (p->stype & LDC_ACK) {
		int err = process_data_ack(lp, p);
		if (err)
			return err;
	}
	if (p->stype & LDC_NACK)
		return LDC_ABORT(lp);

	return 0;
}

static int rx_data_wait(struct ldc_channel *lp, unsigned long cur_head)
{
	unsigned long dummy;
	int limit = 1000;

	ldcdbg(DATA, "DATA WAIT cur_head[%lx] rx_head[%lx] rx_tail[%lx]\n",
	       cur_head, lp->rx_head, lp->rx_tail);
	while (limit-- > 0) {
		unsigned long hv_err;

		hv_err = sun4v_ldc_rx_get_state(lp->id,
						&dummy,
						&lp->rx_tail,
						&lp->chan_state);
		if (hv_err)
			return LDC_ABORT(lp);

		if (lp->chan_state == LDC_CHANNEL_DOWN ||
		    lp->chan_state == LDC_CHANNEL_RESETTING)
			return -ECONNRESET;

		if (cur_head != lp->rx_tail) {
			ldcdbg(DATA, "DATA WAIT DONE "
			       "head[%lx] tail[%lx] chan_state[%lx]\n",
			       dummy, lp->rx_tail, lp->chan_state);
			return 0;
		}

		udelay(1);
	}
	return -EAGAIN;
}

static int rx_set_head(struct ldc_channel *lp, unsigned long head)
{
	int err = __set_rx_head(lp, head);

	if (err < 0)
		return LDC_ABORT(lp);

	lp->rx_head = head;
	return 0;
}

static void send_data_ack(struct ldc_channel *lp)
{
	unsigned long new_tail;
	struct ldc_packet *p;

	p = data_get_tx_packet(lp, &new_tail);
	if (likely(p)) {
		int err;

		memset(p, 0, sizeof(*p));
		p->type = LDC_DATA;
		p->stype = LDC_ACK;
		p->ctrl = 0;
		p->seqid = lp->snd_nxt + 1;
		p->u.r.ackid = lp->rcv_nxt;

		err = send_tx_packet(lp, p, new_tail);
		if (!err)
			lp->snd_nxt++;
	}
}

static int read_nonraw(struct ldc_channel *lp, void *buf, unsigned int size)
{
	struct ldc_packet *first_frag;
	unsigned long hv_err, new;
	int err, copied;

	hv_err = sun4v_ldc_rx_get_state(lp->id,
					&lp->rx_head,
					&lp->rx_tail,
					&lp->chan_state);
	if (hv_err)
		return LDC_ABORT(lp);

	if (lp->chan_state == LDC_CHANNEL_DOWN ||
	    lp->chan_state == LDC_CHANNEL_RESETTING)
		return -ECONNRESET;

	if (lp->rx_head == lp->rx_tail)
		return 0;

	first_frag = NULL;
	copied = err = 0;
	new = lp->rx_head;
	while (1) {
		struct ldc_packet *p;
		int pkt_len;

		BUG_ON(new == lp->rx_tail);
		p = lp->rx_base + (new / LDC_PACKET_SIZE);

		ldcdbg(RX, "RX read pkt[%02x:%02x:%02x:%02x:%08x:%08x] "
		       "rcv_nxt[%08x]\n",
		       p->type,
		       p->stype,
		       p->ctrl,
		       p->env,
		       p->seqid,
		       p->u.r.ackid,
		       lp->rcv_nxt);

		if (unlikely(!rx_seq_ok(lp, p->seqid))) {
			err = rx_bad_seq(lp, p, first_frag);
			copied = 0;
			break;
		}

		if (p->type & LDC_CTRL) {
			err = process_control_frame(lp, p);
			if (err < 0)
				break;
			err = 0;
		}

		lp->rcv_nxt = p->seqid;

		/*
		 * If this is a control-only packet, there is nothing
		 * else to do but advance the rx queue since the packet
		 * was already processed above.
		 */
		if (!(p->type & LDC_DATA)) {
			new = rx_advance(lp, new);
			break;
		}
		if (p->stype & (LDC_ACK | LDC_NACK)) {
			err = data_ack_nack(lp, p);
			if (err)
				break;
		}
		if (!(p->stype & LDC_INFO)) {
			new = rx_advance(lp, new);
			err = rx_set_head(lp, new);
			if (err)
				break;
			goto no_data;
		}

		pkt_len = p->env & LDC_LEN;

		/* Every initial packet starts with the START bit set.
		 *
		 * Singleton packets will have both START+STOP set.
		 *
		 * Fragments will have START set in the first frame, STOP
		 * set in the last frame, and neither bit set in middle
		 * frames of the packet.
		 *
		 * Therefore if we are at the beginning of a packet and
		 * we don't see START, or we are in the middle of a fragmented
		 * packet and do see START, we are unsynchronized and should
		 * flush the RX queue.
		 */
		if ((first_frag == NULL && !(p->env & LDC_START)) ||
		    (first_frag != NULL && (p->env & LDC_START))) {
			if (!first_frag)
				new = rx_advance(lp, new);

			err = rx_set_head(lp, new);
			if (err)
				break;

			if (!first_frag)
				goto no_data;
		}
		if (!first_frag)
			first_frag = p;

		if (pkt_len > size - copied) {
			/* User didn't give us a big enough buffer,
			 * what to do?  This is a pretty serious error.
			 *
			 * Since we haven't updated the RX ring head to
			 * consume any of the packets, signal the error
			 * to the user and just leave the RX ring alone.
			 *
			 * This seems the best behavior because this allows
			 * a user of the LDC layer to start with a small
			 * RX buffer for ldc_read() calls and use -EMSGSIZE
			 * as a cue to enlarge its read buffer.
			 */
			err = -EMSGSIZE;
			break;
		}

		/* Ok, we are gonna eat this one. */
		new = rx_advance(lp, new);

		memcpy(buf,
		       (lp->cfg.mode == LDC_MODE_UNRELIABLE ?
			p->u.u_data : p->u.r.r_data), pkt_len);
		buf += pkt_len;
		copied += pkt_len;

		if (p->env & LDC_STOP)
			break;

no_data:
		if (new == lp->rx_tail) {
			err = rx_data_wait(lp, new);
			if (err)
				break;
		}
	}

	if (!err)
		err = rx_set_head(lp, new);

	if (err && first_frag)
		lp->rcv_nxt = first_frag->seqid - 1;

	if (!err) {
		err = copied;
		if (err > 0 && lp->cfg.mode != LDC_MODE_UNRELIABLE)
			send_data_ack(lp);
	}

	return err;
}

static const struct ldc_mode_ops nonraw_ops = {
	.write	= write_nonraw,
	.read	= read_nonraw,
};

static int write_stream(struct ldc_channel *lp, const void *buf,
			unsigned int size)
{
	if (size > lp->cfg.mtu)
		size = lp->cfg.mtu;
	return write_nonraw(lp, buf, size);
}

static int read_stream(struct ldc_channel *lp, void *buf, unsigned int size)
{
	if (!lp->mssbuf_len) {
		int err = read_nonraw(lp, lp->mssbuf, lp->cfg.mtu);
		if (err < 0)
			return err;

		lp->mssbuf_len = err;
		lp->mssbuf_off = 0;
	}

	if (size > lp->mssbuf_len)
		size = lp->mssbuf_len;
	memcpy(buf, lp->mssbuf + lp->mssbuf_off, size);

	lp->mssbuf_off += size;
	lp->mssbuf_len -= size;

	return size;
}

static const struct ldc_mode_ops stream_ops = {
	.write	= write_stream,
	.read	= read_stream,
};

int ldc_write(struct ldc_channel *lp, const void *buf, unsigned int size)
{
	unsigned long flags;
	int err;

	if (!buf)
		return -EINVAL;

	if (!size)
		return 0;

	spin_lock_irqsave(&lp->lock, flags);

	if (lp->hs_state != LDC_HS_COMPLETE)
		err = -ENOTCONN;
	else
		err = lp->mops->write(lp, buf, size);

	spin_unlock_irqrestore(&lp->lock, flags);

	return err;
}
EXPORT_SYMBOL(ldc_write);

int ldc_read(struct ldc_channel *lp, void *buf, unsigned int size)
{
	unsigned long flags;
	int err;

	ldcdbg(RX, "%s: entered size=%d\n", __func__, size);

	if (!buf)
		return -EINVAL;

	if (!size)
		return 0;

	spin_lock_irqsave(&lp->lock, flags);

	if (lp->hs_state != LDC_HS_COMPLETE)
		err = -ENOTCONN;
	else
		err = lp->mops->read(lp, buf, size);

	spin_unlock_irqrestore(&lp->lock, flags);

	ldcdbg(RX, "%s: mode=%d, head=%lu, tail=%lu rv=%d\n", __func__,
	       lp->cfg.mode, lp->rx_head, lp->rx_tail, err);

	return err;
}
EXPORT_SYMBOL(ldc_read);

static u64 pagesize_code(void)
{
	switch (PAGE_SIZE) {
	default:
	case (8ULL * 1024ULL):
		return 0;
	case (64ULL * 1024ULL):
		return 1;
	case (512ULL * 1024ULL):
		return 2;
	case (4ULL * 1024ULL * 1024ULL):
		return 3;
	case (32ULL * 1024ULL * 1024ULL):
		return 4;
	case (256ULL * 1024ULL * 1024ULL):
		return 5;
	}
}

static u64 make_cookie(u64 index, u64 pgsz_code, u64 page_offset)
{
	return ((pgsz_code << COOKIE_PGSZ_CODE_SHIFT) |
		(index << PAGE_SHIFT) |
		page_offset);
}
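
/* Cookie layout example, assuming the default 8K base page size
 * (pagesize_code() == 0, PAGE_SHIFT == 13): mapping table entry 5 with a
 * page offset of 0x10 yields
 *
 *	cookie = (0 << 60) | (5 << 13) | 0x10 = 0xa010
 *
 * and ldc_cookie_to_index() recovers the entry with
 * (0xa010 & ~COOKIE_PGSZ_CODE) >> (13 + 0 * 3) == 5.
 */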

static struct ldc_mtable_entry *alloc_npages(struct ldc_iommu *iommu,
					     unsigned long npages)
{
	long entry;

	entry = iommu_tbl_range_alloc(NULL, &iommu->iommu_map_table,
				      npages, NULL, (unsigned long)-1, 0);
	if (unlikely(entry == IOMMU_ERROR_CODE))
		return NULL;

	return iommu->page_table + entry;
}

static u64 perm_to_mte(unsigned int map_perm)
{
	u64 mte_base;

	mte_base = pagesize_code();

	if (map_perm & LDC_MAP_SHADOW) {
		if (map_perm & LDC_MAP_R)
			mte_base |= LDC_MTE_COPY_R;
		if (map_perm & LDC_MAP_W)
			mte_base |= LDC_MTE_COPY_W;
	}
	if (map_perm & LDC_MAP_DIRECT) {
		if (map_perm & LDC_MAP_R)
			mte_base |= LDC_MTE_READ;
		if (map_perm & LDC_MAP_W)
			mte_base |= LDC_MTE_WRITE;
		if (map_perm & LDC_MAP_X)
			mte_base |= LDC_MTE_EXEC;
	}
	if (map_perm & LDC_MAP_IO) {
		if (map_perm & LDC_MAP_R)
			mte_base |= LDC_MTE_IOMMU_R;
		if (map_perm & LDC_MAP_W)
			mte_base |= LDC_MTE_IOMMU_W;
	}

	return mte_base;
}

static int pages_in_region(unsigned long base, long len)
{
	int count = 0;

	do {
		unsigned long new = (base + PAGE_SIZE) & PAGE_MASK;

		len -= (new - base);
		base = new;
		count++;
	} while (len > 0);

	return count;
}

struct cookie_state {
	struct ldc_mtable_entry	*page_table;
	struct ldc_trans_cookie	*cookies;
	u64			mte_base;
	u64			prev_cookie;
	u32			pte_idx;
	u32			nc;
};

static void fill_cookies(struct cookie_state *sp, unsigned long pa,
			 unsigned long off, unsigned long len)
{
	do {
		unsigned long tlen, new = pa + PAGE_SIZE;
		u64 this_cookie;

		sp->page_table[sp->pte_idx].mte = sp->mte_base | pa;

		tlen = PAGE_SIZE;
		if (off)
			tlen = PAGE_SIZE - off;
		if (tlen > len)
			tlen = len;

		this_cookie = make_cookie(sp->pte_idx,
					  pagesize_code(), off);

		off = 0;

		if (this_cookie == sp->prev_cookie) {
			sp->cookies[sp->nc - 1].cookie_size += tlen;
		} else {
			sp->cookies[sp->nc].cookie_addr = this_cookie;
			sp->cookies[sp->nc].cookie_size = tlen;
			sp->nc++;
		}
		sp->prev_cookie = this_cookie + tlen;

		sp->pte_idx++;

		len -= tlen;
		pa = new;
	} while (len > 0);
}

static int sg_count_one(struct scatterlist *sg)
{
	unsigned long base = page_to_pfn(sg_page(sg)) << PAGE_SHIFT;
	long len = sg->length;

	if ((sg->offset | len) & (8UL - 1))
		return -EFAULT;

	return pages_in_region(base + sg->offset, len);
}

static int sg_count_pages(struct scatterlist *sg, int num_sg)
{
	int count;
	int i;

	count = 0;
	for (i = 0; i < num_sg; i++) {
		int err = sg_count_one(sg + i);
		if (err < 0)
			return err;
		count += err;
	}

	return count;
}

int ldc_map_sg(struct ldc_channel *lp,
	       struct scatterlist *sg, int num_sg,
	       struct ldc_trans_cookie *cookies, int ncookies,
	       unsigned int map_perm)
{
	unsigned long i, npages;
	struct ldc_mtable_entry *base;
	struct cookie_state state;
	struct ldc_iommu *iommu;
	int err;
	struct scatterlist *s;

	if (map_perm & ~LDC_MAP_ALL)
		return -EINVAL;

	err = sg_count_pages(sg, num_sg);
	if (err < 0)
		return err;

	npages = err;
	if (err > ncookies)
		return -EMSGSIZE;

	iommu = &lp->iommu;

	base = alloc_npages(iommu, npages);

	if (!base)
		return -ENOMEM;

	state.page_table = iommu->page_table;
	state.cookies = cookies;
	state.mte_base = perm_to_mte(map_perm);
	state.prev_cookie = ~(u64)0;
	state.pte_idx = (base - iommu->page_table);
	state.nc = 0;

	for_each_sg(sg, s, num_sg, i) {
		fill_cookies(&state, page_to_pfn(sg_page(s)) << PAGE_SHIFT,
			     s->offset, s->length);
	}

	return state.nc;
}
EXPORT_SYMBOL(ldc_map_sg);
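
/* Export example (hypothetical; the buffer, its size and the cookie count
 * are illustrative): a driver that wants to expose an 8-byte-aligned
 * buffer to its peer maps it once, hands the resulting cookies over via
 * its own protocol, and releases them later with ldc_unmap(); the peer
 * then moves data against those cookies with ldc_copy().
 *
 *	struct ldc_trans_cookie cookies[2];
 *	int nc = ldc_map_single(lp, buf, len, cookies, 2,
 *				LDC_MAP_SHADOW | LDC_MAP_R | LDC_MAP_W);
 *	// nc < 0 on error, else the number of cookies filled in
 */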

int ldc_map_single(struct ldc_channel *lp,
		   void *buf, unsigned int len,
		   struct ldc_trans_cookie *cookies, int ncookies,
		   unsigned int map_perm)
{
	unsigned long npages, pa;
	struct ldc_mtable_entry *base;
	struct cookie_state state;
	struct ldc_iommu *iommu;

	if ((map_perm & ~LDC_MAP_ALL) || (ncookies < 1))
		return -EINVAL;

	pa = __pa(buf);
	if ((pa | len) & (8UL - 1))
		return -EFAULT;

	npages = pages_in_region(pa, len);

	iommu = &lp->iommu;

	base = alloc_npages(iommu, npages);

	if (!base)
		return -ENOMEM;

	state.page_table = iommu->page_table;
	state.cookies = cookies;
	state.mte_base = perm_to_mte(map_perm);
	state.prev_cookie = ~(u64)0;
	state.pte_idx = (base - iommu->page_table);
	state.nc = 0;
	fill_cookies(&state, (pa & PAGE_MASK), (pa & ~PAGE_MASK), len);
	BUG_ON(state.nc > ncookies);

	return state.nc;
}
EXPORT_SYMBOL(ldc_map_single);


static void free_npages(unsigned long id, struct ldc_iommu *iommu,
			u64 cookie, u64 size)
{
	unsigned long npages, entry;

	npages = PAGE_ALIGN(((cookie & ~PAGE_MASK) + size)) >> PAGE_SHIFT;

	entry = ldc_cookie_to_index(cookie, iommu);
	ldc_demap(iommu, id, cookie, entry, npages);
	iommu_tbl_range_free(&iommu->iommu_map_table, cookie, npages, entry);
}

void ldc_unmap(struct ldc_channel *lp, struct ldc_trans_cookie *cookies,
	       int ncookies)
{
	struct ldc_iommu *iommu = &lp->iommu;
	int i;
	unsigned long flags;

	spin_lock_irqsave(&iommu->lock, flags);
	for (i = 0; i < ncookies; i++) {
		u64 addr = cookies[i].cookie_addr;
		u64 size = cookies[i].cookie_size;

		free_npages(lp->id, iommu, addr, size);
	}
	spin_unlock_irqrestore(&iommu->lock, flags);
}
EXPORT_SYMBOL(ldc_unmap);

int ldc_copy(struct ldc_channel *lp, int copy_dir,
	     void *buf, unsigned int len, unsigned long offset,
	     struct ldc_trans_cookie *cookies, int ncookies)
{
	unsigned int orig_len;
	unsigned long ra;
	int i;

	if (copy_dir != LDC_COPY_IN && copy_dir != LDC_COPY_OUT) {
		printk(KERN_ERR PFX "ldc_copy: ID[%lu] Bad copy_dir[%d]\n",
		       lp->id, copy_dir);
		return -EINVAL;
	}

	ra = __pa(buf);
	if ((ra | len | offset) & (8UL - 1)) {
		printk(KERN_ERR PFX "ldc_copy: ID[%lu] Unaligned buffer "
		       "ra[%lx] len[%x] offset[%lx]\n",
		       lp->id, ra, len, offset);
		return -EFAULT;
	}

	if (lp->hs_state != LDC_HS_COMPLETE ||
	    (lp->flags & LDC_FLAG_RESET)) {
		printk(KERN_ERR PFX "ldc_copy: ID[%lu] Link down hs_state[%x] "
		       "flags[%x]\n", lp->id, lp->hs_state, lp->flags);
		return -ECONNRESET;
	}

	orig_len = len;
	for (i = 0; i < ncookies; i++) {
		unsigned long cookie_raddr = cookies[i].cookie_addr;
		unsigned long this_len = cookies[i].cookie_size;
		unsigned long actual_len;

		if (unlikely(offset)) {
			unsigned long this_off = offset;

			if (this_off > this_len)
				this_off = this_len;

			offset -= this_off;
			this_len -= this_off;
			if (!this_len)
				continue;
			cookie_raddr += this_off;
		}

		if (this_len > len)
			this_len = len;

		while (1) {
			unsigned long hv_err;

			hv_err = sun4v_ldc_copy(lp->id, copy_dir,
						cookie_raddr, ra,
						this_len, &actual_len);
			if (unlikely(hv_err)) {
				printk(KERN_ERR PFX "ldc_copy: ID[%lu] "
				       "HV error %lu\n",
				       lp->id, hv_err);
				if (lp->hs_state != LDC_HS_COMPLETE ||
				    (lp->flags & LDC_FLAG_RESET))
					return -ECONNRESET;
				else
					return -EFAULT;
			}

			cookie_raddr += actual_len;
			ra += actual_len;
			len -= actual_len;
			if (actual_len == this_len)
				break;

			this_len -= actual_len;
		}

		if (!len)
			break;
	}

	/* It is caller policy what to do about short copies.
	 * For example, a networking driver can declare the
	 * packet a runt and drop it.
	 */

	return orig_len - len;
}
EXPORT_SYMBOL(ldc_copy);

void *ldc_alloc_exp_dring(struct ldc_channel *lp, unsigned int len,
			  struct ldc_trans_cookie *cookies, int *ncookies,
			  unsigned int map_perm)
{
	void *buf;
	int err;

	if (len & (8UL - 1))
		return ERR_PTR(-EINVAL);

	buf = kzalloc(len, GFP_ATOMIC);
	if (!buf)
		return ERR_PTR(-ENOMEM);

	err = ldc_map_single(lp, buf, len, cookies, *ncookies, map_perm);
	if (err < 0) {
		kfree(buf);
		return ERR_PTR(err);
	}
	*ncookies = err;

	return buf;
}
EXPORT_SYMBOL(ldc_alloc_exp_dring);

void ldc_free_exp_dring(struct ldc_channel *lp, void *buf, unsigned int len,
			struct ldc_trans_cookie *cookies, int ncookies)
{
	ldc_unmap(lp, cookies, ncookies);
	kfree(buf);
}
EXPORT_SYMBOL(ldc_free_exp_dring);

static int __init ldc_init(void)
{
	unsigned long major, minor;
	struct mdesc_handle *hp;
	const u64 *v;
	int err;
	u64 mp;

	hp = mdesc_grab();
	if (!hp)
		return -ENODEV;

	mp = mdesc_node_by_name(hp, MDESC_NODE_NULL, "platform");
	err = -ENODEV;
	if (mp == MDESC_NODE_NULL)
		goto out;

	v = mdesc_get_property(hp, mp, "domaining-enabled", NULL);
	if (!v)
		goto out;

	major = 1;
	minor = 0;
	if (sun4v_hvapi_register(HV_GRP_LDOM, major, &minor)) {
		printk(KERN_INFO PFX "Could not register LDOM hvapi.\n");
		goto out;
	}

	printk(KERN_INFO "%s", version);

	if (!*v) {
		printk(KERN_INFO PFX "Domaining disabled.\n");
		goto out;
	}
	ldom_domaining_enabled = 1;
	err = 0;

out:
	mdesc_release(hp);
	return err;
}

core_initcall(ldc_init);