// SPDX-License-Identifier: GPL-2.0
/*
 * ACRN_HSM: Handle I/O requests
 *
 * Copyright (C) 2020 Intel Corporation. All rights reserved.
 *
 * Authors:
 *	Jason Chen CJ <jason.cj.chen@intel.com>
 *	Fengwei Yin <fengwei.yin@intel.com>
 */

#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/kthread.h>
#include <linux/mm.h>
#include <linux/slab.h>

#include <asm/acrn.h>

#include "acrn_drv.h"

static void ioreq_pause(void);
static void ioreq_resume(void);

static void ioreq_dispatcher(struct work_struct *work);
static struct workqueue_struct *ioreq_wq;
static DECLARE_WORK(ioreq_work, ioreq_dispatcher);

static inline bool has_pending_request(struct acrn_ioreq_client *client)
{
	return !bitmap_empty(client->ioreqs_map, ACRN_IO_REQUEST_MAX);
}

static inline bool is_destroying(struct acrn_ioreq_client *client)
{
	return test_bit(ACRN_IOREQ_CLIENT_DESTROYING, &client->flags);
}

static int ioreq_complete_request(struct acrn_vm *vm, u16 vcpu,
				  struct acrn_io_request *acrn_req)
{
	bool polling_mode;
	int ret = 0;

	polling_mode = acrn_req->completion_polling;
	/* Make sure the writes are done before marking the request complete */
	smp_store_release(&acrn_req->processed, ACRN_IOREQ_STATE_COMPLETE);

	/*
	 * To fulfill the real-time requirement of several industry scenarios,
	 * like automotive, ACRN can run under the partition mode, in which
	 * User VMs and the Service VM are bound to dedicated CPU cores.
	 * Polling mode of handling the I/O request is introduced to achieve
	 * faster I/O request handling. In polling mode, the hypervisor polls
	 * for the I/O request's completion. Once an I/O request is marked as
	 * ACRN_IOREQ_STATE_COMPLETE, the hypervisor resumes from the polling
	 * point to continue the I/O request flow. Thus, the completion
	 * notification from the HSM is not needed. Please note that
	 * completion_polling needs to be read before the I/O request is
	 * marked as ACRN_IOREQ_STATE_COMPLETE to avoid racing with the
	 * hypervisor.
	 */
	if (!polling_mode) {
		ret = hcall_notify_req_finish(vm->vmid, vcpu);
		if (ret < 0)
			dev_err(acrn_dev.this_device,
				"Notify I/O request finished failed!\n");
	}

	return ret;
}
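/*
 * Ordering sketch (illustrative only, not executed code): a handler serving
 * a port-I/O read is expected to fill in the value and only then signal
 * completion, e.g.
 *
 *	req->reqs.pio_request.value = val;	(val: hypothetical emulated value)
 *	ioreq_complete_request(vm, vcpu, req);
 *
 * The smp_store_release() in ioreq_complete_request() publishes all prior
 * writes to the request before ACRN_IOREQ_STATE_COMPLETE becomes visible,
 * so the hypervisor never observes a completed request with stale data.
 */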
static int acrn_ioreq_complete_request(struct acrn_ioreq_client *client,
				       u16 vcpu,
				       struct acrn_io_request *acrn_req)
{
	int ret;

	if (vcpu >= client->vm->vcpu_num)
		return -EINVAL;

	clear_bit(vcpu, client->ioreqs_map);
	if (!acrn_req) {
		acrn_req = (struct acrn_io_request *)client->vm->ioreq_buf;
		acrn_req += vcpu;
	}

	ret = ioreq_complete_request(client->vm, vcpu, acrn_req);

	return ret;
}

int acrn_ioreq_request_default_complete(struct acrn_vm *vm, u16 vcpu)
{
	int ret = 0;

	spin_lock_bh(&vm->ioreq_clients_lock);
	if (vm->default_client)
		ret = acrn_ioreq_complete_request(vm->default_client,
						  vcpu, NULL);
	spin_unlock_bh(&vm->ioreq_clients_lock);

	return ret;
}

/**
 * acrn_ioreq_range_add() - Add an iorange monitored by an ioreq client
 * @client:	The ioreq client
 * @type:	Type (ACRN_IOREQ_TYPE_MMIO or ACRN_IOREQ_TYPE_PORTIO)
 * @start:	Start address of iorange
 * @end:	End address of iorange
 *
 * Return: 0 on success, <0 on error
 */
int acrn_ioreq_range_add(struct acrn_ioreq_client *client,
			 u32 type, u64 start, u64 end)
{
	struct acrn_ioreq_range *range;

	if (end < start) {
		dev_err(acrn_dev.this_device,
			"Invalid IO range [0x%llx,0x%llx]\n", start, end);
		return -EINVAL;
	}

	range = kzalloc(sizeof(*range), GFP_KERNEL);
	if (!range)
		return -ENOMEM;

	range->type = type;
	range->start = start;
	range->end = end;

	write_lock_bh(&client->range_lock);
	list_add(&range->list, &client->range_list);
	write_unlock_bh(&client->range_lock);

	return 0;
}

/**
 * acrn_ioreq_range_del() - Delete an iorange monitored by an ioreq client
 * @client:	The ioreq client
 * @type:	Type (ACRN_IOREQ_TYPE_MMIO or ACRN_IOREQ_TYPE_PORTIO)
 * @start:	Start address of iorange
 * @end:	End address of iorange
 */
void acrn_ioreq_range_del(struct acrn_ioreq_client *client,
			  u32 type, u64 start, u64 end)
{
	struct acrn_ioreq_range *range;

	write_lock_bh(&client->range_lock);
	list_for_each_entry(range, &client->range_list, list) {
		if (type == range->type &&
		    start == range->start &&
		    end == range->end) {
			list_del(&range->list);
			kfree(range);
			break;
		}
	}
	write_unlock_bh(&client->range_lock);
}
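/*
 * Note on range semantics (illustrative addresses only): @end is inclusive,
 * as checked by in_range() below. For example, a hypothetical 4 KiB MMIO
 * window at 0xa0000000 would be registered as
 *
 *	acrn_ioreq_range_add(client, ACRN_IOREQ_TYPE_MMIO,
 *			     0xa0000000, 0xa0000fff);
 *
 * and a later acrn_ioreq_range_del() with the same type/start/end removes it.
 */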
/*
 * ioreq_task() is the execution entity of the handler thread of an I/O
 * client. The handler callback of the I/O client is called within the
 * handler thread.
 */
static int ioreq_task(void *data)
{
	struct acrn_ioreq_client *client = data;
	struct acrn_io_request *req;
	unsigned long *ioreqs_map;
	int vcpu, ret;

	/*
	 * Lockless access to ioreqs_map is safe, because
	 * 1) set_bit() and clear_bit() are atomic operations.
	 * 2) I/O requests arrive serialized. The access flow of ioreqs_map is:
	 *	set_bit() - in ioreq_work handler
	 *	Handler callback handles corresponding I/O request
	 *	clear_bit() - in handler thread (including ACRN userspace)
	 *	Mark corresponding I/O request completed
	 *	Loop again if a new I/O request occurs
	 */
	ioreqs_map = client->ioreqs_map;
	while (!kthread_should_stop()) {
		acrn_ioreq_client_wait(client);
		while (has_pending_request(client)) {
			vcpu = find_first_bit(ioreqs_map, client->vm->vcpu_num);
			req = client->vm->ioreq_buf->req_slot + vcpu;
			ret = client->handler(client, req);
			if (ret < 0) {
				dev_err(acrn_dev.this_device,
					"IO handle failure: %d\n", ret);
				break;
			}
			acrn_ioreq_complete_request(client, vcpu, req);
		}
	}

	return 0;
}

/*
 * For the non-default I/O clients, give them a chance to complete the current
 * I/O requests if there are any. For the default I/O client, it is safe to
 * clear all pending I/O requests because the clearing request is from ACRN
 * userspace.
 */
void acrn_ioreq_request_clear(struct acrn_vm *vm)
{
	struct acrn_ioreq_client *client;
	bool has_pending = false;
	unsigned long vcpu;
	int retry = 10;

	/*
	 * IO requests of this VM will be completed directly in
	 * acrn_ioreq_dispatch if the ACRN_VM_FLAG_CLEARING_IOREQ flag is set.
	 */
	set_bit(ACRN_VM_FLAG_CLEARING_IOREQ, &vm->flags);

	/*
	 * acrn_ioreq_request_clear is only called in the VM reset case. Simply
	 * wait 100ms in total (10 retries of 10ms each) for the IO requests'
	 * completion.
	 */
	do {
		spin_lock_bh(&vm->ioreq_clients_lock);
		list_for_each_entry(client, &vm->ioreq_clients, list) {
			has_pending = has_pending_request(client);
			if (has_pending)
				break;
		}
		spin_unlock_bh(&vm->ioreq_clients_lock);

		if (has_pending)
			schedule_timeout_interruptible(HZ / 100);
	} while (has_pending && --retry > 0);
	if (retry == 0)
		dev_warn(acrn_dev.this_device,
			 "%s cannot flush pending request!\n", client->name);

	/* Clear all ioreqs belonging to the default client */
	spin_lock_bh(&vm->ioreq_clients_lock);
	client = vm->default_client;
	if (client) {
		vcpu = find_next_bit(client->ioreqs_map,
				     ACRN_IO_REQUEST_MAX, 0);
		while (vcpu < ACRN_IO_REQUEST_MAX) {
			acrn_ioreq_complete_request(client, vcpu, NULL);
			vcpu = find_next_bit(client->ioreqs_map,
					     ACRN_IO_REQUEST_MAX, vcpu + 1);
		}
	}
	spin_unlock_bh(&vm->ioreq_clients_lock);

	/* Clear ACRN_VM_FLAG_CLEARING_IOREQ flag after the clearing */
	clear_bit(ACRN_VM_FLAG_CLEARING_IOREQ, &vm->flags);
}

int acrn_ioreq_client_wait(struct acrn_ioreq_client *client)
{
	if (client->is_default) {
		/*
		 * In the default client, a user space thread waits on the
		 * waitqueue. The is_destroying() check is used to notify user
		 * space that the client is going to be destroyed.
		 */
		wait_event_interruptible(client->wq,
					 has_pending_request(client) ||
					 is_destroying(client));
		if (is_destroying(client))
			return -ENODEV;
	} else {
		wait_event_interruptible(client->wq,
					 has_pending_request(client) ||
					 kthread_should_stop());
	}

	return 0;
}
static bool is_cfg_addr(struct acrn_io_request *req)
{
	return ((req->type == ACRN_IOREQ_TYPE_PORTIO) &&
		(req->reqs.pio_request.address == 0xcf8));
}

static bool is_cfg_data(struct acrn_io_request *req)
{
	return ((req->type == ACRN_IOREQ_TYPE_PORTIO) &&
		((req->reqs.pio_request.address >= 0xcfc) &&
		 (req->reqs.pio_request.address < (0xcfc + 4))));
}

/* The low 8 bits of the supported pci_reg address */
#define PCI_LOWREG_MASK		0xFC
/* The high 4 bits of the supported pci_reg address */
#define PCI_HIGHREG_MASK	0xF00
/* Max number of supported functions */
#define PCI_FUNCMAX		7
/* Max number of supported slots */
#define PCI_SLOTMAX		31
/* Max number of supported buses */
#define PCI_BUSMAX		255
#define CONF1_ENABLE		0x80000000UL

/*
 * A PCI configuration space access via PIO 0xCF8 and 0xCFC normally consists
 * of the following two steps:
 *   1) a write of the address to port 0xCF8
 *   2) a data access to/from port 0xCFC
 * This function combines such paired PCI configuration space I/O requests
 * into one ACRN_IOREQ_TYPE_PCICFG type I/O request and continues the
 * processing.
 */
static bool handle_cf8cfc(struct acrn_vm *vm,
			  struct acrn_io_request *req, u16 vcpu)
{
	int offset, pci_cfg_addr, pci_reg;
	bool is_handled = false;

	if (is_cfg_addr(req)) {
		WARN_ON(req->reqs.pio_request.size != 4);
		if (req->reqs.pio_request.direction == ACRN_IOREQ_DIR_WRITE)
			vm->pci_conf_addr = req->reqs.pio_request.value;
		else
			req->reqs.pio_request.value = vm->pci_conf_addr;
		is_handled = true;
	} else if (is_cfg_data(req)) {
		if (!(vm->pci_conf_addr & CONF1_ENABLE)) {
			if (req->reqs.pio_request.direction ==
					ACRN_IOREQ_DIR_READ)
				req->reqs.pio_request.value = 0xffffffff;
			is_handled = true;
		} else {
			offset = req->reqs.pio_request.address - 0xcfc;

			req->type = ACRN_IOREQ_TYPE_PCICFG;
			pci_cfg_addr = vm->pci_conf_addr;
			req->reqs.pci_request.bus =
					(pci_cfg_addr >> 16) & PCI_BUSMAX;
			req->reqs.pci_request.dev =
					(pci_cfg_addr >> 11) & PCI_SLOTMAX;
			req->reqs.pci_request.func =
					(pci_cfg_addr >> 8) & PCI_FUNCMAX;
			pci_reg = (pci_cfg_addr & PCI_LOWREG_MASK) +
				  ((pci_cfg_addr >> 16) & PCI_HIGHREG_MASK);
			req->reqs.pci_request.reg = pci_reg + offset;
		}
	}

	if (is_handled)
		ioreq_complete_request(vm, vcpu, req);

	return is_handled;
}
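/*
 * Worked example (illustrative values only): a guest writing 0x80000810 to
 * port 0xCF8 and then reading 4 bytes from 0xCFC is decoded by
 * handle_cf8cfc() above as
 *
 *	enable = 0x80000810 & CONF1_ENABLE		-> set
 *	bus    = (0x80000810 >> 16) & PCI_BUSMAX	-> 0
 *	dev    = (0x80000810 >> 11) & PCI_SLOTMAX	-> 1
 *	func   = (0x80000810 >> 8)  & PCI_FUNCMAX	-> 0
 *	reg    = (0x80000810 & PCI_LOWREG_MASK) + offset -> 0x10
 *
 * i.e. a read of config register 0x10 (BAR0) of device 00:01.0, forwarded as
 * a single ACRN_IOREQ_TYPE_PCICFG request.
 */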
static bool in_range(struct acrn_ioreq_range *range,
		     struct acrn_io_request *req)
{
	bool ret = false;

	if (range->type == req->type) {
		switch (req->type) {
		case ACRN_IOREQ_TYPE_MMIO:
			if (req->reqs.mmio_request.address >= range->start &&
			    (req->reqs.mmio_request.address +
			     req->reqs.mmio_request.size - 1) <= range->end)
				ret = true;
			break;
		case ACRN_IOREQ_TYPE_PORTIO:
			if (req->reqs.pio_request.address >= range->start &&
			    (req->reqs.pio_request.address +
			     req->reqs.pio_request.size - 1) <= range->end)
				ret = true;
			break;
		default:
			break;
		}
	}

	return ret;
}

static struct acrn_ioreq_client *find_ioreq_client(struct acrn_vm *vm,
						   struct acrn_io_request *req)
{
	struct acrn_ioreq_client *client, *found = NULL;
	struct acrn_ioreq_range *range;

	lockdep_assert_held(&vm->ioreq_clients_lock);

	list_for_each_entry(client, &vm->ioreq_clients, list) {
		read_lock_bh(&client->range_lock);
		list_for_each_entry(range, &client->range_list, list) {
			if (in_range(range, req)) {
				found = client;
				break;
			}
		}
		read_unlock_bh(&client->range_lock);
		if (found)
			break;
	}
	return found ? found : vm->default_client;
}

/**
 * acrn_ioreq_client_create() - Create an ioreq client
 * @vm:		The VM that this client belongs to
 * @handler:	The ioreq_handler of the ioreq client. acrn_hsm will create a
 *		kernel thread and call the handler to handle I/O requests.
 * @priv:	Private data for the handler
 * @is_default:	If it is the default client
 * @name:	The name of the ioreq client
 *
 * Return: acrn_ioreq_client pointer on success, NULL on error
 */
struct acrn_ioreq_client *acrn_ioreq_client_create(struct acrn_vm *vm,
						   ioreq_handler_t handler,
						   void *priv, bool is_default,
						   const char *name)
{
	struct acrn_ioreq_client *client;

	if (!handler && !is_default) {
		dev_dbg(acrn_dev.this_device,
			"Cannot create non-default client w/o handler!\n");
		return NULL;
	}
	client = kzalloc(sizeof(*client), GFP_KERNEL);
	if (!client)
		return NULL;

	client->handler = handler;
	client->vm = vm;
	client->priv = priv;
	client->is_default = is_default;
	if (name)
		strncpy(client->name, name, sizeof(client->name) - 1);
	rwlock_init(&client->range_lock);
	INIT_LIST_HEAD(&client->range_list);
	init_waitqueue_head(&client->wq);

	if (client->handler) {
		client->thread = kthread_run(ioreq_task, client, "VM%u-%s",
					     client->vm->vmid, client->name);
		if (IS_ERR(client->thread)) {
			kfree(client);
			return NULL;
		}
	}

	spin_lock_bh(&vm->ioreq_clients_lock);
	if (is_default)
		vm->default_client = client;
	else
		list_add(&client->list, &vm->ioreq_clients);
	spin_unlock_bh(&vm->ioreq_clients_lock);

	dev_dbg(acrn_dev.this_device, "Created ioreq client %s.\n", name);
	return client;
}
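/*
 * Usage sketch (hypothetical in-kernel device model; the handler name, port
 * range and client name below are illustrative, not part of this driver):
 *
 *	static int uart_ioreq_handler(struct acrn_ioreq_client *client,
 *				      struct acrn_io_request *req)
 *	{
 *		... emulate the access described in req->reqs.pio_request;
 *		... returning 0 lets ioreq_task() complete the request
 *		return 0;
 *	}
 *
 *	client = acrn_ioreq_client_create(vm, uart_ioreq_handler,
 *					  NULL, false, "uart");
 *	if (client)
 *		acrn_ioreq_range_add(client, ACRN_IOREQ_TYPE_PORTIO,
 *				     0x3f8, 0x3ff);
 *	...
 *	acrn_ioreq_client_destroy(client);
 */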
/**
 * acrn_ioreq_client_destroy() - Destroy an ioreq client
 * @client:	The ioreq client
 */
void acrn_ioreq_client_destroy(struct acrn_ioreq_client *client)
{
	struct acrn_ioreq_range *range, *next;
	struct acrn_vm *vm = client->vm;

	dev_dbg(acrn_dev.this_device,
		"Destroy ioreq client %s.\n", client->name);
	ioreq_pause();
	set_bit(ACRN_IOREQ_CLIENT_DESTROYING, &client->flags);
	if (client->is_default)
		wake_up_interruptible(&client->wq);
	else
		kthread_stop(client->thread);

	spin_lock_bh(&vm->ioreq_clients_lock);
	if (client->is_default)
		vm->default_client = NULL;
	else
		list_del(&client->list);
	spin_unlock_bh(&vm->ioreq_clients_lock);

	write_lock_bh(&client->range_lock);
	list_for_each_entry_safe(range, next, &client->range_list, list) {
		list_del(&range->list);
		kfree(range);
	}
	write_unlock_bh(&client->range_lock);
	kfree(client);

	ioreq_resume();
}

static int acrn_ioreq_dispatch(struct acrn_vm *vm)
{
	struct acrn_ioreq_client *client;
	struct acrn_io_request *req;
	int i;

	for (i = 0; i < vm->vcpu_num; i++) {
		req = vm->ioreq_buf->req_slot + i;

		/* Order the read of the 'processed' field of acrn_io_request */
		if (smp_load_acquire(&req->processed) ==
				ACRN_IOREQ_STATE_PENDING) {
			/* Complete the IO request directly in clearing stage */
			if (test_bit(ACRN_VM_FLAG_CLEARING_IOREQ, &vm->flags)) {
				ioreq_complete_request(vm, i, req);
				continue;
			}
			if (handle_cf8cfc(vm, req, i))
				continue;

			spin_lock_bh(&vm->ioreq_clients_lock);
			client = find_ioreq_client(vm, req);
			if (!client) {
				dev_err(acrn_dev.this_device,
					"Failed to find ioreq client!\n");
				spin_unlock_bh(&vm->ioreq_clients_lock);
				return -EINVAL;
			}
			if (!client->is_default)
				req->kernel_handled = 1;
			else
				req->kernel_handled = 0;
			/*
			 * Make sure the writes are done before setting
			 * ACRN_IOREQ_STATE_PROCESSING
			 */
			smp_store_release(&req->processed,
					  ACRN_IOREQ_STATE_PROCESSING);
			set_bit(i, client->ioreqs_map);
			wake_up_interruptible(&client->wq);
			spin_unlock_bh(&vm->ioreq_clients_lock);
		}
	}

	return 0;
}

static void ioreq_dispatcher(struct work_struct *work)
{
	struct acrn_vm *vm;

	read_lock(&acrn_vm_list_lock);
	list_for_each_entry(vm, &acrn_vm_list, list) {
		if (!vm->ioreq_buf)
			break;
		acrn_ioreq_dispatch(vm);
	}
	read_unlock(&acrn_vm_list_lock);
}

static void ioreq_intr_handler(void)
{
	queue_work(ioreq_wq, &ioreq_work);
}

static void ioreq_pause(void)
{
	/* Flush and unarm the handler to ensure no I/O requests are pending */
	acrn_remove_intr_handler();
	drain_workqueue(ioreq_wq);
}

static void ioreq_resume(void)
{
	/* Schedule after enabling in case other clients missed an interrupt */
	acrn_setup_intr_handler(ioreq_intr_handler);
	queue_work(ioreq_wq, &ioreq_work);
}

int acrn_ioreq_intr_setup(void)
{
	acrn_setup_intr_handler(ioreq_intr_handler);
	ioreq_wq = alloc_workqueue("ioreq_wq",
				   WQ_HIGHPRI | WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
	if (!ioreq_wq) {
		dev_err(acrn_dev.this_device, "Failed to alloc workqueue!\n");
		acrn_remove_intr_handler();
		return -ENOMEM;
	}
	return 0;
}

void acrn_ioreq_intr_remove(void)
{
	if (ioreq_wq)
		destroy_workqueue(ioreq_wq);
	acrn_remove_intr_handler();
}

int acrn_ioreq_init(struct acrn_vm *vm, u64 buf_vma)
{
	struct acrn_ioreq_buffer *set_buffer;
	struct page *page;
	int ret;

	if (vm->ioreq_buf)
		return -EEXIST;

	set_buffer = kzalloc(sizeof(*set_buffer), GFP_KERNEL);
	if (!set_buffer)
		return -ENOMEM;

	ret = pin_user_pages_fast(buf_vma, 1,
				  FOLL_WRITE | FOLL_LONGTERM, &page);
	if (unlikely(ret != 1) || !page) {
		dev_err(acrn_dev.this_device, "Failed to pin ioreq page!\n");
		ret = -EFAULT;
		goto free_buf;
	}

	vm->ioreq_buf = page_address(page);
	vm->ioreq_page = page;
	set_buffer->ioreq_buf = page_to_phys(page);
	ret = hcall_set_ioreq_buffer(vm->vmid, virt_to_phys(set_buffer));
	if (ret < 0) {
		dev_err(acrn_dev.this_device, "Failed to init ioreq buffer!\n");
		unpin_user_page(page);
		vm->ioreq_buf = NULL;
		goto free_buf;
	}

	dev_dbg(acrn_dev.this_device,
		"Init ioreq buffer %pK!\n", vm->ioreq_buf);
	ret = 0;
free_buf:
	kfree(set_buffer);
	return ret;
}
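/*
 * Layout sketch of the shared page established by acrn_ioreq_init() (as
 * consumed by acrn_ioreq_dispatch() and ioreq_task() above): the pinned page
 * is interpreted as a struct acrn_ioreq_buffer whose request slots are
 * indexed by vCPU id, i.e.
 *
 *	req = vm->ioreq_buf->req_slot + vcpu;
 *
 * so each vCPU has at most one in-flight I/O request at a time.
 */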
void acrn_ioreq_deinit(struct acrn_vm *vm)
{
	struct acrn_ioreq_client *client, *next;

	dev_dbg(acrn_dev.this_device,
		"Deinit ioreq buffer %pK!\n", vm->ioreq_buf);
	/* Destroy all clients belonging to this VM */
	list_for_each_entry_safe(client, next, &vm->ioreq_clients, list)
		acrn_ioreq_client_destroy(client);
	if (vm->default_client)
		acrn_ioreq_client_destroy(vm->default_client);

	if (vm->ioreq_buf && vm->ioreq_page) {
		unpin_user_page(vm->ioreq_page);
		vm->ioreq_buf = NULL;
	}
}