1 // SPDX-License-Identifier: GPL-2.0 2 /* Copyright(c) 2019 Intel Corporation. All rights rsvd. */ 3 #include <linux/init.h> 4 #include <linux/kernel.h> 5 #include <linux/module.h> 6 #include <linux/pci.h> 7 #include <linux/io-64-nonatomic-lo-hi.h> 8 #include <linux/dmaengine.h> 9 #include <linux/delay.h> 10 #include <uapi/linux/idxd.h> 11 #include "../dmaengine.h" 12 #include "idxd.h" 13 #include "registers.h" 14 15 enum irq_work_type { 16 IRQ_WORK_NORMAL = 0, 17 IRQ_WORK_PROCESS_FAULT, 18 }; 19 20 struct idxd_resubmit { 21 struct work_struct work; 22 struct idxd_desc *desc; 23 }; 24 25 struct idxd_int_handle_revoke { 26 struct work_struct work; 27 struct idxd_device *idxd; 28 }; 29 30 static void idxd_device_reinit(struct work_struct *work) 31 { 32 struct idxd_device *idxd = container_of(work, struct idxd_device, work); 33 struct device *dev = &idxd->pdev->dev; 34 int rc, i; 35 36 idxd_device_reset(idxd); 37 rc = idxd_device_config(idxd); 38 if (rc < 0) 39 goto out; 40 41 rc = idxd_device_enable(idxd); 42 if (rc < 0) 43 goto out; 44 45 for (i = 0; i < idxd->max_wqs; i++) { 46 if (test_bit(i, idxd->wq_enable_map)) { 47 struct idxd_wq *wq = idxd->wqs[i]; 48 49 rc = idxd_wq_enable(wq); 50 if (rc < 0) { 51 clear_bit(i, idxd->wq_enable_map); 52 dev_warn(dev, "Unable to re-enable wq %s\n", 53 dev_name(wq_confdev(wq))); 54 } 55 } 56 } 57 58 return; 59 60 out: 61 idxd_device_clear_state(idxd); 62 } 63 64 /* 65 * The function sends a drain descriptor for the interrupt handle. The drain ensures 66 * all descriptors with this interrupt handle is flushed and the interrupt 67 * will allow the cleanup of the outstanding descriptors. 68 */ 69 static void idxd_int_handle_revoke_drain(struct idxd_irq_entry *ie) 70 { 71 struct idxd_wq *wq = ie_to_wq(ie); 72 struct idxd_device *idxd = wq->idxd; 73 struct device *dev = &idxd->pdev->dev; 74 struct dsa_hw_desc desc = {}; 75 void __iomem *portal; 76 int rc; 77 78 /* Issue a simple drain operation with interrupt but no completion record */ 79 desc.flags = IDXD_OP_FLAG_RCI; 80 desc.opcode = DSA_OPCODE_DRAIN; 81 desc.priv = 1; 82 83 if (ie->pasid != INVALID_IOASID) 84 desc.pasid = ie->pasid; 85 desc.int_handle = ie->int_handle; 86 portal = idxd_wq_portal_addr(wq); 87 88 /* 89 * The wmb() makes sure that the descriptor is all there before we 90 * issue. 91 */ 92 wmb(); 93 if (wq_dedicated(wq)) { 94 iosubmit_cmds512(portal, &desc, 1); 95 } else { 96 rc = idxd_enqcmds(wq, portal, &desc); 97 /* This should not fail unless hardware failed. */ 98 if (rc < 0) 99 dev_warn(dev, "Failed to submit drain desc on wq %d\n", wq->id); 100 } 101 } 102 103 static void idxd_abort_invalid_int_handle_descs(struct idxd_irq_entry *ie) 104 { 105 LIST_HEAD(flist); 106 struct idxd_desc *d, *t; 107 struct llist_node *head; 108 109 spin_lock(&ie->list_lock); 110 head = llist_del_all(&ie->pending_llist); 111 if (head) { 112 llist_for_each_entry_safe(d, t, head, llnode) 113 list_add_tail(&d->list, &ie->work_list); 114 } 115 116 list_for_each_entry_safe(d, t, &ie->work_list, list) { 117 if (d->completion->status == DSA_COMP_INT_HANDLE_INVAL) 118 list_move_tail(&d->list, &flist); 119 } 120 spin_unlock(&ie->list_lock); 121 122 list_for_each_entry_safe(d, t, &flist, list) { 123 list_del(&d->list); 124 idxd_dma_complete_txd(d, IDXD_COMPLETE_ABORT, true); 125 } 126 } 127 128 static void idxd_int_handle_revoke(struct work_struct *work) 129 { 130 struct idxd_int_handle_revoke *revoke = 131 container_of(work, struct idxd_int_handle_revoke, work); 132 struct idxd_device *idxd = revoke->idxd; 133 struct pci_dev *pdev = idxd->pdev; 134 struct device *dev = &pdev->dev; 135 int i, new_handle, rc; 136 137 if (!idxd->request_int_handles) { 138 kfree(revoke); 139 dev_warn(dev, "Unexpected int handle refresh interrupt.\n"); 140 return; 141 } 142 143 /* 144 * The loop attempts to acquire new interrupt handle for all interrupt 145 * vectors that supports a handle. If a new interrupt handle is acquired and the 146 * wq is kernel type, the driver will kill the percpu_ref to pause all 147 * ongoing descriptor submissions. The interrupt handle is then changed. 148 * After change, the percpu_ref is revived and all the pending submissions 149 * are woken to try again. A drain is sent to for the interrupt handle 150 * at the end to make sure all invalid int handle descriptors are processed. 151 */ 152 for (i = 1; i < idxd->irq_cnt; i++) { 153 struct idxd_irq_entry *ie = idxd_get_ie(idxd, i); 154 struct idxd_wq *wq = ie_to_wq(ie); 155 156 if (ie->int_handle == INVALID_INT_HANDLE) 157 continue; 158 159 rc = idxd_device_request_int_handle(idxd, i, &new_handle, IDXD_IRQ_MSIX); 160 if (rc < 0) { 161 dev_warn(dev, "get int handle %d failed: %d\n", i, rc); 162 /* 163 * Failed to acquire new interrupt handle. Kill the WQ 164 * and release all the pending submitters. The submitters will 165 * get error return code and handle appropriately. 166 */ 167 ie->int_handle = INVALID_INT_HANDLE; 168 idxd_wq_quiesce(wq); 169 idxd_abort_invalid_int_handle_descs(ie); 170 continue; 171 } 172 173 /* No change in interrupt handle, nothing needs to be done */ 174 if (ie->int_handle == new_handle) 175 continue; 176 177 if (wq->state != IDXD_WQ_ENABLED || wq->type != IDXD_WQT_KERNEL) { 178 /* 179 * All the MSIX interrupts are allocated at once during probe. 180 * Therefore we need to update all interrupts even if the WQ 181 * isn't supporting interrupt operations. 182 */ 183 ie->int_handle = new_handle; 184 continue; 185 } 186 187 mutex_lock(&wq->wq_lock); 188 reinit_completion(&wq->wq_resurrect); 189 190 /* Kill percpu_ref to pause additional descriptor submissions */ 191 percpu_ref_kill(&wq->wq_active); 192 193 /* Wait for all submitters quiesce before we change interrupt handle */ 194 wait_for_completion(&wq->wq_dead); 195 196 ie->int_handle = new_handle; 197 198 /* Revive percpu ref and wake up all the waiting submitters */ 199 percpu_ref_reinit(&wq->wq_active); 200 complete_all(&wq->wq_resurrect); 201 mutex_unlock(&wq->wq_lock); 202 203 /* 204 * The delay here is to wait for all possible MOVDIR64B that 205 * are issued before percpu_ref_kill() has happened to have 206 * reached the PCIe domain before the drain is issued. The driver 207 * needs to ensure that the drain descriptor issued does not pass 208 * all the other issued descriptors that contain the invalid 209 * interrupt handle in order to ensure that the drain descriptor 210 * interrupt will allow the cleanup of all the descriptors with 211 * invalid interrupt handle. 212 */ 213 if (wq_dedicated(wq)) 214 udelay(100); 215 idxd_int_handle_revoke_drain(ie); 216 } 217 kfree(revoke); 218 } 219 220 static void process_evl_entry(struct idxd_device *idxd, struct __evl_entry *entry_head) 221 { 222 struct device *dev = &idxd->pdev->dev; 223 u8 status; 224 225 status = DSA_COMP_STATUS(entry_head->error); 226 dev_warn_ratelimited(dev, "Device error %#x operation: %#x fault addr: %#llx\n", 227 status, entry_head->operation, entry_head->fault_addr); 228 } 229 230 static void process_evl_entries(struct idxd_device *idxd) 231 { 232 union evl_status_reg evl_status; 233 unsigned int h, t; 234 struct idxd_evl *evl = idxd->evl; 235 struct __evl_entry *entry_head; 236 unsigned int ent_size = evl_ent_size(idxd); 237 u32 size; 238 239 evl_status.bits = 0; 240 evl_status.int_pending = 1; 241 242 spin_lock(&evl->lock); 243 /* Clear interrupt pending bit */ 244 iowrite32(evl_status.bits_upper32, 245 idxd->reg_base + IDXD_EVLSTATUS_OFFSET + sizeof(u32)); 246 h = evl->head; 247 evl_status.bits = ioread64(idxd->reg_base + IDXD_EVLSTATUS_OFFSET); 248 t = evl_status.tail; 249 size = idxd->evl->size; 250 251 while (h != t) { 252 entry_head = (struct __evl_entry *)(evl->log + (h * ent_size)); 253 process_evl_entry(idxd, entry_head); 254 h = (h + 1) % size; 255 } 256 257 evl->head = h; 258 evl_status.head = h; 259 iowrite32(evl_status.bits_lower32, idxd->reg_base + IDXD_EVLSTATUS_OFFSET); 260 spin_unlock(&evl->lock); 261 } 262 263 irqreturn_t idxd_misc_thread(int vec, void *data) 264 { 265 struct idxd_irq_entry *irq_entry = data; 266 struct idxd_device *idxd = ie_to_idxd(irq_entry); 267 struct device *dev = &idxd->pdev->dev; 268 union gensts_reg gensts; 269 u32 val = 0; 270 int i; 271 bool err = false; 272 u32 cause; 273 274 cause = ioread32(idxd->reg_base + IDXD_INTCAUSE_OFFSET); 275 if (!cause) 276 return IRQ_NONE; 277 278 iowrite32(cause, idxd->reg_base + IDXD_INTCAUSE_OFFSET); 279 280 if (cause & IDXD_INTC_HALT_STATE) 281 goto halt; 282 283 if (cause & IDXD_INTC_ERR) { 284 spin_lock(&idxd->dev_lock); 285 for (i = 0; i < 4; i++) 286 idxd->sw_err.bits[i] = ioread64(idxd->reg_base + 287 IDXD_SWERR_OFFSET + i * sizeof(u64)); 288 289 iowrite64(idxd->sw_err.bits[0] & IDXD_SWERR_ACK, 290 idxd->reg_base + IDXD_SWERR_OFFSET); 291 292 if (idxd->sw_err.valid && idxd->sw_err.wq_idx_valid) { 293 int id = idxd->sw_err.wq_idx; 294 struct idxd_wq *wq = idxd->wqs[id]; 295 296 if (wq->type == IDXD_WQT_USER) 297 wake_up_interruptible(&wq->err_queue); 298 } else { 299 int i; 300 301 for (i = 0; i < idxd->max_wqs; i++) { 302 struct idxd_wq *wq = idxd->wqs[i]; 303 304 if (wq->type == IDXD_WQT_USER) 305 wake_up_interruptible(&wq->err_queue); 306 } 307 } 308 309 spin_unlock(&idxd->dev_lock); 310 val |= IDXD_INTC_ERR; 311 312 for (i = 0; i < 4; i++) 313 dev_warn(dev, "err[%d]: %#16.16llx\n", 314 i, idxd->sw_err.bits[i]); 315 err = true; 316 } 317 318 if (cause & IDXD_INTC_INT_HANDLE_REVOKED) { 319 struct idxd_int_handle_revoke *revoke; 320 321 val |= IDXD_INTC_INT_HANDLE_REVOKED; 322 323 revoke = kzalloc(sizeof(*revoke), GFP_ATOMIC); 324 if (revoke) { 325 revoke->idxd = idxd; 326 INIT_WORK(&revoke->work, idxd_int_handle_revoke); 327 queue_work(idxd->wq, &revoke->work); 328 329 } else { 330 dev_err(dev, "Failed to allocate work for int handle revoke\n"); 331 idxd_wqs_quiesce(idxd); 332 } 333 } 334 335 if (cause & IDXD_INTC_CMD) { 336 val |= IDXD_INTC_CMD; 337 complete(idxd->cmd_done); 338 } 339 340 if (cause & IDXD_INTC_OCCUPY) { 341 /* Driver does not utilize occupancy interrupt */ 342 val |= IDXD_INTC_OCCUPY; 343 } 344 345 if (cause & IDXD_INTC_PERFMON_OVFL) { 346 val |= IDXD_INTC_PERFMON_OVFL; 347 perfmon_counter_overflow(idxd); 348 } 349 350 if (cause & IDXD_INTC_EVL) { 351 val |= IDXD_INTC_EVL; 352 process_evl_entries(idxd); 353 } 354 355 val ^= cause; 356 if (val) 357 dev_warn_once(dev, "Unexpected interrupt cause bits set: %#x\n", 358 val); 359 360 if (!err) 361 goto out; 362 363 halt: 364 gensts.bits = ioread32(idxd->reg_base + IDXD_GENSTATS_OFFSET); 365 if (gensts.state == IDXD_DEVICE_STATE_HALT) { 366 idxd->state = IDXD_DEV_HALTED; 367 if (gensts.reset_type == IDXD_DEVICE_RESET_SOFTWARE) { 368 /* 369 * If we need a software reset, we will throw the work 370 * on a system workqueue in order to allow interrupts 371 * for the device command completions. 372 */ 373 INIT_WORK(&idxd->work, idxd_device_reinit); 374 queue_work(idxd->wq, &idxd->work); 375 } else { 376 idxd->state = IDXD_DEV_HALTED; 377 idxd_wqs_quiesce(idxd); 378 idxd_wqs_unmap_portal(idxd); 379 idxd_device_clear_state(idxd); 380 dev_err(&idxd->pdev->dev, 381 "idxd halted, need %s.\n", 382 gensts.reset_type == IDXD_DEVICE_RESET_FLR ? 383 "FLR" : "system reset"); 384 } 385 } 386 387 out: 388 return IRQ_HANDLED; 389 } 390 391 static void idxd_int_handle_resubmit_work(struct work_struct *work) 392 { 393 struct idxd_resubmit *irw = container_of(work, struct idxd_resubmit, work); 394 struct idxd_desc *desc = irw->desc; 395 struct idxd_wq *wq = desc->wq; 396 int rc; 397 398 desc->completion->status = 0; 399 rc = idxd_submit_desc(wq, desc); 400 if (rc < 0) { 401 dev_dbg(&wq->idxd->pdev->dev, "Failed to resubmit desc %d to wq %d.\n", 402 desc->id, wq->id); 403 /* 404 * If the error is not -EAGAIN, it means the submission failed due to wq 405 * has been killed instead of ENQCMDS failure. Here the driver needs to 406 * notify the submitter of the failure by reporting abort status. 407 * 408 * -EAGAIN comes from ENQCMDS failure. idxd_submit_desc() will handle the 409 * abort. 410 */ 411 if (rc != -EAGAIN) { 412 desc->completion->status = IDXD_COMP_DESC_ABORT; 413 idxd_dma_complete_txd(desc, IDXD_COMPLETE_ABORT, false); 414 } 415 idxd_free_desc(wq, desc); 416 } 417 kfree(irw); 418 } 419 420 bool idxd_queue_int_handle_resubmit(struct idxd_desc *desc) 421 { 422 struct idxd_wq *wq = desc->wq; 423 struct idxd_device *idxd = wq->idxd; 424 struct idxd_resubmit *irw; 425 426 irw = kzalloc(sizeof(*irw), GFP_KERNEL); 427 if (!irw) 428 return false; 429 430 irw->desc = desc; 431 INIT_WORK(&irw->work, idxd_int_handle_resubmit_work); 432 queue_work(idxd->wq, &irw->work); 433 return true; 434 } 435 436 static void irq_process_pending_llist(struct idxd_irq_entry *irq_entry) 437 { 438 struct idxd_desc *desc, *t; 439 struct llist_node *head; 440 441 head = llist_del_all(&irq_entry->pending_llist); 442 if (!head) 443 return; 444 445 llist_for_each_entry_safe(desc, t, head, llnode) { 446 u8 status = desc->completion->status & DSA_COMP_STATUS_MASK; 447 448 if (status) { 449 /* 450 * Check against the original status as ABORT is software defined 451 * and 0xff, which DSA_COMP_STATUS_MASK can mask out. 452 */ 453 if (unlikely(desc->completion->status == IDXD_COMP_DESC_ABORT)) { 454 idxd_dma_complete_txd(desc, IDXD_COMPLETE_ABORT, true); 455 continue; 456 } 457 458 idxd_dma_complete_txd(desc, IDXD_COMPLETE_NORMAL, true); 459 } else { 460 spin_lock(&irq_entry->list_lock); 461 list_add_tail(&desc->list, 462 &irq_entry->work_list); 463 spin_unlock(&irq_entry->list_lock); 464 } 465 } 466 } 467 468 static void irq_process_work_list(struct idxd_irq_entry *irq_entry) 469 { 470 LIST_HEAD(flist); 471 struct idxd_desc *desc, *n; 472 473 /* 474 * This lock protects list corruption from access of list outside of the irq handler 475 * thread. 476 */ 477 spin_lock(&irq_entry->list_lock); 478 if (list_empty(&irq_entry->work_list)) { 479 spin_unlock(&irq_entry->list_lock); 480 return; 481 } 482 483 list_for_each_entry_safe(desc, n, &irq_entry->work_list, list) { 484 if (desc->completion->status) { 485 list_move_tail(&desc->list, &flist); 486 } 487 } 488 489 spin_unlock(&irq_entry->list_lock); 490 491 list_for_each_entry(desc, &flist, list) { 492 /* 493 * Check against the original status as ABORT is software defined 494 * and 0xff, which DSA_COMP_STATUS_MASK can mask out. 495 */ 496 if (unlikely(desc->completion->status == IDXD_COMP_DESC_ABORT)) { 497 idxd_dma_complete_txd(desc, IDXD_COMPLETE_ABORT, true); 498 continue; 499 } 500 501 idxd_dma_complete_txd(desc, IDXD_COMPLETE_NORMAL, true); 502 } 503 } 504 505 irqreturn_t idxd_wq_thread(int irq, void *data) 506 { 507 struct idxd_irq_entry *irq_entry = data; 508 509 /* 510 * There are two lists we are processing. The pending_llist is where 511 * submmiter adds all the submitted descriptor after sending it to 512 * the workqueue. It's a lockless singly linked list. The work_list 513 * is the common linux double linked list. We are in a scenario of 514 * multiple producers and a single consumer. The producers are all 515 * the kernel submitters of descriptors, and the consumer is the 516 * kernel irq handler thread for the msix vector when using threaded 517 * irq. To work with the restrictions of llist to remain lockless, 518 * we are doing the following steps: 519 * 1. Iterate through the work_list and process any completed 520 * descriptor. Delete the completed entries during iteration. 521 * 2. llist_del_all() from the pending list. 522 * 3. Iterate through the llist that was deleted from the pending list 523 * and process the completed entries. 524 * 4. If the entry is still waiting on hardware, list_add_tail() to 525 * the work_list. 526 */ 527 irq_process_work_list(irq_entry); 528 irq_process_pending_llist(irq_entry); 529 530 return IRQ_HANDLED; 531 } 532