1 // SPDX-License-Identifier: GPL-2.0 2 /* Copyright(c) 2019 Intel Corporation. All rights rsvd. */ 3 #include <linux/init.h> 4 #include <linux/kernel.h> 5 #include <linux/module.h> 6 #include <linux/pci.h> 7 #include <linux/io-64-nonatomic-lo-hi.h> 8 #include <linux/dmaengine.h> 9 #include <linux/delay.h> 10 #include <uapi/linux/idxd.h> 11 #include "../dmaengine.h" 12 #include "idxd.h" 13 #include "registers.h" 14 15 enum irq_work_type { 16 IRQ_WORK_NORMAL = 0, 17 IRQ_WORK_PROCESS_FAULT, 18 }; 19 20 struct idxd_fault { 21 struct work_struct work; 22 u64 addr; 23 struct idxd_device *idxd; 24 }; 25 26 struct idxd_resubmit { 27 struct work_struct work; 28 struct idxd_desc *desc; 29 }; 30 31 struct idxd_int_handle_revoke { 32 struct work_struct work; 33 struct idxd_device *idxd; 34 }; 35 36 static void idxd_device_reinit(struct work_struct *work) 37 { 38 struct idxd_device *idxd = container_of(work, struct idxd_device, work); 39 struct device *dev = &idxd->pdev->dev; 40 int rc, i; 41 42 idxd_device_reset(idxd); 43 rc = idxd_device_config(idxd); 44 if (rc < 0) 45 goto out; 46 47 rc = idxd_device_enable(idxd); 48 if (rc < 0) 49 goto out; 50 51 for (i = 0; i < idxd->max_wqs; i++) { 52 struct idxd_wq *wq = idxd->wqs[i]; 53 54 if (wq->state == IDXD_WQ_ENABLED) { 55 rc = idxd_wq_enable(wq); 56 if (rc < 0) { 57 dev_warn(dev, "Unable to re-enable wq %s\n", 58 dev_name(wq_confdev(wq))); 59 } 60 } 61 } 62 63 return; 64 65 out: 66 idxd_device_clear_state(idxd); 67 } 68 69 /* 70 * The function sends a drain descriptor for the interrupt handle. The drain ensures 71 * all descriptors with this interrupt handle is flushed and the interrupt 72 * will allow the cleanup of the outstanding descriptors. 73 */ 74 static void idxd_int_handle_revoke_drain(struct idxd_irq_entry *ie) 75 { 76 struct idxd_wq *wq = ie_to_wq(ie); 77 struct idxd_device *idxd = wq->idxd; 78 struct device *dev = &idxd->pdev->dev; 79 struct dsa_hw_desc desc = {}; 80 void __iomem *portal; 81 int rc; 82 83 /* Issue a simple drain operation with interrupt but no completion record */ 84 desc.flags = IDXD_OP_FLAG_RCI; 85 desc.opcode = DSA_OPCODE_DRAIN; 86 desc.priv = 1; 87 88 if (ie->pasid != INVALID_IOASID) 89 desc.pasid = ie->pasid; 90 desc.int_handle = ie->int_handle; 91 portal = idxd_wq_portal_addr(wq); 92 93 /* 94 * The wmb() makes sure that the descriptor is all there before we 95 * issue. 96 */ 97 wmb(); 98 if (wq_dedicated(wq)) { 99 iosubmit_cmds512(portal, &desc, 1); 100 } else { 101 rc = idxd_enqcmds(wq, portal, &desc); 102 /* This should not fail unless hardware failed. */ 103 if (rc < 0) 104 dev_warn(dev, "Failed to submit drain desc on wq %d\n", wq->id); 105 } 106 } 107 108 static void idxd_abort_invalid_int_handle_descs(struct idxd_irq_entry *ie) 109 { 110 LIST_HEAD(flist); 111 struct idxd_desc *d, *t; 112 struct llist_node *head; 113 114 spin_lock(&ie->list_lock); 115 head = llist_del_all(&ie->pending_llist); 116 if (head) { 117 llist_for_each_entry_safe(d, t, head, llnode) 118 list_add_tail(&d->list, &ie->work_list); 119 } 120 121 list_for_each_entry_safe(d, t, &ie->work_list, list) { 122 if (d->completion->status == DSA_COMP_INT_HANDLE_INVAL) 123 list_move_tail(&d->list, &flist); 124 } 125 spin_unlock(&ie->list_lock); 126 127 list_for_each_entry_safe(d, t, &flist, list) { 128 list_del(&d->list); 129 idxd_dma_complete_txd(d, IDXD_COMPLETE_ABORT, true); 130 } 131 } 132 133 static void idxd_int_handle_revoke(struct work_struct *work) 134 { 135 struct idxd_int_handle_revoke *revoke = 136 container_of(work, struct idxd_int_handle_revoke, work); 137 struct idxd_device *idxd = revoke->idxd; 138 struct pci_dev *pdev = idxd->pdev; 139 struct device *dev = &pdev->dev; 140 int i, new_handle, rc; 141 142 if (!idxd->request_int_handles) { 143 kfree(revoke); 144 dev_warn(dev, "Unexpected int handle refresh interrupt.\n"); 145 return; 146 } 147 148 /* 149 * The loop attempts to acquire new interrupt handle for all interrupt 150 * vectors that supports a handle. If a new interrupt handle is acquired and the 151 * wq is kernel type, the driver will kill the percpu_ref to pause all 152 * ongoing descriptor submissions. The interrupt handle is then changed. 153 * After change, the percpu_ref is revived and all the pending submissions 154 * are woken to try again. A drain is sent to for the interrupt handle 155 * at the end to make sure all invalid int handle descriptors are processed. 156 */ 157 for (i = 1; i < idxd->irq_cnt; i++) { 158 struct idxd_irq_entry *ie = idxd_get_ie(idxd, i); 159 struct idxd_wq *wq = ie_to_wq(ie); 160 161 if (ie->int_handle == INVALID_INT_HANDLE) 162 continue; 163 164 rc = idxd_device_request_int_handle(idxd, i, &new_handle, IDXD_IRQ_MSIX); 165 if (rc < 0) { 166 dev_warn(dev, "get int handle %d failed: %d\n", i, rc); 167 /* 168 * Failed to acquire new interrupt handle. Kill the WQ 169 * and release all the pending submitters. The submitters will 170 * get error return code and handle appropriately. 171 */ 172 ie->int_handle = INVALID_INT_HANDLE; 173 idxd_wq_quiesce(wq); 174 idxd_abort_invalid_int_handle_descs(ie); 175 continue; 176 } 177 178 /* No change in interrupt handle, nothing needs to be done */ 179 if (ie->int_handle == new_handle) 180 continue; 181 182 if (wq->state != IDXD_WQ_ENABLED || wq->type != IDXD_WQT_KERNEL) { 183 /* 184 * All the MSIX interrupts are allocated at once during probe. 185 * Therefore we need to update all interrupts even if the WQ 186 * isn't supporting interrupt operations. 187 */ 188 ie->int_handle = new_handle; 189 continue; 190 } 191 192 mutex_lock(&wq->wq_lock); 193 reinit_completion(&wq->wq_resurrect); 194 195 /* Kill percpu_ref to pause additional descriptor submissions */ 196 percpu_ref_kill(&wq->wq_active); 197 198 /* Wait for all submitters quiesce before we change interrupt handle */ 199 wait_for_completion(&wq->wq_dead); 200 201 ie->int_handle = new_handle; 202 203 /* Revive percpu ref and wake up all the waiting submitters */ 204 percpu_ref_reinit(&wq->wq_active); 205 complete_all(&wq->wq_resurrect); 206 mutex_unlock(&wq->wq_lock); 207 208 /* 209 * The delay here is to wait for all possible MOVDIR64B that 210 * are issued before percpu_ref_kill() has happened to have 211 * reached the PCIe domain before the drain is issued. The driver 212 * needs to ensure that the drain descriptor issued does not pass 213 * all the other issued descriptors that contain the invalid 214 * interrupt handle in order to ensure that the drain descriptor 215 * interrupt will allow the cleanup of all the descriptors with 216 * invalid interrupt handle. 217 */ 218 if (wq_dedicated(wq)) 219 udelay(100); 220 idxd_int_handle_revoke_drain(ie); 221 } 222 kfree(revoke); 223 } 224 225 static int process_misc_interrupts(struct idxd_device *idxd, u32 cause) 226 { 227 struct device *dev = &idxd->pdev->dev; 228 union gensts_reg gensts; 229 u32 val = 0; 230 int i; 231 bool err = false; 232 233 if (cause & IDXD_INTC_HALT_STATE) 234 goto halt; 235 236 if (cause & IDXD_INTC_ERR) { 237 spin_lock(&idxd->dev_lock); 238 for (i = 0; i < 4; i++) 239 idxd->sw_err.bits[i] = ioread64(idxd->reg_base + 240 IDXD_SWERR_OFFSET + i * sizeof(u64)); 241 242 iowrite64(idxd->sw_err.bits[0] & IDXD_SWERR_ACK, 243 idxd->reg_base + IDXD_SWERR_OFFSET); 244 245 if (idxd->sw_err.valid && idxd->sw_err.wq_idx_valid) { 246 int id = idxd->sw_err.wq_idx; 247 struct idxd_wq *wq = idxd->wqs[id]; 248 249 if (wq->type == IDXD_WQT_USER) 250 wake_up_interruptible(&wq->err_queue); 251 } else { 252 int i; 253 254 for (i = 0; i < idxd->max_wqs; i++) { 255 struct idxd_wq *wq = idxd->wqs[i]; 256 257 if (wq->type == IDXD_WQT_USER) 258 wake_up_interruptible(&wq->err_queue); 259 } 260 } 261 262 spin_unlock(&idxd->dev_lock); 263 val |= IDXD_INTC_ERR; 264 265 for (i = 0; i < 4; i++) 266 dev_warn(dev, "err[%d]: %#16.16llx\n", 267 i, idxd->sw_err.bits[i]); 268 err = true; 269 } 270 271 if (cause & IDXD_INTC_INT_HANDLE_REVOKED) { 272 struct idxd_int_handle_revoke *revoke; 273 274 val |= IDXD_INTC_INT_HANDLE_REVOKED; 275 276 revoke = kzalloc(sizeof(*revoke), GFP_ATOMIC); 277 if (revoke) { 278 revoke->idxd = idxd; 279 INIT_WORK(&revoke->work, idxd_int_handle_revoke); 280 queue_work(idxd->wq, &revoke->work); 281 282 } else { 283 dev_err(dev, "Failed to allocate work for int handle revoke\n"); 284 idxd_wqs_quiesce(idxd); 285 } 286 } 287 288 if (cause & IDXD_INTC_CMD) { 289 val |= IDXD_INTC_CMD; 290 complete(idxd->cmd_done); 291 } 292 293 if (cause & IDXD_INTC_OCCUPY) { 294 /* Driver does not utilize occupancy interrupt */ 295 val |= IDXD_INTC_OCCUPY; 296 } 297 298 if (cause & IDXD_INTC_PERFMON_OVFL) { 299 val |= IDXD_INTC_PERFMON_OVFL; 300 perfmon_counter_overflow(idxd); 301 } 302 303 val ^= cause; 304 if (val) 305 dev_warn_once(dev, "Unexpected interrupt cause bits set: %#x\n", 306 val); 307 308 if (!err) 309 return 0; 310 311 halt: 312 gensts.bits = ioread32(idxd->reg_base + IDXD_GENSTATS_OFFSET); 313 if (gensts.state == IDXD_DEVICE_STATE_HALT) { 314 idxd->state = IDXD_DEV_HALTED; 315 if (gensts.reset_type == IDXD_DEVICE_RESET_SOFTWARE) { 316 /* 317 * If we need a software reset, we will throw the work 318 * on a system workqueue in order to allow interrupts 319 * for the device command completions. 320 */ 321 INIT_WORK(&idxd->work, idxd_device_reinit); 322 queue_work(idxd->wq, &idxd->work); 323 } else { 324 idxd->state = IDXD_DEV_HALTED; 325 idxd_wqs_quiesce(idxd); 326 idxd_wqs_unmap_portal(idxd); 327 idxd_device_clear_state(idxd); 328 dev_err(&idxd->pdev->dev, 329 "idxd halted, need %s.\n", 330 gensts.reset_type == IDXD_DEVICE_RESET_FLR ? 331 "FLR" : "system reset"); 332 return -ENXIO; 333 } 334 } 335 336 return 0; 337 } 338 339 irqreturn_t idxd_misc_thread(int vec, void *data) 340 { 341 struct idxd_irq_entry *irq_entry = data; 342 struct idxd_device *idxd = ie_to_idxd(irq_entry); 343 int rc; 344 u32 cause; 345 346 cause = ioread32(idxd->reg_base + IDXD_INTCAUSE_OFFSET); 347 if (cause) 348 iowrite32(cause, idxd->reg_base + IDXD_INTCAUSE_OFFSET); 349 350 while (cause) { 351 rc = process_misc_interrupts(idxd, cause); 352 if (rc < 0) 353 break; 354 cause = ioread32(idxd->reg_base + IDXD_INTCAUSE_OFFSET); 355 if (cause) 356 iowrite32(cause, idxd->reg_base + IDXD_INTCAUSE_OFFSET); 357 } 358 359 return IRQ_HANDLED; 360 } 361 362 static void idxd_int_handle_resubmit_work(struct work_struct *work) 363 { 364 struct idxd_resubmit *irw = container_of(work, struct idxd_resubmit, work); 365 struct idxd_desc *desc = irw->desc; 366 struct idxd_wq *wq = desc->wq; 367 int rc; 368 369 desc->completion->status = 0; 370 rc = idxd_submit_desc(wq, desc); 371 if (rc < 0) { 372 dev_dbg(&wq->idxd->pdev->dev, "Failed to resubmit desc %d to wq %d.\n", 373 desc->id, wq->id); 374 /* 375 * If the error is not -EAGAIN, it means the submission failed due to wq 376 * has been killed instead of ENQCMDS failure. Here the driver needs to 377 * notify the submitter of the failure by reporting abort status. 378 * 379 * -EAGAIN comes from ENQCMDS failure. idxd_submit_desc() will handle the 380 * abort. 381 */ 382 if (rc != -EAGAIN) { 383 desc->completion->status = IDXD_COMP_DESC_ABORT; 384 idxd_dma_complete_txd(desc, IDXD_COMPLETE_ABORT, false); 385 } 386 idxd_free_desc(wq, desc); 387 } 388 kfree(irw); 389 } 390 391 bool idxd_queue_int_handle_resubmit(struct idxd_desc *desc) 392 { 393 struct idxd_wq *wq = desc->wq; 394 struct idxd_device *idxd = wq->idxd; 395 struct idxd_resubmit *irw; 396 397 irw = kzalloc(sizeof(*irw), GFP_KERNEL); 398 if (!irw) 399 return false; 400 401 irw->desc = desc; 402 INIT_WORK(&irw->work, idxd_int_handle_resubmit_work); 403 queue_work(idxd->wq, &irw->work); 404 return true; 405 } 406 407 static void irq_process_pending_llist(struct idxd_irq_entry *irq_entry) 408 { 409 struct idxd_desc *desc, *t; 410 struct llist_node *head; 411 412 head = llist_del_all(&irq_entry->pending_llist); 413 if (!head) 414 return; 415 416 llist_for_each_entry_safe(desc, t, head, llnode) { 417 u8 status = desc->completion->status & DSA_COMP_STATUS_MASK; 418 419 if (status) { 420 /* 421 * Check against the original status as ABORT is software defined 422 * and 0xff, which DSA_COMP_STATUS_MASK can mask out. 423 */ 424 if (unlikely(desc->completion->status == IDXD_COMP_DESC_ABORT)) { 425 idxd_dma_complete_txd(desc, IDXD_COMPLETE_ABORT, true); 426 continue; 427 } 428 429 idxd_dma_complete_txd(desc, IDXD_COMPLETE_NORMAL, true); 430 } else { 431 spin_lock(&irq_entry->list_lock); 432 list_add_tail(&desc->list, 433 &irq_entry->work_list); 434 spin_unlock(&irq_entry->list_lock); 435 } 436 } 437 } 438 439 static void irq_process_work_list(struct idxd_irq_entry *irq_entry) 440 { 441 LIST_HEAD(flist); 442 struct idxd_desc *desc, *n; 443 444 /* 445 * This lock protects list corruption from access of list outside of the irq handler 446 * thread. 447 */ 448 spin_lock(&irq_entry->list_lock); 449 if (list_empty(&irq_entry->work_list)) { 450 spin_unlock(&irq_entry->list_lock); 451 return; 452 } 453 454 list_for_each_entry_safe(desc, n, &irq_entry->work_list, list) { 455 if (desc->completion->status) { 456 list_move_tail(&desc->list, &flist); 457 } 458 } 459 460 spin_unlock(&irq_entry->list_lock); 461 462 list_for_each_entry(desc, &flist, list) { 463 /* 464 * Check against the original status as ABORT is software defined 465 * and 0xff, which DSA_COMP_STATUS_MASK can mask out. 466 */ 467 if (unlikely(desc->completion->status == IDXD_COMP_DESC_ABORT)) { 468 idxd_dma_complete_txd(desc, IDXD_COMPLETE_ABORT, true); 469 continue; 470 } 471 472 idxd_dma_complete_txd(desc, IDXD_COMPLETE_NORMAL, true); 473 } 474 } 475 476 irqreturn_t idxd_wq_thread(int irq, void *data) 477 { 478 struct idxd_irq_entry *irq_entry = data; 479 480 /* 481 * There are two lists we are processing. The pending_llist is where 482 * submmiter adds all the submitted descriptor after sending it to 483 * the workqueue. It's a lockless singly linked list. The work_list 484 * is the common linux double linked list. We are in a scenario of 485 * multiple producers and a single consumer. The producers are all 486 * the kernel submitters of descriptors, and the consumer is the 487 * kernel irq handler thread for the msix vector when using threaded 488 * irq. To work with the restrictions of llist to remain lockless, 489 * we are doing the following steps: 490 * 1. Iterate through the work_list and process any completed 491 * descriptor. Delete the completed entries during iteration. 492 * 2. llist_del_all() from the pending list. 493 * 3. Iterate through the llist that was deleted from the pending list 494 * and process the completed entries. 495 * 4. If the entry is still waiting on hardware, list_add_tail() to 496 * the work_list. 497 */ 498 irq_process_work_list(irq_entry); 499 irq_process_pending_llist(irq_entry); 500 501 return IRQ_HANDLED; 502 } 503