// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright 2014 IBM Corp.
 */

#include <linux/interrupt.h>
#include <linux/irqdomain.h>
#include <linux/workqueue.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/slab.h>
#include <linux/pid.h>
#include <asm/cputable.h>
#include <misc/cxl-base.h>

#include "cxl.h"
#include "trace.h"

static int afu_irq_range_start(void)
{
	if (cpu_has_feature(CPU_FTR_HVMODE))
		return 1;
	return 0;
}

static irqreturn_t schedule_cxl_fault(struct cxl_context *ctx, u64 dsisr, u64 dar)
{
	ctx->dsisr = dsisr;
	ctx->dar = dar;
	schedule_work(&ctx->fault_work);
	return IRQ_HANDLED;
}

irqreturn_t cxl_irq_psl9(int irq, struct cxl_context *ctx, struct cxl_irq_info *irq_info)
{
	u64 dsisr, dar;

	dsisr = irq_info->dsisr;
	dar = irq_info->dar;

	trace_cxl_psl9_irq(ctx, irq, dsisr, dar);

	pr_devel("CXL interrupt %i for afu pe: %i DSISR: %#llx DAR: %#llx\n", irq, ctx->pe, dsisr, dar);

	if (dsisr & CXL_PSL9_DSISR_An_TF) {
		pr_devel("CXL interrupt: Scheduling translation fault handling for later (pe: %i)\n", ctx->pe);
		return schedule_cxl_fault(ctx, dsisr, dar);
	}

	if (dsisr & CXL_PSL9_DSISR_An_PE)
		return cxl_ops->handle_psl_slice_error(ctx, dsisr,
						irq_info->errstat);
	if (dsisr & CXL_PSL9_DSISR_An_AE) {
		pr_devel("CXL interrupt: AFU Error 0x%016llx\n", irq_info->afu_err);

		if (ctx->pending_afu_err) {
			/*
			 * This shouldn't happen - the PSL treats these errors
			 * as fatal and will have reset the AFU, so there's not
			 * much point buffering multiple AFU errors.
			 * OTOH if we DO ever see a storm of these come in it's
			 * probably best that we log them somewhere:
			 */
			dev_err_ratelimited(&ctx->afu->dev, "CXL AFU Error undelivered to pe %i: 0x%016llx\n",
					    ctx->pe, irq_info->afu_err);
		} else {
			spin_lock(&ctx->lock);
			ctx->afu_err = irq_info->afu_err;
			ctx->pending_afu_err = 1;
			spin_unlock(&ctx->lock);

			wake_up_all(&ctx->wq);
		}

		cxl_ops->ack_irq(ctx, CXL_PSL_TFC_An_A, 0);
		return IRQ_HANDLED;
	}
	if (dsisr & CXL_PSL9_DSISR_An_OC)
		pr_devel("CXL interrupt: OS Context Warning\n");

	WARN(1, "Unhandled CXL PSL IRQ\n");
	return IRQ_HANDLED;
}
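
/*
 * PSL8 version of the PSL interrupt handler. Faults that need the task's
 * mm (segment misses and page faults) are deferred to the fault worker
 * via schedule_cxl_fault(); slice errors and AFU errors are handled
 * directly, and anything unrecognised triggers a WARN.
 */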
irqreturn_t cxl_irq_psl8(int irq, struct cxl_context *ctx, struct cxl_irq_info *irq_info)
{
	u64 dsisr, dar;

	dsisr = irq_info->dsisr;
	dar = irq_info->dar;

	trace_cxl_psl_irq(ctx, irq, dsisr, dar);

	pr_devel("CXL interrupt %i for afu pe: %i DSISR: %#llx DAR: %#llx\n", irq, ctx->pe, dsisr, dar);

	if (dsisr & CXL_PSL_DSISR_An_DS) {
		/*
		 * We don't inherently need to sleep to handle this, but we do
		 * need to get a ref to the task's mm, which we can't do from
		 * irq context without the potential for a deadlock since it
		 * takes the task_lock. An alternate option would be to keep a
		 * reference to the task's mm the entire time it has cxl open,
		 * but to do that we need to solve the issue where we hold a
		 * ref to the mm, but the mm can hold a ref to the fd after an
		 * mmap, preventing anything from being cleaned up.
		 */
		pr_devel("Scheduling segment miss handling for later pe: %i\n", ctx->pe);
		return schedule_cxl_fault(ctx, dsisr, dar);
	}

	if (dsisr & CXL_PSL_DSISR_An_M)
		pr_devel("CXL interrupt: PTE not found\n");
	if (dsisr & CXL_PSL_DSISR_An_P)
		pr_devel("CXL interrupt: Storage protection violation\n");
	if (dsisr & CXL_PSL_DSISR_An_A)
		pr_devel("CXL interrupt: AFU lock access to write through or cache inhibited storage\n");
	if (dsisr & CXL_PSL_DSISR_An_S)
		pr_devel("CXL interrupt: Access was afu_wr or afu_zero\n");
	if (dsisr & CXL_PSL_DSISR_An_K)
		pr_devel("CXL interrupt: Access not permitted by virtual page class key protection\n");

	if (dsisr & CXL_PSL_DSISR_An_DM) {
		/*
		 * In some cases we might be able to handle the fault
		 * immediately if hash_page would succeed, but we still need
		 * the task's mm, which as above we can't get without a lock
		 */
		pr_devel("Scheduling page fault handling for later pe: %i\n", ctx->pe);
		return schedule_cxl_fault(ctx, dsisr, dar);
	}
	if (dsisr & CXL_PSL_DSISR_An_ST)
		WARN(1, "CXL interrupt: Segment Table PTE not found\n");
	if (dsisr & CXL_PSL_DSISR_An_UR)
		pr_devel("CXL interrupt: AURP PTE not found\n");
	if (dsisr & CXL_PSL_DSISR_An_PE)
		return cxl_ops->handle_psl_slice_error(ctx, dsisr,
						irq_info->errstat);
	if (dsisr & CXL_PSL_DSISR_An_AE) {
		pr_devel("CXL interrupt: AFU Error 0x%016llx\n", irq_info->afu_err);

		if (ctx->pending_afu_err) {
			/*
			 * This shouldn't happen - the PSL treats these errors
			 * as fatal and will have reset the AFU, so there's not
			 * much point buffering multiple AFU errors.
			 * OTOH if we DO ever see a storm of these come in it's
			 * probably best that we log them somewhere:
			 */
			dev_err_ratelimited(&ctx->afu->dev, "CXL AFU Error "
					    "undelivered to pe %i: 0x%016llx\n",
					    ctx->pe, irq_info->afu_err);
		} else {
			spin_lock(&ctx->lock);
			ctx->afu_err = irq_info->afu_err;
			ctx->pending_afu_err = true;
			spin_unlock(&ctx->lock);

			wake_up_all(&ctx->wq);
		}

		cxl_ops->ack_irq(ctx, CXL_PSL_TFC_An_A, 0);
		return IRQ_HANDLED;
	}
	if (dsisr & CXL_PSL_DSISR_An_OC)
		pr_devel("CXL interrupt: OS Context Warning\n");

	WARN(1, "Unhandled CXL PSL IRQ\n");
	return IRQ_HANDLED;
}

static irqreturn_t cxl_irq_afu(int irq, void *data)
{
	struct cxl_context *ctx = data;
	irq_hw_number_t hwirq = irqd_to_hwirq(irq_get_irq_data(irq));
	int irq_off, afu_irq = 0;
	__u16 range;
	int r;

	/*
	 * Look for the interrupt number.
	 * On bare-metal, we know range 0 only contains the PSL
	 * interrupt so we could start counting at range 1 and initialize
	 * afu_irq at 1.
	 * In a guest, range 0 also contains AFU interrupts, so it must
	 * be accounted for. Therefore we initialize afu_irq at 0 to take
	 * into account the PSL interrupt.
	 *
	 * For code-readability, it just seems easier to go over all
	 * the ranges on bare-metal and guest. The end result is the same.
	 */
	for (r = 0; r < CXL_IRQ_RANGES; r++) {
		irq_off = hwirq - ctx->irqs.offset[r];
		range = ctx->irqs.range[r];
		if (irq_off >= 0 && irq_off < range) {
			afu_irq += irq_off;
			break;
		}
		afu_irq += range;
	}
	if (unlikely(r >= CXL_IRQ_RANGES)) {
		WARN(1, "Received AFU IRQ out of range for pe %i (virq %i hwirq %lx)\n",
		     ctx->pe, irq, hwirq);
		return IRQ_HANDLED;
	}

	trace_cxl_afu_irq(ctx, afu_irq, irq, hwirq);
	pr_devel("Received AFU interrupt %i for pe: %i (virq %i hwirq %lx)\n",
		 afu_irq, ctx->pe, irq, hwirq);

	if (unlikely(!ctx->irq_bitmap)) {
		WARN(1, "Received AFU IRQ for context with no IRQ bitmap\n");
		return IRQ_HANDLED;
	}
	spin_lock(&ctx->lock);
	set_bit(afu_irq - 1, ctx->irq_bitmap);
	ctx->pending_irq = true;
	spin_unlock(&ctx->lock);

	wake_up_all(&ctx->wq);

	return IRQ_HANDLED;
}
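
/*
 * cxl_map_irq()/cxl_unmap_irq() are the low-level helpers that map a
 * hwirq to a Linux virq and request/free the interrupt. Rough usage
 * sketch (names are illustrative only, error handling trimmed):
 *
 *	virq = cxl_map_irq(adapter, hwirq, my_handler, cookie, "cxl-example");
 *	if (!virq)
 *		return -ENOMEM;
 *	...
 *	cxl_unmap_irq(virq, cookie);
 */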
unsigned int cxl_map_irq(struct cxl *adapter, irq_hw_number_t hwirq,
			 irq_handler_t handler, void *cookie, const char *name)
{
	unsigned int virq;
	int result;

	/* IRQ Domain? */
	virq = irq_create_mapping(NULL, hwirq);
	if (!virq) {
		dev_warn(&adapter->dev, "cxl_map_irq: irq_create_mapping failed\n");
		return 0;
	}

	if (cxl_ops->setup_irq)
		cxl_ops->setup_irq(adapter, hwirq, virq);

	pr_devel("hwirq %#lx mapped to virq %u\n", hwirq, virq);

	result = request_irq(virq, handler, 0, name, cookie);
	if (result) {
		dev_warn(&adapter->dev, "cxl_map_irq: request_irq failed: %i\n", result);
		return 0;
	}

	return virq;
}

void cxl_unmap_irq(unsigned int virq, void *cookie)
{
	free_irq(virq, cookie);
}

int cxl_register_one_irq(struct cxl *adapter,
			 irq_handler_t handler,
			 void *cookie,
			 irq_hw_number_t *dest_hwirq,
			 unsigned int *dest_virq,
			 const char *name)
{
	int hwirq, virq;

	if ((hwirq = cxl_ops->alloc_one_irq(adapter)) < 0)
		return hwirq;

	if (!(virq = cxl_map_irq(adapter, hwirq, handler, cookie, name)))
		goto err;

	*dest_hwirq = hwirq;
	*dest_virq = virq;

	return 0;

err:
	cxl_ops->release_one_irq(adapter, hwirq);
	return -ENOMEM;
}

void afu_irq_name_free(struct cxl_context *ctx)
{
	struct cxl_irq_name *irq_name, *tmp;

	list_for_each_entry_safe(irq_name, tmp, &ctx->irq_names, list) {
		kfree(irq_name->name);
		list_del(&irq_name->list);
		kfree(irq_name);
	}
}
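
/*
 * Allocate the hardware IRQ ranges, the per-context IRQ bitmap and the
 * interrupt name strings for a context. Nothing is mapped or requested
 * here; afu_register_hwirqs() below wires up the handlers.
 */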
int afu_allocate_irqs(struct cxl_context *ctx, u32 count)
{
	int rc, r, i, j = 1;
	struct cxl_irq_name *irq_name;
	int alloc_count;

	/*
	 * In native mode, range 0 is reserved for the multiplexed
	 * PSL interrupt. It has been allocated when the AFU was initialized.
	 *
	 * In a guest, the PSL interrupt is not multiplexed, but per-context,
	 * and is the first interrupt from range 0. It still needs to be
	 * allocated, so bump the count by one.
	 */
	if (cpu_has_feature(CPU_FTR_HVMODE))
		alloc_count = count;
	else
		alloc_count = count + 1;

	if ((rc = cxl_ops->alloc_irq_ranges(&ctx->irqs, ctx->afu->adapter,
					    alloc_count)))
		return rc;

	if (cpu_has_feature(CPU_FTR_HVMODE)) {
		/* Multiplexed PSL Interrupt */
		ctx->irqs.offset[0] = ctx->afu->native->psl_hwirq;
		ctx->irqs.range[0] = 1;
	}

	ctx->irq_count = count;
	ctx->irq_bitmap = kcalloc(BITS_TO_LONGS(count),
				  sizeof(*ctx->irq_bitmap), GFP_KERNEL);
	if (!ctx->irq_bitmap)
		goto out;

	/*
	 * Allocate names first. If any fail, bail out before allocating
	 * actual hardware IRQs.
	 */
	for (r = afu_irq_range_start(); r < CXL_IRQ_RANGES; r++) {
		for (i = 0; i < ctx->irqs.range[r]; i++) {
			irq_name = kmalloc(sizeof(struct cxl_irq_name),
					   GFP_KERNEL);
			if (!irq_name)
				goto out;
			irq_name->name = kasprintf(GFP_KERNEL, "cxl-%s-pe%i-%i",
						   dev_name(&ctx->afu->dev),
						   ctx->pe, j);
			if (!irq_name->name) {
				kfree(irq_name);
				goto out;
			}
			/* Add to tail so the next loop gets the correct order */
			list_add_tail(&irq_name->list, &ctx->irq_names);
			j++;
		}
	}
	return 0;

out:
	cxl_ops->release_irq_ranges(&ctx->irqs, ctx->afu->adapter);
	afu_irq_name_free(ctx);
	return -ENOMEM;
}

static void afu_register_hwirqs(struct cxl_context *ctx)
{
	irq_hw_number_t hwirq;
	struct cxl_irq_name *irq_name;
	int r, i;
	irqreturn_t (*handler)(int irq, void *data);

	/* We've allocated all memory now, so let's do the irq allocations */
	irq_name = list_first_entry(&ctx->irq_names, struct cxl_irq_name, list);
	for (r = afu_irq_range_start(); r < CXL_IRQ_RANGES; r++) {
		hwirq = ctx->irqs.offset[r];
		for (i = 0; i < ctx->irqs.range[r]; hwirq++, i++) {
			if (r == 0 && i == 0)
				/*
				 * The very first interrupt of range 0 is
				 * always the PSL interrupt, but we only
				 * need to connect a handler for guests,
				 * because there's one PSL interrupt per
				 * context.
				 * On bare-metal, the PSL interrupt is
				 * multiplexed and was set up when the AFU
				 * was configured.
				 */
				handler = cxl_ops->psl_interrupt;
			else
				handler = cxl_irq_afu;
			cxl_map_irq(ctx->afu->adapter, hwirq, handler, ctx,
				    irq_name->name);
			irq_name = list_next_entry(irq_name, list);
		}
	}
}

int afu_register_irqs(struct cxl_context *ctx, u32 count)
{
	int rc;

	rc = afu_allocate_irqs(ctx, count);
	if (rc)
		return rc;

	afu_register_hwirqs(ctx);
	return 0;
}

void afu_release_irqs(struct cxl_context *ctx, void *cookie)
{
	irq_hw_number_t hwirq;
	unsigned int virq;
	int r, i;

	for (r = afu_irq_range_start(); r < CXL_IRQ_RANGES; r++) {
		hwirq = ctx->irqs.offset[r];
		for (i = 0; i < ctx->irqs.range[r]; hwirq++, i++) {
			virq = irq_find_mapping(NULL, hwirq);
			if (virq)
				cxl_unmap_irq(virq, cookie);
		}
	}

	afu_irq_name_free(ctx);
	cxl_ops->release_irq_ranges(&ctx->irqs, ctx->afu->adapter);

	ctx->irq_count = 0;
}
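
/*
 * Decode PSL_SERR_An slice error bits into human-readable messages.
 * Purely informational: each recognised bit is reported via dev_crit(),
 * no state is changed.
 */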
void cxl_afu_decode_psl_serr(struct cxl_afu *afu, u64 serr)
{
	dev_crit(&afu->dev,
		 "PSL Slice error received. Check AFU for root cause.\n");
	dev_crit(&afu->dev, "PSL_SERR_An: 0x%016llx\n", serr);
	if (serr & CXL_PSL_SERR_An_afuto)
		dev_crit(&afu->dev, "AFU MMIO Timeout\n");
	if (serr & CXL_PSL_SERR_An_afudis)
		dev_crit(&afu->dev,
			 "MMIO targeted Accelerator that was not enabled\n");
	if (serr & CXL_PSL_SERR_An_afuov)
		dev_crit(&afu->dev, "AFU CTAG Overflow\n");
	if (serr & CXL_PSL_SERR_An_badsrc)
		dev_crit(&afu->dev, "Bad Interrupt Source\n");
	if (serr & CXL_PSL_SERR_An_badctx)
		dev_crit(&afu->dev, "Bad Context Handle\n");
	if (serr & CXL_PSL_SERR_An_llcmdis)
		dev_crit(&afu->dev, "LLCMD to Disabled AFU\n");
	if (serr & CXL_PSL_SERR_An_llcmdto)
		dev_crit(&afu->dev, "LLCMD Timeout to AFU\n");
	if (serr & CXL_PSL_SERR_An_afupar)
		dev_crit(&afu->dev, "AFU MMIO Parity Error\n");
	if (serr & CXL_PSL_SERR_An_afudup)
		dev_crit(&afu->dev, "AFU MMIO Duplicate CTAG Error\n");
	if (serr & CXL_PSL_SERR_An_AE)
		dev_crit(&afu->dev,
			 "AFU asserted JDONE with JERROR in AFU Directed Mode\n");
}