// SPDX-License-Identifier: GPL-2.0+
// Copyright 2017 IBM Corp.
#include <linux/sched/mm.h>
#include <linux/mutex.h>
#include <linux/mmu_context.h>
#include <asm/copro.h>
#include <asm/pnv-ocxl.h>
#include "ocxl_internal.h"


#define SPA_PASID_BITS		15
#define SPA_PASID_MAX		((1 << SPA_PASID_BITS) - 1)
#define SPA_PE_MASK		SPA_PASID_MAX
#define SPA_SPA_SIZE_LOG	22 /* Each SPA is 4 MB */

#define SPA_CFG_SF		(1ull << (63-0))
#define SPA_CFG_TA		(1ull << (63-1))
#define SPA_CFG_HV		(1ull << (63-3))
#define SPA_CFG_UV		(1ull << (63-4))
#define SPA_CFG_XLAT_hpt	(0ull << (63-6)) /* Hashed page table (HPT) mode */
#define SPA_CFG_XLAT_roh	(2ull << (63-6)) /* Radix on HPT mode */
#define SPA_CFG_XLAT_ror	(3ull << (63-6)) /* Radix on Radix mode */
#define SPA_CFG_PR		(1ull << (63-49))
#define SPA_CFG_TC		(1ull << (63-54))
#define SPA_CFG_DR		(1ull << (63-59))

#define SPA_XSL_TF		(1ull << (63-3))	/* Translation fault */
#define SPA_XSL_S		(1ull << (63-38))	/* Store operation */

#define SPA_PE_VALID		0x80000000
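
/*
 * The SPA_CFG_* and SPA_XSL_* masks follow the Power ISA MSB-0 bit
 * numbering convention: bit 0 is the most significant bit of the
 * 64-bit register, hence the "1ull << (63 - n)" pattern. For example,
 * SPA_CFG_PR (bit 49) is 1ull << 14, i.e. 0x4000. PPC_BIT(), used
 * below to ack faults, encodes the same convention.
 */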

struct pe_data {
	struct mm_struct *mm;
	/* callback to trigger when a translation fault occurs */
	void (*xsl_err_cb)(void *data, u64 addr, u64 dsisr);
	/* opaque pointer to be passed to the above callback */
	void *xsl_err_data;
	struct rcu_head rcu;
};

struct spa {
	struct ocxl_process_element *spa_mem;
	int spa_order;
	struct mutex spa_lock;
	struct radix_tree_root pe_tree; /* Maps PE handles to pe_data */
	char *irq_name;
	int virq;
	void __iomem *reg_dsisr;
	void __iomem *reg_dar;
	void __iomem *reg_tfc;
	void __iomem *reg_pe_handle;
	/*
	 * The following fields are used by the memory fault
	 * interrupt handler. We can only have one interrupt at a
	 * time. The NPU won't raise another interrupt until the
	 * previous one has been ack'd by writing to the TFC register
	 */
	struct xsl_fault {
		struct work_struct fault_work;
		u64 pe;
		u64 dsisr;
		u64 dar;
		struct pe_data pe_data;
	} xsl_fault;
};

/*
 * An opencapi link can be used by several PCI functions. We have
 * one link per device slot.
 *
 * A linked list of opencapi links should suffice, as there's a
 * limited number of opencapi slots on a system and lookup is only
 * done when the device is probed
 */
struct link {
	struct list_head list;
	struct kref ref;
	int domain;
	int bus;
	int dev;
	atomic_t irq_available;
	struct spa *spa;
	void *platform_data;
};
static struct list_head links_list = LIST_HEAD_INIT(links_list);
static DEFINE_MUTEX(links_list_lock);

enum xsl_response {
	CONTINUE,
	ADDRESS_ERROR,
	RESTART,
};


static void read_irq(struct spa *spa, u64 *dsisr, u64 *dar, u64 *pe)
{
	u64 reg;

	*dsisr = in_be64(spa->reg_dsisr);
	*dar = in_be64(spa->reg_dar);
	reg = in_be64(spa->reg_pe_handle);
	*pe = reg & SPA_PE_MASK;
}

static void ack_irq(struct spa *spa, enum xsl_response r)
{
	u64 reg = 0;

	/* continue is not supported */
	if (r == RESTART)
		reg = PPC_BIT(31);
	else if (r == ADDRESS_ERROR)
		reg = PPC_BIT(30);
	else
		WARN(1, "Invalid irq response %d\n", r);

	if (reg)
		out_be64(spa->reg_tfc, reg);
}
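
/*
 * Translation faults are handled in two stages: xsl_fault_handler(),
 * the hard interrupt handler, reads the fault registers, looks up the
 * faulting context under RCU and grabs a reference on its mm, then
 * defers the actual resolution to the bottom half below, which runs
 * from a workqueue and may sleep. The NPU won't raise a new fault
 * until the current one has been ack'd through the TFC register.
 */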
static void xsl_fault_handler_bh(struct work_struct *fault_work)
{
	unsigned int flt = 0;
	unsigned long access, flags, inv_flags = 0;
	enum xsl_response r;
	struct xsl_fault *fault = container_of(fault_work, struct xsl_fault,
					fault_work);
	struct spa *spa = container_of(fault, struct spa, xsl_fault);
	int rc;

	/*
	 * We need to release a reference on the mm whenever exiting this
	 * function (taken in the memory fault interrupt handler)
	 */
	rc = copro_handle_mm_fault(fault->pe_data.mm, fault->dar, fault->dsisr,
				&flt);
	if (rc) {
		pr_debug("copro_handle_mm_fault failed: %d\n", rc);
		if (fault->pe_data.xsl_err_cb) {
			fault->pe_data.xsl_err_cb(
				fault->pe_data.xsl_err_data,
				fault->dar, fault->dsisr);
		}
		r = ADDRESS_ERROR;
		goto ack;
	}

	if (!radix_enabled()) {
		/*
		 * update_mmu_cache() will not have loaded the hash
		 * since current->trap is not a 0x400 or 0x300, so
		 * just call hash_page_mm() here.
		 */
		access = _PAGE_PRESENT | _PAGE_READ;
		if (fault->dsisr & SPA_XSL_S)
			access |= _PAGE_WRITE;

		if (REGION_ID(fault->dar) != USER_REGION_ID)
			access |= _PAGE_PRIVILEGED;

		local_irq_save(flags);
		hash_page_mm(fault->pe_data.mm, fault->dar, access, 0x300,
			inv_flags);
		local_irq_restore(flags);
	}
	r = RESTART;
ack:
	mmdrop(fault->pe_data.mm);
	ack_irq(spa, r);
}

static irqreturn_t xsl_fault_handler(int irq, void *data)
{
	struct link *link = (struct link *) data;
	struct spa *spa = link->spa;
	u64 dsisr, dar, pe_handle;
	struct pe_data *pe_data;
	struct ocxl_process_element *pe;
	int lpid, pid, tid;

	read_irq(spa, &dsisr, &dar, &pe_handle);

	WARN_ON(pe_handle > SPA_PE_MASK);
	pe = spa->spa_mem + pe_handle;
	lpid = be32_to_cpu(pe->lpid);
	pid = be32_to_cpu(pe->pid);
	tid = be32_to_cpu(pe->tid);
	/* We could be reading all null values here if the PE is being
	 * removed while an interrupt kicks in. It's not supposed to
	 * happen if the driver notified the AFU to terminate the
	 * PASID, and the AFU waited for pending operations before
	 * acknowledging. But even if it happens, we won't find a
	 * memory context below and fail silently, so it should be ok.
	 */
	if (!(dsisr & SPA_XSL_TF)) {
		WARN(1, "Invalid xsl interrupt fault register %#llx\n", dsisr);
		ack_irq(spa, ADDRESS_ERROR);
		return IRQ_HANDLED;
	}

	rcu_read_lock();
	pe_data = radix_tree_lookup(&spa->pe_tree, pe_handle);
	if (!pe_data) {
		/*
		 * Could only happen if the driver didn't notify the
		 * AFU about PASID termination before removing the PE,
		 * or the AFU didn't wait for all memory access to
		 * have completed.
		 *
		 * Either way, we fail early, but we shouldn't log an
		 * error message, as it is a valid (if unexpected)
		 * scenario
		 */
		rcu_read_unlock();
		pr_debug("Unknown mm context for xsl interrupt\n");
		ack_irq(spa, ADDRESS_ERROR);
		return IRQ_HANDLED;
	}
	WARN_ON(pe_data->mm->context.id != pid);

	spa->xsl_fault.pe = pe_handle;
	spa->xsl_fault.dar = dar;
	spa->xsl_fault.dsisr = dsisr;
	spa->xsl_fault.pe_data = *pe_data;
	mmgrab(pe_data->mm); /* mm count is released by bottom half */

	rcu_read_unlock();
	schedule_work(&spa->xsl_fault.fault_work);
	return IRQ_HANDLED;
}

static void unmap_irq_registers(struct spa *spa)
{
	pnv_ocxl_unmap_xsl_regs(spa->reg_dsisr, spa->reg_dar, spa->reg_tfc,
				spa->reg_pe_handle);
}

static int map_irq_registers(struct pci_dev *dev, struct spa *spa)
{
	return pnv_ocxl_map_xsl_regs(dev, &spa->reg_dsisr, &spa->reg_dar,
				&spa->reg_tfc, &spa->reg_pe_handle);
}

static int setup_xsl_irq(struct pci_dev *dev, struct link *link)
{
	struct spa *spa = link->spa;
	int rc;
	int hwirq;

	rc = pnv_ocxl_get_xsl_irq(dev, &hwirq);
	if (rc)
		return rc;

	rc = map_irq_registers(dev, spa);
	if (rc)
		return rc;

	spa->irq_name = kasprintf(GFP_KERNEL, "ocxl-xsl-%x-%x-%x",
				link->domain, link->bus, link->dev);
	if (!spa->irq_name) {
		unmap_irq_registers(spa);
		dev_err(&dev->dev, "Can't allocate name for xsl interrupt\n");
		return -ENOMEM;
	}
	/*
	 * At some point, we'll need to look into allowing a higher
	 * number of interrupts. Could we have an IRQ domain per link?
	 */
	spa->virq = irq_create_mapping(NULL, hwirq);
	if (!spa->virq) {
		kfree(spa->irq_name);
		unmap_irq_registers(spa);
		dev_err(&dev->dev,
			"irq_create_mapping failed for translation interrupt\n");
		return -EINVAL;
	}

	dev_dbg(&dev->dev, "hwirq %d mapped to virq %d\n", hwirq, spa->virq);

	rc = request_irq(spa->virq, xsl_fault_handler, 0, spa->irq_name,
			link);
	if (rc) {
		irq_dispose_mapping(spa->virq);
		kfree(spa->irq_name);
		unmap_irq_registers(spa);
		dev_err(&dev->dev,
			"request_irq failed for translation interrupt: %d\n",
			rc);
		return -EINVAL;
	}
	return 0;
}

static void release_xsl_irq(struct link *link)
{
	struct spa *spa = link->spa;

	if (spa->virq) {
		free_irq(spa->virq, link);
		irq_dispose_mapping(spa->virq);
	}
	kfree(spa->irq_name);
	unmap_irq_registers(spa);
}
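
/*
 * Sizing cross-check: the SPA is 1 << SPA_SPA_SIZE_LOG = 4 MB and each
 * ocxl_process_element is 128 bytes (see the BUILD_BUG_ON in
 * ocxl_link_add_pe()), so it holds 4 MB / 128 = 32768 process
 * elements, one per PASID (1 << SPA_PASID_BITS). With 64K pages
 * (PAGE_SHIFT == 16), spa_order below comes out to 22 - 16 = 6.
 */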
static int alloc_spa(struct pci_dev *dev, struct link *link)
{
	struct spa *spa;

	spa = kzalloc(sizeof(struct spa), GFP_KERNEL);
	if (!spa)
		return -ENOMEM;

	mutex_init(&spa->spa_lock);
	INIT_RADIX_TREE(&spa->pe_tree, GFP_KERNEL);
	INIT_WORK(&spa->xsl_fault.fault_work, xsl_fault_handler_bh);

	spa->spa_order = SPA_SPA_SIZE_LOG - PAGE_SHIFT;
	spa->spa_mem = (struct ocxl_process_element *)
		__get_free_pages(GFP_KERNEL | __GFP_ZERO, spa->spa_order);
	if (!spa->spa_mem) {
		dev_err(&dev->dev, "Can't allocate Shared Process Area\n");
		kfree(spa);
		return -ENOMEM;
	}
	pr_debug("Allocated SPA for %x:%x:%x at %p\n", link->domain, link->bus,
		link->dev, spa->spa_mem);

	link->spa = spa;
	return 0;
}

static void free_spa(struct link *link)
{
	struct spa *spa = link->spa;

	pr_debug("Freeing SPA for %x:%x:%x\n", link->domain, link->bus,
		link->dev);

	if (spa && spa->spa_mem) {
		free_pages((unsigned long) spa->spa_mem, spa->spa_order);
		kfree(spa);
		link->spa = NULL;
	}
}

static int alloc_link(struct pci_dev *dev, int PE_mask, struct link **out_link)
{
	struct link *link;
	int rc;

	link = kzalloc(sizeof(struct link), GFP_KERNEL);
	if (!link)
		return -ENOMEM;

	kref_init(&link->ref);
	link->domain = pci_domain_nr(dev->bus);
	link->bus = dev->bus->number;
	link->dev = PCI_SLOT(dev->devfn);
	atomic_set(&link->irq_available, MAX_IRQ_PER_LINK);

	rc = alloc_spa(dev, link);
	if (rc)
		goto err_free;

	rc = setup_xsl_irq(dev, link);
	if (rc)
		goto err_spa;

	/* platform specific hook */
	rc = pnv_ocxl_spa_setup(dev, link->spa->spa_mem, PE_mask,
				&link->platform_data);
	if (rc)
		goto err_xsl_irq;

	*out_link = link;
	return 0;

err_xsl_irq:
	release_xsl_irq(link);
err_spa:
	free_spa(link);
err_free:
	kfree(link);
	return rc;
}

static void free_link(struct link *link)
{
	release_xsl_irq(link);
	free_spa(link);
	kfree(link);
}

int ocxl_link_setup(struct pci_dev *dev, int PE_mask, void **link_handle)
{
	int rc = 0;
	struct link *link;

	mutex_lock(&links_list_lock);
	list_for_each_entry(link, &links_list, list) {
		/* The functions of a device all share the same link */
		if (link->domain == pci_domain_nr(dev->bus) &&
			link->bus == dev->bus->number &&
			link->dev == PCI_SLOT(dev->devfn)) {
			kref_get(&link->ref);
			*link_handle = link;
			goto unlock;
		}
	}
	rc = alloc_link(dev, PE_mask, &link);
	if (rc)
		goto unlock;

	list_add(&link->list, &links_list);
	*link_handle = link;
unlock:
	mutex_unlock(&links_list_lock);
	return rc;
}
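
/*
 * Illustrative usage sketch (not taken from this file; the caller's
 * variable names are made up): a driver probing a function on an
 * opencapi device would pair the calls above and below like this:
 *
 *	void *link_handle;
 *
 *	rc = ocxl_link_setup(dev, PE_mask, &link_handle);
 *	if (rc)
 *		return rc;
 *	rc = ocxl_link_add_pe(link_handle, pasid, pidr, tidr, amr, mm,
 *			      err_cb, err_data);
 *	...
 *	ocxl_link_remove_pe(link_handle, pasid);
 *	ocxl_link_release(dev, link_handle);
 */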

static void release_xsl(struct kref *ref)
{
	struct link *link = container_of(ref, struct link, ref);

	list_del(&link->list);
	/* call platform code before releasing data */
	pnv_ocxl_spa_release(link->platform_data);
	free_link(link);
}

void ocxl_link_release(struct pci_dev *dev, void *link_handle)
{
	struct link *link = (struct link *) link_handle;

	mutex_lock(&links_list_lock);
	kref_put(&link->ref, release_xsl);
	mutex_unlock(&links_list_lock);
}
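
/*
 * Worked example for the function below: a 64-bit userspace process on
 * a radix host gets SPA_CFG_DR | SPA_CFG_XLAT_ror | SPA_CFG_HV |
 * SPA_CFG_PR | SPA_CFG_SF (plus SPA_CFG_TC if LPCR[TC] is set). A
 * kernel context (pidr == 0, see ocxl_link_add_pe()) doesn't set
 * SPA_CFG_PR and takes SPA_CFG_SF from the MSR instead.
 */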
static u64 calculate_cfg_state(bool kernel)
{
	u64 state;

	state = SPA_CFG_DR;
	if (mfspr(SPRN_LPCR) & LPCR_TC)
		state |= SPA_CFG_TC;
	if (radix_enabled())
		state |= SPA_CFG_XLAT_ror;
	else
		state |= SPA_CFG_XLAT_hpt;
	state |= SPA_CFG_HV;
	if (kernel) {
		if (mfmsr() & MSR_SF)
			state |= SPA_CFG_SF;
	} else {
		state |= SPA_CFG_PR;
		if (!test_tsk_thread_flag(current, TIF_32BIT))
			state |= SPA_CFG_SF;
	}
	return state;
}

int ocxl_link_add_pe(void *link_handle, int pasid, u32 pidr, u32 tidr,
		u64 amr, struct mm_struct *mm,
		void (*xsl_err_cb)(void *data, u64 addr, u64 dsisr),
		void *xsl_err_data)
{
	struct link *link = (struct link *) link_handle;
	struct spa *spa = link->spa;
	struct ocxl_process_element *pe;
	int pe_handle, rc = 0;
	struct pe_data *pe_data;

	BUILD_BUG_ON(sizeof(struct ocxl_process_element) != 128);
	if (pasid > SPA_PASID_MAX)
		return -EINVAL;

	mutex_lock(&spa->spa_lock);
	pe_handle = pasid & SPA_PE_MASK;
	pe = spa->spa_mem + pe_handle;

	if (pe->software_state) {
		rc = -EBUSY;
		goto unlock;
	}

	pe_data = kmalloc(sizeof(*pe_data), GFP_KERNEL);
	if (!pe_data) {
		rc = -ENOMEM;
		goto unlock;
	}

	pe_data->mm = mm;
	pe_data->xsl_err_cb = xsl_err_cb;
	pe_data->xsl_err_data = xsl_err_data;

	memset(pe, 0, sizeof(struct ocxl_process_element));
	pe->config_state = cpu_to_be64(calculate_cfg_state(pidr == 0));
	pe->lpid = cpu_to_be32(mfspr(SPRN_LPID));
	pe->pid = cpu_to_be32(pidr);
	pe->tid = cpu_to_be32(tidr);
	pe->amr = cpu_to_be64(amr);
	pe->software_state = cpu_to_be32(SPA_PE_VALID);

	mm_context_add_copro(mm);
	/*
	 * Barrier is to make sure PE is visible in the SPA before it
	 * is used by the device. It also helps with the global TLBI
	 * invalidation
	 */
	mb();
	radix_tree_insert(&spa->pe_tree, pe_handle, pe_data);

	/*
	 * The mm must stay valid for as long as the device uses it. We
	 * lower the count when the context is removed from the SPA.
	 *
	 * We grab mm_count (and not mm_users), as we don't want to
	 * end up in a circular dependency if a process mmaps its
	 * mmio, therefore incrementing the file ref count when
	 * calling mmap(), and forgets to unmap before exiting. In
	 * that scenario, when the kernel handles the death of the
	 * process, the file is not cleaned because unmap was not
	 * called, and the mm wouldn't be freed because we would still
	 * have a reference on mm_users. Incrementing mm_count solves
	 * the problem.
	 */
	mmgrab(mm);
unlock:
	mutex_unlock(&spa->spa_lock);
	return rc;
}

int ocxl_link_remove_pe(void *link_handle, int pasid)
{
	struct link *link = (struct link *) link_handle;
	struct spa *spa = link->spa;
	struct ocxl_process_element *pe;
	struct pe_data *pe_data;
	int pe_handle, rc;

	if (pasid > SPA_PASID_MAX)
		return -EINVAL;

	/*
	 * About synchronization with our memory fault handler:
	 *
	 * Before removing the PE, the driver is supposed to have
	 * notified the AFU, which should have cleaned up and made
	 * sure the PASID is no longer in use, including pending
	 * interrupts. However, there's no way to be sure...
	 *
	 * We clear the PE and remove the context from our radix
	 * tree. From that point on, any new interrupt for that
	 * context will fail silently, which is ok. As mentioned
	 * above, that's not expected, but it could happen if the
	 * driver or AFU didn't do the right thing.
	 *
	 * There could still be a bottom half running, but we don't
	 * need to wait/flush, as it is managing a reference count on
	 * the mm it reads from the radix tree.
	 */
	pe_handle = pasid & SPA_PE_MASK;
	pe = spa->spa_mem + pe_handle;

	mutex_lock(&spa->spa_lock);

	if (!(be32_to_cpu(pe->software_state) & SPA_PE_VALID)) {
		rc = -EINVAL;
		goto unlock;
	}

	memset(pe, 0, sizeof(struct ocxl_process_element));
	/*
	 * The barrier makes sure the PE is removed from the SPA
	 * before we clear the NPU context cache below, so that the
	 * old PE cannot be reloaded erroneously.
	 */
	mb();

	/*
	 * hook to platform code
	 * On powerpc, the entry needs to be cleared from the context
	 * cache of the NPU.
	 */
	rc = pnv_ocxl_spa_remove_pe(link->platform_data, pe_handle);
	WARN_ON(rc);

	pe_data = radix_tree_delete(&spa->pe_tree, pe_handle);
	if (!pe_data) {
		WARN(1, "Couldn't find pe data when removing PE\n");
	} else {
		mm_context_remove_copro(pe_data->mm);
		mmdrop(pe_data->mm);
		kfree_rcu(pe_data, rcu);
	}
unlock:
	mutex_unlock(&spa->spa_lock);
	return rc;
}