// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright 2017 IBM Corp.
 */

#include <linux/hugetlb.h>
#include <linux/sched/mm.h>
#include <asm/pnv-pci.h>
#include <misc/cxllib.h>

#include "cxl.h"

#define CXL_INVALID_DRA			~0ull
#define CXL_DUMMY_READ_SIZE		128
#define CXL_DUMMY_READ_ALIGN		8
#define CXL_CAPI_WINDOW_START		0x2000000000000ull
#define CXL_CAPI_WINDOW_LOG_SIZE	48
#define CXL_XSL_CONFIG_CURRENT_VERSION	CXL_XSL_CONFIG_VERSION1


bool cxllib_slot_is_supported(struct pci_dev *dev, unsigned long flags)
{
	int rc;
	u32 phb_index;
	u64 chip_id, capp_unit_id;

	/* No flags currently supported */
	if (flags)
		return false;

	if (!cpu_has_feature(CPU_FTR_HVMODE))
		return false;

	if (!cxl_is_power9())
		return false;

	if (cxl_slot_is_switched(dev))
		return false;

	/* on p9, some pci slots are not connected to a CAPP unit */
	rc = cxl_calc_capp_routing(dev, &chip_id, &phb_index, &capp_unit_id);
	if (rc)
		return false;

	return true;
}
EXPORT_SYMBOL_GPL(cxllib_slot_is_supported);

static DEFINE_MUTEX(dra_mutex);
static u64 dummy_read_addr = CXL_INVALID_DRA;

static int allocate_dummy_read_buf(void)
{
	u64 buf, vaddr;
	size_t buf_size;

	/*
	 * Dummy read buffer is 128 bytes long, aligned on a
	 * 256-byte boundary, and we need the physical address.
	 */
	buf_size = CXL_DUMMY_READ_SIZE + (1ull << CXL_DUMMY_READ_ALIGN);
	buf = (u64) kzalloc(buf_size, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	vaddr = (buf + (1ull << CXL_DUMMY_READ_ALIGN) - 1) &
					(~0ull << CXL_DUMMY_READ_ALIGN);

	WARN((vaddr + CXL_DUMMY_READ_SIZE) > (buf + buf_size),
	     "Dummy read buffer alignment issue");
	dummy_read_addr = virt_to_phys((void *) vaddr);
	return 0;
}

int cxllib_get_xsl_config(struct pci_dev *dev, struct cxllib_xsl_config *cfg)
{
	int rc;
	u32 phb_index;
	u64 chip_id, capp_unit_id;

	if (!cpu_has_feature(CPU_FTR_HVMODE))
		return -EINVAL;

	mutex_lock(&dra_mutex);
	if (dummy_read_addr == CXL_INVALID_DRA) {
		rc = allocate_dummy_read_buf();
		if (rc) {
			mutex_unlock(&dra_mutex);
			return rc;
		}
	}
	mutex_unlock(&dra_mutex);

	rc = cxl_calc_capp_routing(dev, &chip_id, &phb_index, &capp_unit_id);
	if (rc)
		return rc;

	rc = cxl_get_xsl9_dsnctl(dev, capp_unit_id, &cfg->dsnctl);
	if (rc)
		return rc;

	cfg->version = CXL_XSL_CONFIG_CURRENT_VERSION;
	cfg->log_bar_size = CXL_CAPI_WINDOW_LOG_SIZE;
	cfg->bar_addr = CXL_CAPI_WINDOW_START;
	cfg->dra = dummy_read_addr;
	return 0;
}
EXPORT_SYMBOL_GPL(cxllib_get_xsl_config);

int cxllib_switch_phb_mode(struct pci_dev *dev, enum cxllib_mode mode,
			   unsigned long flags)
{
	int rc = 0;

	if (!cpu_has_feature(CPU_FTR_HVMODE))
		return -EINVAL;

	switch (mode) {
	case CXL_MODE_PCI:
		/*
		 * We currently don't support going back to PCI mode.
		 * However, we'll turn the invalidations off, so that
		 * the firmware doesn't have to ack them and can do
		 * things like reset, etc., with no worries.
		 * So always return EPERM (can't go back to PCI) or
		 * EBUSY if we couldn't even turn off snooping.
		 */
		rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_SNOOP_OFF);
		if (rc)
			rc = -EBUSY;
		else
			rc = -EPERM;
		break;
	case CXL_MODE_CXL:
		/* DMA only supported on TVT1 for the time being */
		if (flags != CXL_MODE_DMA_TVT1)
			return -EINVAL;
		rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_DMA_TVT1);
		if (rc)
			return rc;
		rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_SNOOP_ON);
		break;
	default:
		rc = -EINVAL;
	}
	return rc;
}
EXPORT_SYMBOL_GPL(cxllib_switch_phb_mode);

/*
 * When switching the PHB to capi mode, the TVT#1 entry for
 * the Partitionable Endpoint is set in bypass mode, like
 * in PCI mode.
 * Configure the device dma to use TVT#1, which is done
 * by calling dma_set_mask() with a mask large enough.
 */
int cxllib_set_device_dma(struct pci_dev *dev, unsigned long flags)
{
	int rc;

	if (flags)
		return -EINVAL;

	rc = dma_set_mask(&dev->dev, DMA_BIT_MASK(64));
	return rc;
}
EXPORT_SYMBOL_GPL(cxllib_set_device_dma);

int cxllib_get_PE_attributes(struct task_struct *task,
			     unsigned long translation_mode,
			     struct cxllib_pe_attributes *attr)
{
	if (translation_mode != CXL_TRANSLATED_MODE &&
	    translation_mode != CXL_REAL_MODE)
		return -EINVAL;

	attr->sr = cxl_calculate_sr(false,
				    task == NULL,
				    translation_mode == CXL_REAL_MODE,
				    true);
	attr->lpid = mfspr(SPRN_LPID);
	if (task) {
		struct mm_struct *mm = get_task_mm(task);
		if (mm == NULL)
			return -EINVAL;
		/*
		 * Caller is keeping a reference on mm_users for as long
		 * as XSL uses the memory context
		 */
		attr->pid = mm->context.id;
		mmput(mm);
		attr->tid = task->thread.tidr;
	} else {
		attr->pid = 0;
		attr->tid = 0;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(cxllib_get_PE_attributes);

/* Look up the VMA for @addr under mmap_lock and report its bounds and page size */
static int get_vma_info(struct mm_struct *mm, u64 addr,
			u64 *vma_start, u64 *vma_end,
			unsigned long *page_size)
{
	struct vm_area_struct *vma = NULL;
	int rc = 0;

	mmap_read_lock(mm);

	vma = find_vma(mm, addr);
	if (!vma) {
		rc = -EFAULT;
		goto out;
	}
	*page_size = vma_kernel_pagesize(vma);
	*vma_start = vma->vm_start;
	*vma_end = vma->vm_end;
out:
	mmap_read_unlock(mm);
	return rc;
}

int cxllib_handle_fault(struct mm_struct *mm, u64 addr, u64 size, u64 flags)
{
	int rc;
	u64 dar, vma_start, vma_end;
	unsigned long page_size;

	if (mm == NULL)
		return -EFAULT;

	/*
	 * The buffer we have to process can extend over several pages
	 * and may also cover several VMAs.
	 * We iterate over all the pages. The page size could vary
	 * between VMAs.
	 */
	rc = get_vma_info(mm, addr, &vma_start, &vma_end, &page_size);
	if (rc)
		return rc;

	for (dar = (addr & ~(page_size - 1)); dar < (addr + size);
	     dar += page_size) {
		if (dar < vma_start || dar >= vma_end) {
			/*
			 * We don't hold mm->mmap_lock while iterating, since
			 * the lock is required by one of the lower-level page
			 * fault processing functions and it could
			 * create a deadlock.
			 *
			 * It means the VMAs can be altered between 2
			 * loop iterations and we could theoretically
			 * miss a page (however unlikely). But that's
			 * not really a problem, as the driver will
			 * retry access, get another page fault on the
			 * missing page and call us again.
			 */
			rc = get_vma_info(mm, dar, &vma_start, &vma_end,
					  &page_size);
			if (rc)
				return rc;
		}

		rc = cxl_handle_mm_fault(mm, flags, dar);
		if (rc)
			return -EFAULT;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(cxllib_handle_fault);
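
/*
 * Illustrative sketch only: roughly how a caller of this library (e.g. an
 * AFU driver such as cxlflash) might drive the exported helpers above when
 * bringing a device up in CAPI mode. The helper name my_afu_enable() and the
 * exact ordering are assumptions for illustration, not requirements imposed
 * by this API; only the cxllib_* calls and flags below come from this file.
 *
 *	static int my_afu_enable(struct pci_dev *pdev)
 *	{
 *		struct cxllib_xsl_config cfg;
 *		int rc;
 *
 *		if (!cxllib_slot_is_supported(pdev, 0))
 *			return -ENODEV;
 *
 *		rc = cxllib_get_xsl_config(pdev, &cfg);
 *		if (rc)
 *			return rc;
 *		// program the device XSL with cfg.dsnctl, cfg.bar_addr, ...
 *
 *		rc = cxllib_switch_phb_mode(pdev, CXL_MODE_CXL,
 *					    CXL_MODE_DMA_TVT1);
 *		if (rc)
 *			return rc;
 *
 *		return cxllib_set_device_dma(pdev, 0);
 *	}
 */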