/*
 * Copyright 2017 IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#include <linux/hugetlb.h>
#include <linux/sched/mm.h>
#include <asm/pnv-pci.h>
#include <misc/cxllib.h>

#include "cxl.h"

#define CXL_INVALID_DRA			~0ull
#define CXL_DUMMY_READ_SIZE		128
#define CXL_DUMMY_READ_ALIGN		8
#define CXL_CAPI_WINDOW_START		0x2000000000000ull
#define CXL_CAPI_WINDOW_LOG_SIZE	48
#define CXL_XSL_CONFIG_CURRENT_VERSION	CXL_XSL_CONFIG_VERSION1

/*
 * Check whether the PCI slot holding the device can be used in
 * CXL/CAPI mode. No flags are currently defined.
 */
bool cxllib_slot_is_supported(struct pci_dev *dev, unsigned long flags)
{
	int rc;
	u32 phb_index;
	u64 chip_id, capp_unit_id;

	/* No flags currently supported */
	if (flags)
		return false;

	if (!cpu_has_feature(CPU_FTR_HVMODE))
		return false;

	if (!cxl_is_power9())
		return false;

	if (cxl_slot_is_switched(dev))
		return false;

	/* On P9, some PCI slots are not connected to a CAPP unit */
	rc = cxl_calc_capp_routing(dev, &chip_id, &phb_index, &capp_unit_id);
	if (rc)
		return false;

	return true;
}
EXPORT_SYMBOL_GPL(cxllib_slot_is_supported);

static DEFINE_MUTEX(dra_mutex);
static u64 dummy_read_addr = CXL_INVALID_DRA;

static int allocate_dummy_read_buf(void)
{
	u64 buf, vaddr;
	size_t buf_size;

	/*
	 * The dummy read buffer is 128 bytes long, aligned on a
	 * 256-byte boundary, and we need its physical address.
	 */
	buf_size = CXL_DUMMY_READ_SIZE + (1ull << CXL_DUMMY_READ_ALIGN);
	buf = (u64) kzalloc(buf_size, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	vaddr = (buf + (1ull << CXL_DUMMY_READ_ALIGN) - 1) &
					(~0ull << CXL_DUMMY_READ_ALIGN);

	WARN((vaddr + CXL_DUMMY_READ_SIZE) > (buf + buf_size),
	     "Dummy read buffer alignment issue");
	dummy_read_addr = virt_to_phys((void *) vaddr);
	return 0;
}

/*
 * Return the configuration needed by the XSL on the device: DSNCTL
 * register value, CAPI window address and size, config version and
 * dummy read address.
 */
int cxllib_get_xsl_config(struct pci_dev *dev, struct cxllib_xsl_config *cfg)
{
	int rc;
	u32 phb_index;
	u64 chip_id, capp_unit_id;

	if (!cpu_has_feature(CPU_FTR_HVMODE))
		return -EINVAL;

	mutex_lock(&dra_mutex);
	if (dummy_read_addr == CXL_INVALID_DRA) {
		rc = allocate_dummy_read_buf();
		if (rc) {
			mutex_unlock(&dra_mutex);
			return rc;
		}
	}
	mutex_unlock(&dra_mutex);

	rc = cxl_calc_capp_routing(dev, &chip_id, &phb_index, &capp_unit_id);
	if (rc)
		return rc;

	rc = cxl_get_xsl9_dsnctl(dev, capp_unit_id, &cfg->dsnctl);
	if (rc)
		return rc;
	if (cpu_has_feature(CPU_FTR_POWER9_DD1)) {
		/* workaround for DD1 - nbwind = capiind */
		cfg->dsnctl |= ((u64)0x02 << (63-47));
	}

	cfg->version = CXL_XSL_CONFIG_CURRENT_VERSION;
	cfg->log_bar_size = CXL_CAPI_WINDOW_LOG_SIZE;
	cfg->bar_addr = CXL_CAPI_WINDOW_START;
	cfg->dra = dummy_read_addr;
	return 0;
}
EXPORT_SYMBOL_GPL(cxllib_get_xsl_config);

/* Switch the PHB hosting the device between PCI and CXL (CAPI) mode */
int cxllib_switch_phb_mode(struct pci_dev *dev, enum cxllib_mode mode,
			   unsigned long flags)
{
	int rc = 0;

	if (!cpu_has_feature(CPU_FTR_HVMODE))
		return -EINVAL;

	switch (mode) {
	case CXL_MODE_PCI:
		/*
		 * We currently don't support going back to PCI mode.
		 * However, we'll turn the invalidations off, so that
		 * the firmware doesn't have to ack them and can do
		 * things like reset, etc. without any worries.
		 * So we always return -EPERM (can't go back to PCI),
		 * or -EBUSY if we couldn't even turn off snooping.
		 */
		rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_SNOOP_OFF);
		if (rc)
			rc = -EBUSY;
		else
			rc = -EPERM;
		break;
	case CXL_MODE_CXL:
		/* DMA only supported on TVT1 for the time being */
		if (flags != CXL_MODE_DMA_TVT1)
			return -EINVAL;
		rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_DMA_TVT1);
		if (rc)
			return rc;
		rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_SNOOP_ON);
		break;
	default:
		rc = -EINVAL;
	}
	return rc;
}
EXPORT_SYMBOL_GPL(cxllib_switch_phb_mode);

/*
 * When switching the PHB to CAPI mode, the TVT#1 entry for
 * the Partitionable Endpoint is set in bypass mode, like
 * in PCI mode.
 * Configure the device DMA to use TVT#1, which is done
 * by calling dma_set_mask() with a mask large enough.
 */
int cxllib_set_device_dma(struct pci_dev *dev, unsigned long flags)
{
	int rc;

	if (flags)
		return -EINVAL;

	rc = dma_set_mask(&dev->dev, DMA_BIT_MASK(64));
	return rc;
}
EXPORT_SYMBOL_GPL(cxllib_set_device_dma);

/*
 * Fill the Process Element attributes (state register, LPID, PID, TID)
 * that the XSL needs for the given task, or for a kernel/real-mode
 * context when task is NULL.
 */
int cxllib_get_PE_attributes(struct task_struct *task,
			     unsigned long translation_mode,
			     struct cxllib_pe_attributes *attr)
{
	struct mm_struct *mm = NULL;

	if (translation_mode != CXL_TRANSLATED_MODE &&
	    translation_mode != CXL_REAL_MODE)
		return -EINVAL;

	attr->sr = cxl_calculate_sr(false,
				    task == NULL,
				    translation_mode == CXL_REAL_MODE,
				    true);
	attr->lpid = mfspr(SPRN_LPID);
	if (task) {
		mm = get_task_mm(task);
		if (mm == NULL)
			return -EINVAL;
		/*
		 * The caller keeps a reference on mm_users for as long
		 * as the XSL uses the memory context.
		 */
		attr->pid = mm->context.id;
		mmput(mm);
		attr->tid = task->thread.tidr;
	} else {
		attr->pid = 0;
		attr->tid = 0;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(cxllib_get_PE_attributes);

/* Look up the VMA containing addr and return its boundaries and page size */
static int get_vma_info(struct mm_struct *mm, u64 addr,
			u64 *vma_start, u64 *vma_end,
			unsigned long *page_size)
{
	struct vm_area_struct *vma = NULL;
	int rc = 0;

	down_read(&mm->mmap_sem);

	vma = find_vma(mm, addr);
	if (!vma) {
		rc = -EFAULT;
		goto out;
	}
	*page_size = vma_kernel_pagesize(vma);
	*vma_start = vma->vm_start;
	*vma_end = vma->vm_end;
out:
	up_read(&mm->mmap_sem);
	return rc;
}

/*
 * Fault in all the pages covering the buffer [addr, addr + size) in the
 * given address space.
 */
int cxllib_handle_fault(struct mm_struct *mm, u64 addr, u64 size, u64 flags)
{
	int rc;
	u64 dar, vma_start, vma_end;
	unsigned long page_size;

	if (mm == NULL)
		return -EFAULT;

	/*
	 * The buffer we have to process can extend over several pages
	 * and may also cover several VMAs.
	 * We iterate over all the pages. The page size could vary
	 * between VMAs.
	 */
	rc = get_vma_info(mm, addr, &vma_start, &vma_end, &page_size);
	if (rc)
		return rc;

	for (dar = (addr & ~(page_size - 1)); dar < (addr + size);
	     dar += page_size) {
		if (dar < vma_start || dar >= vma_end) {
			/*
			 * We don't hold the mm->mmap_sem semaphore
			 * while iterating, since the semaphore is
			 * required by one of the lower-level page
			 * fault processing functions and it could
			 * create a deadlock.
			 *
			 * It means the VMAs can be altered between two
			 * loop iterations and we could theoretically
			 * miss a page (however unlikely).
			 * But that's not really a problem, as the
			 * driver will retry access, get another page
			 * fault on the missing page and call us again.
			 */
			rc = get_vma_info(mm, dar, &vma_start, &vma_end,
					  &page_size);
			if (rc)
				return rc;
		}

		rc = cxl_handle_mm_fault(mm, flags, dar);
		if (rc)
			return -EFAULT;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(cxllib_handle_fault);
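
/*
 * As a rough illustration only, a CAPI-aware PCI driver might call into
 * this API in the following order. The names pdev, xsl_cfg, pe_attr,
 * buf_addr, buf_size, fault_flags and rc are placeholders, and error
 * handling is omitted:
 *
 *	if (!cxllib_slot_is_supported(pdev, 0))
 *		return -ENODEV;
 *	rc = cxllib_switch_phb_mode(pdev, CXL_MODE_CXL, CXL_MODE_DMA_TVT1);
 *	rc = cxllib_set_device_dma(pdev, 0);
 *	rc = cxllib_get_xsl_config(pdev, &xsl_cfg);
 *	rc = cxllib_get_PE_attributes(current, CXL_TRANSLATED_MODE, &pe_attr);
 *	...
 *	rc = cxllib_handle_fault(current->mm, buf_addr, buf_size, fault_flags);
 */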