/*
 * Copyright 2017 IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#include <linux/hugetlb.h>
#include <linux/sched/mm.h>
#include <asm/pnv-pci.h>
#include <misc/cxllib.h>

#include "cxl.h"

#define CXL_INVALID_DRA			~0ull
#define CXL_DUMMY_READ_SIZE		128
#define CXL_DUMMY_READ_ALIGN		8
#define CXL_CAPI_WINDOW_START		0x2000000000000ull
#define CXL_CAPI_WINDOW_LOG_SIZE	48
#define CXL_XSL_CONFIG_CURRENT_VERSION	CXL_XSL_CONFIG_VERSION1


bool cxllib_slot_is_supported(struct pci_dev *dev, unsigned long flags)
{
	int rc;
	u32 phb_index;
	u64 chip_id, capp_unit_id;

	/* No flags currently supported */
	if (flags)
		return false;

	if (!cpu_has_feature(CPU_FTR_HVMODE))
		return false;

	if (!cxl_is_power9())
		return false;

	if (cxl_slot_is_switched(dev))
		return false;

	/* On POWER9, some PCI slots are not connected to a CAPP unit */
	rc = cxl_calc_capp_routing(dev, &chip_id, &phb_index, &capp_unit_id);
	if (rc)
		return false;

	return true;
}
EXPORT_SYMBOL_GPL(cxllib_slot_is_supported);

static DEFINE_MUTEX(dra_mutex);
static u64 dummy_read_addr = CXL_INVALID_DRA;

static int allocate_dummy_read_buf(void)
{
	u64 buf, vaddr;
	size_t buf_size;

	/*
	 * The dummy read buffer is 128 bytes long, must be aligned on a
	 * 256-byte boundary, and we need its physical address.
	 */
	buf_size = CXL_DUMMY_READ_SIZE + (1ull << CXL_DUMMY_READ_ALIGN);
	buf = (u64) kzalloc(buf_size, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	vaddr = (buf + (1ull << CXL_DUMMY_READ_ALIGN) - 1) &
					(~0ull << CXL_DUMMY_READ_ALIGN);

	WARN((vaddr + CXL_DUMMY_READ_SIZE) > (buf + buf_size),
	     "Dummy read buffer alignment issue");
	dummy_read_addr = virt_to_phys((void *) vaddr);
	return 0;
}

int cxllib_get_xsl_config(struct pci_dev *dev, struct cxllib_xsl_config *cfg)
{
	int rc;
	u32 phb_index;
	u64 chip_id, capp_unit_id;

	if (!cpu_has_feature(CPU_FTR_HVMODE))
		return -EINVAL;

	mutex_lock(&dra_mutex);
	if (dummy_read_addr == CXL_INVALID_DRA) {
		rc = allocate_dummy_read_buf();
		if (rc) {
			mutex_unlock(&dra_mutex);
			return rc;
		}
	}
	mutex_unlock(&dra_mutex);

	rc = cxl_calc_capp_routing(dev, &chip_id, &phb_index, &capp_unit_id);
	if (rc)
		return rc;

	rc = cxl_get_xsl9_dsnctl(dev, capp_unit_id, &cfg->dsnctl);
	if (rc)
		return rc;

	cfg->version = CXL_XSL_CONFIG_CURRENT_VERSION;
	cfg->log_bar_size = CXL_CAPI_WINDOW_LOG_SIZE;
	cfg->bar_addr = CXL_CAPI_WINDOW_START;
	cfg->dra = dummy_read_addr;
	return 0;
}
EXPORT_SYMBOL_GPL(cxllib_get_xsl_config);
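/*
 * Illustrative sketch (not part of this driver): a caller such as an
 * AFU driver would typically fetch the XSL configuration once at probe
 * time and program it into its hardware. Only the cxllib_* call below
 * is real; 'afu' and afu_write_xsl_regs() are hypothetical.
 *
 *	struct cxllib_xsl_config cfg;
 *	int rc;
 *
 *	rc = cxllib_get_xsl_config(pdev, &cfg);
 *	if (rc)
 *		return rc;
 *	afu_write_xsl_regs(afu, cfg.version, cfg.dsnctl,
 *			   cfg.bar_addr, cfg.log_bar_size, cfg.dra);
 */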
int cxllib_switch_phb_mode(struct pci_dev *dev, enum cxllib_mode mode,
			unsigned long flags)
{
	int rc = 0;

	if (!cpu_has_feature(CPU_FTR_HVMODE))
		return -EINVAL;

	switch (mode) {
	case CXL_MODE_PCI:
		/*
		 * We currently don't support going back to PCI mode.
		 * However, we'll turn the invalidations off, so that
		 * the firmware doesn't have to ack them and can do
		 * things like reset, etc. with no worries.
		 * So always return -EPERM (can't go back to PCI) or
		 * -EBUSY if we couldn't even turn off snooping.
		 */
		rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_SNOOP_OFF);
		if (rc)
			rc = -EBUSY;
		else
			rc = -EPERM;
		break;
	case CXL_MODE_CXL:
		/* DMA only supported on TVT1 for the time being */
		if (flags != CXL_MODE_DMA_TVT1)
			return -EINVAL;
		rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_DMA_TVT1);
		if (rc)
			return rc;
		rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_SNOOP_ON);
		break;
	default:
		rc = -EINVAL;
	}
	return rc;
}
EXPORT_SYMBOL_GPL(cxllib_switch_phb_mode);

/*
 * When switching the PHB to capi mode, the TVT#1 entry for
 * the Partitionable Endpoint is set in bypass mode, like
 * in PCI mode.
 * Configure the device DMA to use TVT#1, which is done
 * by calling dma_set_mask() with a mask large enough.
 */
int cxllib_set_device_dma(struct pci_dev *dev, unsigned long flags)
{
	int rc;

	if (flags)
		return -EINVAL;

	rc = dma_set_mask(&dev->dev, DMA_BIT_MASK(64));
	return rc;
}
EXPORT_SYMBOL_GPL(cxllib_set_device_dma);

int cxllib_get_PE_attributes(struct task_struct *task,
			     unsigned long translation_mode,
			     struct cxllib_pe_attributes *attr)
{
	struct mm_struct *mm = NULL;

	if (translation_mode != CXL_TRANSLATED_MODE &&
	    translation_mode != CXL_REAL_MODE)
		return -EINVAL;

	attr->sr = cxl_calculate_sr(false,
				task == NULL,
				translation_mode == CXL_REAL_MODE,
				true);
	attr->lpid = mfspr(SPRN_LPID);
	if (task) {
		mm = get_task_mm(task);
		if (mm == NULL)
			return -EINVAL;
		/*
		 * The caller keeps a reference on mm_users for as long
		 * as the XSL uses the memory context
		 */
		attr->pid = mm->context.id;
		mmput(mm);
		attr->tid = task->thread.tidr;
	} else {
		attr->pid = 0;
		attr->tid = 0;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(cxllib_get_PE_attributes);
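/*
 * Illustrative sketch (not part of this driver): using the attributes
 * above to set up a context on a hypothetical AFU. Per the comment in
 * cxllib_get_PE_attributes(), the caller must keep its own reference on
 * mm_users (e.g. from get_task_mm()) for as long as the XSL uses the
 * context; afu_map_context() is made up.
 *
 *	struct cxllib_pe_attributes attr;
 *	int rc;
 *
 *	rc = cxllib_get_PE_attributes(current, CXL_TRANSLATED_MODE, &attr);
 *	if (rc)
 *		return rc;
 *	afu_map_context(afu, attr.sr, attr.lpid, attr.pid, attr.tid);
 */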
static int get_vma_info(struct mm_struct *mm, u64 addr,
			u64 *vma_start, u64 *vma_end,
			unsigned long *page_size)
{
	struct vm_area_struct *vma = NULL;
	int rc = 0;

	down_read(&mm->mmap_sem);

	vma = find_vma(mm, addr);
	if (!vma) {
		rc = -EFAULT;
		goto out;
	}
	*page_size = vma_kernel_pagesize(vma);
	*vma_start = vma->vm_start;
	*vma_end = vma->vm_end;
out:
	up_read(&mm->mmap_sem);
	return rc;
}

int cxllib_handle_fault(struct mm_struct *mm, u64 addr, u64 size, u64 flags)
{
	int rc;
	u64 dar, vma_start, vma_end;
	unsigned long page_size;

	if (mm == NULL)
		return -EFAULT;

	/*
	 * The buffer we have to process can extend over several pages
	 * and may also cover several VMAs.
	 * We iterate over all the pages. The page size could vary
	 * between VMAs.
	 */
	rc = get_vma_info(mm, addr, &vma_start, &vma_end, &page_size);
	if (rc)
		return rc;

	for (dar = (addr & ~(page_size - 1)); dar < (addr + size);
	     dar += page_size) {
		if (dar < vma_start || dar >= vma_end) {
			/*
			 * We don't hold the mm->mmap_sem semaphore
			 * while iterating, since the semaphore is
			 * required by one of the lower-level page
			 * fault processing functions and it could
			 * create a deadlock.
			 *
			 * It means the VMAs can be altered between 2
			 * loop iterations and we could theoretically
			 * miss a page (however unlikely). But that's
			 * not really a problem, as the driver will
			 * retry access, get another page fault on the
			 * missing page and call us again.
			 */
			rc = get_vma_info(mm, dar, &vma_start, &vma_end,
					&page_size);
			if (rc)
				return rc;
		}

		rc = cxl_handle_mm_fault(mm, flags, dar);
		if (rc)
			return -EFAULT;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(cxllib_handle_fault);
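/*
 * Illustrative sketch (not part of this driver): the overall sequence a
 * hypothetical CAPI-aware driver might follow, assuming 'pdev' is its
 * struct pci_dev. Error handling is abbreviated; only the cxllib_*
 * calls are real.
 *
 *	if (!cxllib_slot_is_supported(pdev, 0))
 *		return -ENODEV;
 *	rc = cxllib_switch_phb_mode(pdev, CXL_MODE_CXL, CXL_MODE_DMA_TVT1);
 *	if (rc)
 *		return rc;
 *	rc = cxllib_set_device_dma(pdev, 0);
 *	if (rc)
 *		return rc;
 *
 * Later, when the device reports a translation fault on a buffer:
 *
 *	rc = cxllib_handle_fault(mm, fault_addr, fault_size, flags);
 */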