1 // SPDX-License-Identifier: GPL-2.0+ 2 /* 3 * TCE helpers for IODA PCI/PCIe on PowerNV platforms 4 * 5 * Copyright 2018 IBM Corp. 6 * 7 * This program is free software; you can redistribute it and/or 8 * modify it under the terms of the GNU General Public License 9 * as published by the Free Software Foundation; either version 10 * 2 of the License, or (at your option) any later version. 11 */ 12 13 #include <linux/kernel.h> 14 #include <linux/iommu.h> 15 16 #include <asm/iommu.h> 17 #include <asm/tce.h> 18 #include "pci.h" 19 20 unsigned long pnv_ioda_parse_tce_sizes(struct pnv_phb *phb) 21 { 22 struct pci_controller *hose = phb->hose; 23 struct device_node *dn = hose->dn; 24 unsigned long mask = 0; 25 int i, rc, count; 26 u32 val; 27 28 count = of_property_count_u32_elems(dn, "ibm,supported-tce-sizes"); 29 if (count <= 0) { 30 mask = SZ_4K | SZ_64K; 31 /* Add 16M for POWER8 by default */ 32 if (cpu_has_feature(CPU_FTR_ARCH_207S) && 33 !cpu_has_feature(CPU_FTR_ARCH_300)) 34 mask |= SZ_16M | SZ_256M; 35 return mask; 36 } 37 38 for (i = 0; i < count; i++) { 39 rc = of_property_read_u32_index(dn, "ibm,supported-tce-sizes", 40 i, &val); 41 if (rc == 0) 42 mask |= 1ULL << val; 43 } 44 45 return mask; 46 } 47 48 void pnv_pci_setup_iommu_table(struct iommu_table *tbl, 49 void *tce_mem, u64 tce_size, 50 u64 dma_offset, unsigned int page_shift) 51 { 52 tbl->it_blocksize = 16; 53 tbl->it_base = (unsigned long)tce_mem; 54 tbl->it_page_shift = page_shift; 55 tbl->it_offset = dma_offset >> tbl->it_page_shift; 56 tbl->it_index = 0; 57 tbl->it_size = tce_size >> 3; 58 tbl->it_busno = 0; 59 tbl->it_type = TCE_PCI; 60 } 61 62 static __be64 *pnv_alloc_tce_level(int nid, unsigned int shift) 63 { 64 struct page *tce_mem = NULL; 65 __be64 *addr; 66 67 tce_mem = alloc_pages_node(nid, GFP_ATOMIC | __GFP_NOWARN, 68 shift - PAGE_SHIFT); 69 if (!tce_mem) { 70 pr_err("Failed to allocate a TCE memory, level shift=%d\n", 71 shift); 72 return NULL; 73 } 74 addr = page_address(tce_mem); 75 memset(addr, 0, 1UL << shift); 76 77 return addr; 78 } 79 80 static void pnv_pci_ioda2_table_do_free_pages(__be64 *addr, 81 unsigned long size, unsigned int levels); 82 83 static __be64 *pnv_tce(struct iommu_table *tbl, bool user, long idx, bool alloc) 84 { 85 __be64 *tmp = user ? tbl->it_userspace : (__be64 *) tbl->it_base; 86 int level = tbl->it_indirect_levels; 87 const long shift = ilog2(tbl->it_level_size); 88 unsigned long mask = (tbl->it_level_size - 1) << (level * shift); 89 90 while (level) { 91 int n = (idx & mask) >> (level * shift); 92 unsigned long oldtce, tce = be64_to_cpu(READ_ONCE(tmp[n])); 93 94 if (!tce) { 95 __be64 *tmp2; 96 97 if (!alloc) 98 return NULL; 99 100 tmp2 = pnv_alloc_tce_level(tbl->it_nid, 101 ilog2(tbl->it_level_size) + 3); 102 if (!tmp2) 103 return NULL; 104 105 tce = __pa(tmp2) | TCE_PCI_READ | TCE_PCI_WRITE; 106 oldtce = be64_to_cpu(cmpxchg(&tmp[n], 0, 107 cpu_to_be64(tce))); 108 if (oldtce) { 109 pnv_pci_ioda2_table_do_free_pages(tmp2, 110 ilog2(tbl->it_level_size) + 3, 1); 111 tce = oldtce; 112 } 113 } 114 115 tmp = __va(tce & ~(TCE_PCI_READ | TCE_PCI_WRITE)); 116 idx &= ~mask; 117 mask >>= shift; 118 --level; 119 } 120 121 return tmp + idx; 122 } 123 124 int pnv_tce_build(struct iommu_table *tbl, long index, long npages, 125 unsigned long uaddr, enum dma_data_direction direction, 126 unsigned long attrs) 127 { 128 u64 proto_tce = iommu_direction_to_tce_perm(direction); 129 u64 rpn = __pa(uaddr) >> tbl->it_page_shift; 130 long i; 131 132 if (proto_tce & TCE_PCI_WRITE) 133 proto_tce |= TCE_PCI_READ; 134 135 for (i = 0; i < npages; i++) { 136 unsigned long newtce = proto_tce | 137 ((rpn + i) << tbl->it_page_shift); 138 unsigned long idx = index - tbl->it_offset + i; 139 140 *(pnv_tce(tbl, false, idx, true)) = cpu_to_be64(newtce); 141 } 142 143 return 0; 144 } 145 146 #ifdef CONFIG_IOMMU_API 147 int pnv_tce_xchg(struct iommu_table *tbl, long index, 148 unsigned long *hpa, enum dma_data_direction *direction, 149 bool alloc) 150 { 151 u64 proto_tce = iommu_direction_to_tce_perm(*direction); 152 unsigned long newtce = *hpa | proto_tce, oldtce; 153 unsigned long idx = index - tbl->it_offset; 154 __be64 *ptce = NULL; 155 156 BUG_ON(*hpa & ~IOMMU_PAGE_MASK(tbl)); 157 158 if (*direction == DMA_NONE) { 159 ptce = pnv_tce(tbl, false, idx, false); 160 if (!ptce) { 161 *hpa = 0; 162 return 0; 163 } 164 } 165 166 if (!ptce) { 167 ptce = pnv_tce(tbl, false, idx, alloc); 168 if (!ptce) 169 return -ENOMEM; 170 } 171 172 if (newtce & TCE_PCI_WRITE) 173 newtce |= TCE_PCI_READ; 174 175 oldtce = be64_to_cpu(xchg(ptce, cpu_to_be64(newtce))); 176 *hpa = oldtce & ~(TCE_PCI_READ | TCE_PCI_WRITE); 177 *direction = iommu_tce_direction(oldtce); 178 179 return 0; 180 } 181 182 __be64 *pnv_tce_useraddrptr(struct iommu_table *tbl, long index, bool alloc) 183 { 184 if (WARN_ON_ONCE(!tbl->it_userspace)) 185 return NULL; 186 187 return pnv_tce(tbl, true, index - tbl->it_offset, alloc); 188 } 189 #endif 190 191 void pnv_tce_free(struct iommu_table *tbl, long index, long npages) 192 { 193 long i; 194 195 for (i = 0; i < npages; i++) { 196 unsigned long idx = index - tbl->it_offset + i; 197 __be64 *ptce = pnv_tce(tbl, false, idx, false); 198 199 if (ptce) 200 *ptce = cpu_to_be64(0); 201 else 202 /* Skip the rest of the level */ 203 i |= tbl->it_level_size - 1; 204 } 205 } 206 207 unsigned long pnv_tce_get(struct iommu_table *tbl, long index) 208 { 209 __be64 *ptce = pnv_tce(tbl, false, index - tbl->it_offset, false); 210 211 if (!ptce) 212 return 0; 213 214 return be64_to_cpu(*ptce); 215 } 216 217 static void pnv_pci_ioda2_table_do_free_pages(__be64 *addr, 218 unsigned long size, unsigned int levels) 219 { 220 const unsigned long addr_ul = (unsigned long) addr & 221 ~(TCE_PCI_READ | TCE_PCI_WRITE); 222 223 if (levels) { 224 long i; 225 u64 *tmp = (u64 *) addr_ul; 226 227 for (i = 0; i < size; ++i) { 228 unsigned long hpa = be64_to_cpu(tmp[i]); 229 230 if (!(hpa & (TCE_PCI_READ | TCE_PCI_WRITE))) 231 continue; 232 233 pnv_pci_ioda2_table_do_free_pages(__va(hpa), size, 234 levels - 1); 235 } 236 } 237 238 free_pages(addr_ul, get_order(size << 3)); 239 } 240 241 void pnv_pci_ioda2_table_free_pages(struct iommu_table *tbl) 242 { 243 const unsigned long size = tbl->it_indirect_levels ? 244 tbl->it_level_size : tbl->it_size; 245 246 if (!tbl->it_size) 247 return; 248 249 pnv_pci_ioda2_table_do_free_pages((__be64 *)tbl->it_base, size, 250 tbl->it_indirect_levels); 251 if (tbl->it_userspace) { 252 pnv_pci_ioda2_table_do_free_pages(tbl->it_userspace, size, 253 tbl->it_indirect_levels); 254 } 255 } 256 257 static __be64 *pnv_pci_ioda2_table_do_alloc_pages(int nid, unsigned int shift, 258 unsigned int levels, unsigned long limit, 259 unsigned long *current_offset, unsigned long *total_allocated) 260 { 261 __be64 *addr, *tmp; 262 unsigned long allocated = 1UL << shift; 263 unsigned int entries = 1UL << (shift - 3); 264 long i; 265 266 addr = pnv_alloc_tce_level(nid, shift); 267 *total_allocated += allocated; 268 269 --levels; 270 if (!levels) { 271 *current_offset += allocated; 272 return addr; 273 } 274 275 for (i = 0; i < entries; ++i) { 276 tmp = pnv_pci_ioda2_table_do_alloc_pages(nid, shift, 277 levels, limit, current_offset, total_allocated); 278 if (!tmp) 279 break; 280 281 addr[i] = cpu_to_be64(__pa(tmp) | 282 TCE_PCI_READ | TCE_PCI_WRITE); 283 284 if (*current_offset >= limit) 285 break; 286 } 287 288 return addr; 289 } 290 291 long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset, 292 __u32 page_shift, __u64 window_size, __u32 levels, 293 bool alloc_userspace_copy, struct iommu_table *tbl) 294 { 295 void *addr, *uas = NULL; 296 unsigned long offset = 0, level_shift, total_allocated = 0; 297 unsigned long total_allocated_uas = 0; 298 const unsigned int window_shift = ilog2(window_size); 299 unsigned int entries_shift = window_shift - page_shift; 300 unsigned int table_shift = max_t(unsigned int, entries_shift + 3, 301 PAGE_SHIFT); 302 const unsigned long tce_table_size = 1UL << table_shift; 303 304 if (!levels || (levels > POWERNV_IOMMU_MAX_LEVELS)) 305 return -EINVAL; 306 307 if (!is_power_of_2(window_size)) 308 return -EINVAL; 309 310 /* Adjust direct table size from window_size and levels */ 311 entries_shift = (entries_shift + levels - 1) / levels; 312 level_shift = entries_shift + 3; 313 level_shift = max_t(unsigned int, level_shift, PAGE_SHIFT); 314 315 if ((level_shift - 3) * levels + page_shift >= 55) 316 return -EINVAL; 317 318 /* Allocate TCE table */ 319 addr = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift, 320 1, tce_table_size, &offset, &total_allocated); 321 322 /* addr==NULL means that the first level allocation failed */ 323 if (!addr) 324 return -ENOMEM; 325 326 /* 327 * First level was allocated but some lower level failed as 328 * we did not allocate as much as we wanted, 329 * release partially allocated table. 330 */ 331 if (levels == 1 && offset < tce_table_size) 332 goto free_tces_exit; 333 334 /* Allocate userspace view of the TCE table */ 335 if (alloc_userspace_copy) { 336 offset = 0; 337 uas = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift, 338 1, tce_table_size, &offset, 339 &total_allocated_uas); 340 if (!uas) 341 goto free_tces_exit; 342 if (levels == 1 && (offset < tce_table_size || 343 total_allocated_uas != total_allocated)) 344 goto free_uas_exit; 345 } 346 347 /* Setup linux iommu table */ 348 pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, bus_offset, 349 page_shift); 350 tbl->it_level_size = 1ULL << (level_shift - 3); 351 tbl->it_indirect_levels = levels - 1; 352 tbl->it_userspace = uas; 353 tbl->it_nid = nid; 354 355 pr_debug("Created TCE table: ws=%08llx ts=%lx @%08llx base=%lx uas=%p levels=%d/%d\n", 356 window_size, tce_table_size, bus_offset, tbl->it_base, 357 tbl->it_userspace, 1, levels); 358 359 return 0; 360 361 free_uas_exit: 362 pnv_pci_ioda2_table_do_free_pages(uas, 363 1ULL << (level_shift - 3), levels - 1); 364 free_tces_exit: 365 pnv_pci_ioda2_table_do_free_pages(addr, 366 1ULL << (level_shift - 3), levels - 1); 367 368 return -ENOMEM; 369 } 370 371 void pnv_pci_unlink_table_and_group(struct iommu_table *tbl, 372 struct iommu_table_group *table_group) 373 { 374 long i; 375 bool found; 376 struct iommu_table_group_link *tgl; 377 378 if (!tbl || !table_group) 379 return; 380 381 /* Remove link to a group from table's list of attached groups */ 382 found = false; 383 384 rcu_read_lock(); 385 list_for_each_entry_rcu(tgl, &tbl->it_group_list, next) { 386 if (tgl->table_group == table_group) { 387 list_del_rcu(&tgl->next); 388 kfree_rcu(tgl, rcu); 389 found = true; 390 break; 391 } 392 } 393 rcu_read_unlock(); 394 395 if (WARN_ON(!found)) 396 return; 397 398 /* Clean a pointer to iommu_table in iommu_table_group::tables[] */ 399 found = false; 400 for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { 401 if (table_group->tables[i] == tbl) { 402 iommu_tce_table_put(tbl); 403 table_group->tables[i] = NULL; 404 found = true; 405 break; 406 } 407 } 408 WARN_ON(!found); 409 } 410 411 long pnv_pci_link_table_and_group(int node, int num, 412 struct iommu_table *tbl, 413 struct iommu_table_group *table_group) 414 { 415 struct iommu_table_group_link *tgl = NULL; 416 417 if (WARN_ON(!tbl || !table_group)) 418 return -EINVAL; 419 420 tgl = kzalloc_node(sizeof(struct iommu_table_group_link), GFP_KERNEL, 421 node); 422 if (!tgl) 423 return -ENOMEM; 424 425 tgl->table_group = table_group; 426 list_add_rcu(&tgl->next, &tbl->it_group_list); 427 428 table_group->tables[num] = iommu_tce_table_get(tbl); 429 430 return 0; 431 } 432