1 // SPDX-License-Identifier: GPL-2.0+ 2 /* 3 * TCE helpers for IODA PCI/PCIe on PowerNV platforms 4 * 5 * Copyright 2018 IBM Corp. 6 * 7 * This program is free software; you can redistribute it and/or 8 * modify it under the terms of the GNU General Public License 9 * as published by the Free Software Foundation; either version 10 * 2 of the License, or (at your option) any later version. 11 */ 12 13 #include <linux/kernel.h> 14 #include <linux/iommu.h> 15 16 #include <asm/iommu.h> 17 #include <asm/tce.h> 18 #include "pci.h" 19 20 void pnv_pci_setup_iommu_table(struct iommu_table *tbl, 21 void *tce_mem, u64 tce_size, 22 u64 dma_offset, unsigned int page_shift) 23 { 24 tbl->it_blocksize = 16; 25 tbl->it_base = (unsigned long)tce_mem; 26 tbl->it_page_shift = page_shift; 27 tbl->it_offset = dma_offset >> tbl->it_page_shift; 28 tbl->it_index = 0; 29 tbl->it_size = tce_size >> 3; 30 tbl->it_busno = 0; 31 tbl->it_type = TCE_PCI; 32 } 33 34 static __be64 *pnv_alloc_tce_level(int nid, unsigned int shift) 35 { 36 struct page *tce_mem = NULL; 37 __be64 *addr; 38 39 tce_mem = alloc_pages_node(nid, GFP_ATOMIC | __GFP_NOWARN, 40 shift - PAGE_SHIFT); 41 if (!tce_mem) { 42 pr_err("Failed to allocate a TCE memory, level shift=%d\n", 43 shift); 44 return NULL; 45 } 46 addr = page_address(tce_mem); 47 memset(addr, 0, 1UL << shift); 48 49 return addr; 50 } 51 52 static void pnv_pci_ioda2_table_do_free_pages(__be64 *addr, 53 unsigned long size, unsigned int levels); 54 55 static __be64 *pnv_tce(struct iommu_table *tbl, bool user, long idx, bool alloc) 56 { 57 __be64 *tmp = user ? tbl->it_userspace : (__be64 *) tbl->it_base; 58 int level = tbl->it_indirect_levels; 59 const long shift = ilog2(tbl->it_level_size); 60 unsigned long mask = (tbl->it_level_size - 1) << (level * shift); 61 62 while (level) { 63 int n = (idx & mask) >> (level * shift); 64 unsigned long oldtce, tce = be64_to_cpu(READ_ONCE(tmp[n])); 65 66 if (!tce) { 67 __be64 *tmp2; 68 69 if (!alloc) 70 return NULL; 71 72 tmp2 = pnv_alloc_tce_level(tbl->it_nid, 73 ilog2(tbl->it_level_size) + 3); 74 if (!tmp2) 75 return NULL; 76 77 tce = __pa(tmp2) | TCE_PCI_READ | TCE_PCI_WRITE; 78 oldtce = be64_to_cpu(cmpxchg(&tmp[n], 0, 79 cpu_to_be64(tce))); 80 if (oldtce) { 81 pnv_pci_ioda2_table_do_free_pages(tmp2, 82 ilog2(tbl->it_level_size) + 3, 1); 83 tce = oldtce; 84 } 85 } 86 87 tmp = __va(tce & ~(TCE_PCI_READ | TCE_PCI_WRITE)); 88 idx &= ~mask; 89 mask >>= shift; 90 --level; 91 } 92 93 return tmp + idx; 94 } 95 96 int pnv_tce_build(struct iommu_table *tbl, long index, long npages, 97 unsigned long uaddr, enum dma_data_direction direction, 98 unsigned long attrs) 99 { 100 u64 proto_tce = iommu_direction_to_tce_perm(direction); 101 u64 rpn = __pa(uaddr) >> tbl->it_page_shift; 102 long i; 103 104 if (proto_tce & TCE_PCI_WRITE) 105 proto_tce |= TCE_PCI_READ; 106 107 for (i = 0; i < npages; i++) { 108 unsigned long newtce = proto_tce | 109 ((rpn + i) << tbl->it_page_shift); 110 unsigned long idx = index - tbl->it_offset + i; 111 112 *(pnv_tce(tbl, false, idx, true)) = cpu_to_be64(newtce); 113 } 114 115 return 0; 116 } 117 118 #ifdef CONFIG_IOMMU_API 119 int pnv_tce_xchg(struct iommu_table *tbl, long index, 120 unsigned long *hpa, enum dma_data_direction *direction, 121 bool alloc) 122 { 123 u64 proto_tce = iommu_direction_to_tce_perm(*direction); 124 unsigned long newtce = *hpa | proto_tce, oldtce; 125 unsigned long idx = index - tbl->it_offset; 126 __be64 *ptce = NULL; 127 128 BUG_ON(*hpa & ~IOMMU_PAGE_MASK(tbl)); 129 130 if (*direction == DMA_NONE) { 131 ptce = pnv_tce(tbl, false, idx, false); 132 if (!ptce) { 133 *hpa = 0; 134 return 0; 135 } 136 } 137 138 if (!ptce) { 139 ptce = pnv_tce(tbl, false, idx, alloc); 140 if (!ptce) 141 return alloc ? H_HARDWARE : H_TOO_HARD; 142 } 143 144 if (newtce & TCE_PCI_WRITE) 145 newtce |= TCE_PCI_READ; 146 147 oldtce = be64_to_cpu(xchg(ptce, cpu_to_be64(newtce))); 148 *hpa = oldtce & ~(TCE_PCI_READ | TCE_PCI_WRITE); 149 *direction = iommu_tce_direction(oldtce); 150 151 return 0; 152 } 153 154 __be64 *pnv_tce_useraddrptr(struct iommu_table *tbl, long index, bool alloc) 155 { 156 if (WARN_ON_ONCE(!tbl->it_userspace)) 157 return NULL; 158 159 return pnv_tce(tbl, true, index - tbl->it_offset, alloc); 160 } 161 #endif 162 163 void pnv_tce_free(struct iommu_table *tbl, long index, long npages) 164 { 165 long i; 166 167 for (i = 0; i < npages; i++) { 168 unsigned long idx = index - tbl->it_offset + i; 169 __be64 *ptce = pnv_tce(tbl, false, idx, false); 170 171 if (ptce) 172 *ptce = cpu_to_be64(0); 173 else 174 /* Skip the rest of the level */ 175 i |= tbl->it_level_size - 1; 176 } 177 } 178 179 unsigned long pnv_tce_get(struct iommu_table *tbl, long index) 180 { 181 __be64 *ptce = pnv_tce(tbl, false, index - tbl->it_offset, false); 182 183 if (!ptce) 184 return 0; 185 186 return be64_to_cpu(*ptce); 187 } 188 189 static void pnv_pci_ioda2_table_do_free_pages(__be64 *addr, 190 unsigned long size, unsigned int levels) 191 { 192 const unsigned long addr_ul = (unsigned long) addr & 193 ~(TCE_PCI_READ | TCE_PCI_WRITE); 194 195 if (levels) { 196 long i; 197 u64 *tmp = (u64 *) addr_ul; 198 199 for (i = 0; i < size; ++i) { 200 unsigned long hpa = be64_to_cpu(tmp[i]); 201 202 if (!(hpa & (TCE_PCI_READ | TCE_PCI_WRITE))) 203 continue; 204 205 pnv_pci_ioda2_table_do_free_pages(__va(hpa), size, 206 levels - 1); 207 } 208 } 209 210 free_pages(addr_ul, get_order(size << 3)); 211 } 212 213 void pnv_pci_ioda2_table_free_pages(struct iommu_table *tbl) 214 { 215 const unsigned long size = tbl->it_indirect_levels ? 216 tbl->it_level_size : tbl->it_size; 217 218 if (!tbl->it_size) 219 return; 220 221 pnv_pci_ioda2_table_do_free_pages((__be64 *)tbl->it_base, size, 222 tbl->it_indirect_levels); 223 if (tbl->it_userspace) { 224 pnv_pci_ioda2_table_do_free_pages(tbl->it_userspace, size, 225 tbl->it_indirect_levels); 226 } 227 } 228 229 static __be64 *pnv_pci_ioda2_table_do_alloc_pages(int nid, unsigned int shift, 230 unsigned int levels, unsigned long limit, 231 unsigned long *current_offset, unsigned long *total_allocated) 232 { 233 __be64 *addr, *tmp; 234 unsigned long allocated = 1UL << shift; 235 unsigned int entries = 1UL << (shift - 3); 236 long i; 237 238 addr = pnv_alloc_tce_level(nid, shift); 239 *total_allocated += allocated; 240 241 --levels; 242 if (!levels) { 243 *current_offset += allocated; 244 return addr; 245 } 246 247 for (i = 0; i < entries; ++i) { 248 tmp = pnv_pci_ioda2_table_do_alloc_pages(nid, shift, 249 levels, limit, current_offset, total_allocated); 250 if (!tmp) 251 break; 252 253 addr[i] = cpu_to_be64(__pa(tmp) | 254 TCE_PCI_READ | TCE_PCI_WRITE); 255 256 if (*current_offset >= limit) 257 break; 258 } 259 260 return addr; 261 } 262 263 long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset, 264 __u32 page_shift, __u64 window_size, __u32 levels, 265 bool alloc_userspace_copy, struct iommu_table *tbl) 266 { 267 void *addr, *uas = NULL; 268 unsigned long offset = 0, level_shift, total_allocated = 0; 269 unsigned long total_allocated_uas = 0; 270 const unsigned int window_shift = ilog2(window_size); 271 unsigned int entries_shift = window_shift - page_shift; 272 unsigned int table_shift = max_t(unsigned int, entries_shift + 3, 273 PAGE_SHIFT); 274 const unsigned long tce_table_size = 1UL << table_shift; 275 276 if (!levels || (levels > POWERNV_IOMMU_MAX_LEVELS)) 277 return -EINVAL; 278 279 if (!is_power_of_2(window_size)) 280 return -EINVAL; 281 282 /* Adjust direct table size from window_size and levels */ 283 entries_shift = (entries_shift + levels - 1) / levels; 284 level_shift = entries_shift + 3; 285 level_shift = max_t(unsigned int, level_shift, PAGE_SHIFT); 286 287 if ((level_shift - 3) * levels + page_shift >= 55) 288 return -EINVAL; 289 290 /* Allocate TCE table */ 291 addr = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift, 292 1, tce_table_size, &offset, &total_allocated); 293 294 /* addr==NULL means that the first level allocation failed */ 295 if (!addr) 296 return -ENOMEM; 297 298 /* 299 * First level was allocated but some lower level failed as 300 * we did not allocate as much as we wanted, 301 * release partially allocated table. 302 */ 303 if (levels == 1 && offset < tce_table_size) 304 goto free_tces_exit; 305 306 /* Allocate userspace view of the TCE table */ 307 if (alloc_userspace_copy) { 308 offset = 0; 309 uas = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift, 310 1, tce_table_size, &offset, 311 &total_allocated_uas); 312 if (!uas) 313 goto free_tces_exit; 314 if (levels == 1 && (offset < tce_table_size || 315 total_allocated_uas != total_allocated)) 316 goto free_uas_exit; 317 } 318 319 /* Setup linux iommu table */ 320 pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, bus_offset, 321 page_shift); 322 tbl->it_level_size = 1ULL << (level_shift - 3); 323 tbl->it_indirect_levels = levels - 1; 324 tbl->it_userspace = uas; 325 tbl->it_nid = nid; 326 327 pr_debug("Created TCE table: ws=%08llx ts=%lx @%08llx base=%lx uas=%p levels=%d/%d\n", 328 window_size, tce_table_size, bus_offset, tbl->it_base, 329 tbl->it_userspace, 1, levels); 330 331 return 0; 332 333 free_uas_exit: 334 pnv_pci_ioda2_table_do_free_pages(uas, 335 1ULL << (level_shift - 3), levels - 1); 336 free_tces_exit: 337 pnv_pci_ioda2_table_do_free_pages(addr, 338 1ULL << (level_shift - 3), levels - 1); 339 340 return -ENOMEM; 341 } 342 343 static void pnv_iommu_table_group_link_free(struct rcu_head *head) 344 { 345 struct iommu_table_group_link *tgl = container_of(head, 346 struct iommu_table_group_link, rcu); 347 348 kfree(tgl); 349 } 350 351 void pnv_pci_unlink_table_and_group(struct iommu_table *tbl, 352 struct iommu_table_group *table_group) 353 { 354 long i; 355 bool found; 356 struct iommu_table_group_link *tgl; 357 358 if (!tbl || !table_group) 359 return; 360 361 /* Remove link to a group from table's list of attached groups */ 362 found = false; 363 list_for_each_entry_rcu(tgl, &tbl->it_group_list, next) { 364 if (tgl->table_group == table_group) { 365 list_del_rcu(&tgl->next); 366 call_rcu(&tgl->rcu, pnv_iommu_table_group_link_free); 367 found = true; 368 break; 369 } 370 } 371 if (WARN_ON(!found)) 372 return; 373 374 /* Clean a pointer to iommu_table in iommu_table_group::tables[] */ 375 found = false; 376 for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { 377 if (table_group->tables[i] == tbl) { 378 iommu_tce_table_put(tbl); 379 table_group->tables[i] = NULL; 380 found = true; 381 break; 382 } 383 } 384 WARN_ON(!found); 385 } 386 387 long pnv_pci_link_table_and_group(int node, int num, 388 struct iommu_table *tbl, 389 struct iommu_table_group *table_group) 390 { 391 struct iommu_table_group_link *tgl = NULL; 392 393 if (WARN_ON(!tbl || !table_group)) 394 return -EINVAL; 395 396 tgl = kzalloc_node(sizeof(struct iommu_table_group_link), GFP_KERNEL, 397 node); 398 if (!tgl) 399 return -ENOMEM; 400 401 tgl->table_group = table_group; 402 list_add_rcu(&tgl->next, &tbl->it_group_list); 403 404 table_group->tables[num] = iommu_tce_table_get(tbl); 405 406 return 0; 407 } 408