/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/compiler.h>
#include <linux/delay.h>
#include <linux/device.h>
#include <linux/dma-iommu.h>
#include <linux/dma-mapping.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/iommu.h>
#include <linux/jiffies.h>
#include <linux/list.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/of.h>
#include <linux/of_platform.h>
#include <linux/platform_device.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

/** MMU register offsets */
#define RK_MMU_DTE_ADDR		0x00	/* Directory table address */
#define RK_MMU_STATUS		0x04
#define RK_MMU_COMMAND		0x08
#define RK_MMU_PAGE_FAULT_ADDR	0x0C	/* IOVA of last page fault */
#define RK_MMU_ZAP_ONE_LINE	0x10	/* Shootdown one IOTLB entry */
#define RK_MMU_INT_RAWSTAT	0x14	/* IRQ status ignoring mask */
#define RK_MMU_INT_CLEAR	0x18	/* Acknowledge and re-arm irq */
#define RK_MMU_INT_MASK		0x1C	/* IRQ enable */
#define RK_MMU_INT_STATUS	0x20	/* IRQ status after masking */
#define RK_MMU_AUTO_GATING	0x24

#define DTE_ADDR_DUMMY		0xCAFEBABE
#define FORCE_RESET_TIMEOUT	100	/* ms */

/* RK_MMU_STATUS fields */
#define RK_MMU_STATUS_PAGING_ENABLED       BIT(0)
#define RK_MMU_STATUS_PAGE_FAULT_ACTIVE    BIT(1)
#define RK_MMU_STATUS_STALL_ACTIVE         BIT(2)
#define RK_MMU_STATUS_IDLE                 BIT(3)
#define RK_MMU_STATUS_REPLAY_BUFFER_EMPTY  BIT(4)
#define RK_MMU_STATUS_PAGE_FAULT_IS_WRITE  BIT(5)
#define RK_MMU_STATUS_STALL_NOT_ACTIVE     BIT(31)

/* RK_MMU_COMMAND command values */
#define RK_MMU_CMD_ENABLE_PAGING    0	/* Enable memory translation */
#define RK_MMU_CMD_DISABLE_PAGING   1	/* Disable memory translation */
#define RK_MMU_CMD_ENABLE_STALL     2	/* Stall paging to allow other cmds */
#define RK_MMU_CMD_DISABLE_STALL    3	/* Stop stall re-enables paging */
#define RK_MMU_CMD_ZAP_CACHE        4	/* Shoot down entire IOTLB */
#define RK_MMU_CMD_PAGE_FAULT_DONE  5	/* Clear page fault */
#define RK_MMU_CMD_FORCE_RESET      6	/* Reset all registers */

/* RK_MMU_INT_* register fields */
#define RK_MMU_IRQ_PAGE_FAULT    0x01	/* page fault */
#define RK_MMU_IRQ_BUS_ERROR     0x02	/* bus read error */
#define RK_MMU_IRQ_MASK          (RK_MMU_IRQ_PAGE_FAULT | RK_MMU_IRQ_BUS_ERROR)

#define NUM_DT_ENTRIES 1024
#define NUM_PT_ENTRIES 1024

#define SPAGE_ORDER 12
#define SPAGE_SIZE (1 << SPAGE_ORDER)

/*
 * Support mapping any size that fits in one page table:
 * 4 KiB to 4 MiB
 */
#define RK_IOMMU_PGSIZE_BITMAP 0x007ff000

#define IOMMU_REG_POLL_COUNT_FAST 1000

struct rk_iommu_domain {
	struct list_head iommus;
	struct platform_device *pdev;
	u32 *dt; /* page directory table */
	dma_addr_t dt_dma;
	spinlock_t iommus_lock; /* lock for iommus list */
	spinlock_t dt_lock; /* lock for modifying page directory table */

	struct iommu_domain domain;
};

struct rk_iommu {
	struct device *dev;
	void __iomem **bases;
	int num_mmu;
	int irq;
	struct iommu_device iommu;
	struct list_head node; /* entry in rk_iommu_domain.iommus */
	struct iommu_domain *domain; /* domain to which iommu is attached */
};

static inline void rk_table_flush(struct rk_iommu_domain *dom, dma_addr_t dma,
				  unsigned int count)
{
	size_t size = count * sizeof(u32); /* count of u32 entry */

	dma_sync_single_for_device(&dom->pdev->dev, dma, size, DMA_TO_DEVICE);
}

static struct rk_iommu_domain *to_rk_domain(struct iommu_domain *dom)
{
	return container_of(dom, struct rk_iommu_domain, domain);
}

/**
 * Inspired by _wait_for in intel_drv.h
 * This is NOT safe for use in interrupt context.
 *
 * Note that it's important that we check the condition again after having
 * timed out, since the timeout could be due to preemption or similar and
 * we've never had a chance to check the condition before the timeout.
 */
#define rk_wait_for(COND, MS) ({ \
	unsigned long timeout__ = jiffies + msecs_to_jiffies(MS) + 1;	\
	int ret__ = 0;							\
	while (!(COND)) {						\
		if (time_after(jiffies, timeout__)) {			\
			ret__ = (COND) ? 0 : -ETIMEDOUT;		\
			break;						\
		}							\
		usleep_range(50, 100);					\
	}								\
	ret__;								\
})

/*
 * The Rockchip rk3288 iommu uses a 2-level page table.
 * The first level is the "Directory Table" (DT).
 * The DT consists of 1024 4-byte Directory Table Entries (DTEs), each pointing
 * to a "Page Table".
 * The second level is the 1024 Page Tables (PT).
 * Each PT consists of 1024 4-byte Page Table Entries (PTEs), each pointing to
 * a 4 KB page of physical memory.
 *
 * The DT and each PT fits in a single 4 KB page (4-bytes * 1024 entries).
 * Each iommu device has a MMU_DTE_ADDR register that contains the physical
 * address of the start of the DT page.
 *
 * The structure of the page table is as follows:
 *
 *                   DT
 * MMU_DTE_ADDR -> +-----+
 *                 |     |
 *                 +-----+     PT
 *                 | DTE | -> +-----+
 *                 +-----+    |     |     Memory
 *                 |     |    +-----+     Page
 *                 |     |    | PTE | -> +-----+
 *                 +-----+    +-----+    |     |
 *                            |     |    |     |
 *                            |     |    |     |
 *                            +-----+    |     |
 *                                       |     |
 *                                       |     |
 *                                       +-----+
 */

/*
 * Each DTE has a PT address and a valid bit:
 * +---------------------+-----------+-+
 * | PT address          | Reserved  |V|
 * +---------------------+-----------+-+
 *  31:12 - PT address (PTs always start on a 4 KB boundary)
 *  11: 1 - Reserved
 *      0 - 1 if PT @ PT address is valid
 */
#define RK_DTE_PT_ADDRESS_MASK    0xfffff000
#define RK_DTE_PT_VALID           BIT(0)

static inline phys_addr_t rk_dte_pt_address(u32 dte)
{
	return (phys_addr_t)dte & RK_DTE_PT_ADDRESS_MASK;
}

static inline bool rk_dte_is_pt_valid(u32 dte)
{
	return dte & RK_DTE_PT_VALID;
}

static inline u32 rk_mk_dte(dma_addr_t pt_dma)
{
	return (pt_dma & RK_DTE_PT_ADDRESS_MASK) | RK_DTE_PT_VALID;
}

/*
 * Each PTE has a Page address, some flags and a valid bit:
 * +---------------------+---+-------+-+
 * | Page address        |Rsv| Flags |V|
 * +---------------------+---+-------+-+
 *  31:12 - Page address (Pages always start on a 4 KB boundary)
 *  11: 9 - Reserved
 *   8: 1 - Flags
 *      8 - Read allocate - allocate cache space on read misses
 *      7 - Read cache - enable cache & prefetch of data
 *      6 - Write buffer - enable delaying writes on their way to memory
 *      5 - Write allocate - allocate cache space on write misses
 *      4 - Write cache - different writes can be merged together
 *      3 - Override cache attributes
 *          if 1, bits 4-8 control cache attributes
 *          if 0, the system bus defaults are used
 *      2 - Writable
 *      1 - Readable
 *      0 - 1 if Page @ Page address is valid
 */
#define RK_PTE_PAGE_ADDRESS_MASK  0xfffff000
#define RK_PTE_PAGE_FLAGS_MASK    0x000001fe
#define RK_PTE_PAGE_WRITABLE      BIT(2)
#define RK_PTE_PAGE_READABLE      BIT(1)
#define RK_PTE_PAGE_VALID         BIT(0)

static inline phys_addr_t rk_pte_page_address(u32 pte)
{
	return (phys_addr_t)pte & RK_PTE_PAGE_ADDRESS_MASK;
}

static inline bool rk_pte_is_page_valid(u32 pte)
{
	return pte & RK_PTE_PAGE_VALID;
}

/* TODO: set cache flags per prot IOMMU_CACHE */
static u32 rk_mk_pte(phys_addr_t page, int prot)
{
	u32 flags = 0;

	flags |= (prot & IOMMU_READ) ? RK_PTE_PAGE_READABLE : 0;
	flags |= (prot & IOMMU_WRITE) ? RK_PTE_PAGE_WRITABLE : 0;
	page &= RK_PTE_PAGE_ADDRESS_MASK;
	return page | flags | RK_PTE_PAGE_VALID;
}

static u32 rk_mk_pte_invalid(u32 pte)
{
	return pte & ~RK_PTE_PAGE_VALID;
}

/*
 * rk3288 iova (IOMMU Virtual Address) format
 *  31       22.21       12.11          0
 * +-----------+-----------+-------------+
 * | DTE index | PTE index | Page offset |
 * +-----------+-----------+-------------+
 *  31:22 - DTE index   - index of DTE in DT
 *  21:12 - PTE index   - index of PTE in PT @ DTE.pt_address
 *  11: 0 - Page offset - offset into page @ PTE.page_address
 */
#define RK_IOVA_DTE_MASK    0xffc00000
#define RK_IOVA_DTE_SHIFT   22
#define RK_IOVA_PTE_MASK    0x003ff000
#define RK_IOVA_PTE_SHIFT   12
#define RK_IOVA_PAGE_MASK   0x00000fff
#define RK_IOVA_PAGE_SHIFT  0

static u32 rk_iova_dte_index(dma_addr_t iova)
{
	return (u32)(iova & RK_IOVA_DTE_MASK) >> RK_IOVA_DTE_SHIFT;
}

static u32 rk_iova_pte_index(dma_addr_t iova)
{
	return (u32)(iova & RK_IOVA_PTE_MASK) >> RK_IOVA_PTE_SHIFT;
}

static u32 rk_iova_page_offset(dma_addr_t iova)
{
	return (u32)(iova & RK_IOVA_PAGE_MASK) >> RK_IOVA_PAGE_SHIFT;
}
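
/*
 * Worked example (illustrative only, not used by the driver): the helpers
 * above split a sample iova of 0x12c45678 as follows:
 *
 *   rk_iova_dte_index(0x12c45678)   == 0x04b   (bits 31:22)
 *   rk_iova_pte_index(0x12c45678)   == 0x045   (bits 21:12)
 *   rk_iova_page_offset(0x12c45678) == 0x678   (bits 11:0)
 *
 * so a translation walks dt[0x04b] to find the PT, then pt[0x045] to find
 * the page, and adds the 0x678 byte offset.  Likewise, assuming a page at
 * physical address 0x30001000 mapped read/write:
 *
 *   rk_mk_pte(0x30001000, IOMMU_READ | IOMMU_WRITE) == 0x30001007
 *
 * i.e. the page address with the READABLE (bit 1), WRITABLE (bit 2) and
 * VALID (bit 0) bits set.
 */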

static u32 rk_iommu_read(void __iomem *base, u32 offset)
{
	return readl(base + offset);
}

static void rk_iommu_write(void __iomem *base, u32 offset, u32 value)
{
	writel(value, base + offset);
}

static void rk_iommu_command(struct rk_iommu *iommu, u32 command)
{
	int i;

	for (i = 0; i < iommu->num_mmu; i++)
		writel(command, iommu->bases[i] + RK_MMU_COMMAND);
}

static void rk_iommu_base_command(void __iomem *base, u32 command)
{
	writel(command, base + RK_MMU_COMMAND);
}

static void rk_iommu_zap_lines(struct rk_iommu *iommu, dma_addr_t iova_start,
			       size_t size)
{
	int i;
	dma_addr_t iova_end = iova_start + size;
	/*
	 * TODO(djkurtz): Figure out when it is more efficient to shootdown the
	 * entire iotlb rather than iterate over individual iovas.
	 */
	for (i = 0; i < iommu->num_mmu; i++) {
		dma_addr_t iova;

		/* Restart the walk so every MMU instance zaps the full range */
		for (iova = iova_start; iova < iova_end; iova += SPAGE_SIZE)
			rk_iommu_write(iommu->bases[i], RK_MMU_ZAP_ONE_LINE, iova);
	}
}

static bool rk_iommu_is_stall_active(struct rk_iommu *iommu)
{
	bool active = true;
	int i;

	for (i = 0; i < iommu->num_mmu; i++)
		active &= !!(rk_iommu_read(iommu->bases[i], RK_MMU_STATUS) &
			     RK_MMU_STATUS_STALL_ACTIVE);

	return active;
}

static bool rk_iommu_is_paging_enabled(struct rk_iommu *iommu)
{
	bool enable = true;
	int i;

	for (i = 0; i < iommu->num_mmu; i++)
		enable &= !!(rk_iommu_read(iommu->bases[i], RK_MMU_STATUS) &
			     RK_MMU_STATUS_PAGING_ENABLED);

	return enable;
}

static int rk_iommu_enable_stall(struct rk_iommu *iommu)
{
	int ret, i;

	if (rk_iommu_is_stall_active(iommu))
		return 0;

	/* Stall can only be enabled if paging is enabled */
	if (!rk_iommu_is_paging_enabled(iommu))
		return 0;

	rk_iommu_command(iommu, RK_MMU_CMD_ENABLE_STALL);

	ret = rk_wait_for(rk_iommu_is_stall_active(iommu), 1);
	if (ret)
		for (i = 0; i < iommu->num_mmu; i++)
			dev_err(iommu->dev, "Enable stall request timed out, status: %#08x\n",
				rk_iommu_read(iommu->bases[i], RK_MMU_STATUS));

	return ret;
}

static int rk_iommu_disable_stall(struct rk_iommu *iommu)
{
	int ret, i;

	if (!rk_iommu_is_stall_active(iommu))
		return 0;

	rk_iommu_command(iommu, RK_MMU_CMD_DISABLE_STALL);

	ret = rk_wait_for(!rk_iommu_is_stall_active(iommu), 1);
	if (ret)
		for (i = 0; i < iommu->num_mmu; i++)
			dev_err(iommu->dev, "Disable stall request timed out, status: %#08x\n",
				rk_iommu_read(iommu->bases[i], RK_MMU_STATUS));

	return ret;
}

static int rk_iommu_enable_paging(struct rk_iommu *iommu)
{
	int ret, i;

	if (rk_iommu_is_paging_enabled(iommu))
		return 0;

	rk_iommu_command(iommu, RK_MMU_CMD_ENABLE_PAGING);

	ret = rk_wait_for(rk_iommu_is_paging_enabled(iommu), 1);
	if (ret)
		for (i = 0; i < iommu->num_mmu; i++)
			dev_err(iommu->dev, "Enable paging request timed out, status: %#08x\n",
				rk_iommu_read(iommu->bases[i], RK_MMU_STATUS));

	return ret;
}

static int rk_iommu_disable_paging(struct rk_iommu *iommu)
{
	int ret, i;

	if (!rk_iommu_is_paging_enabled(iommu))
		return 0;

	rk_iommu_command(iommu, RK_MMU_CMD_DISABLE_PAGING);

	ret = rk_wait_for(!rk_iommu_is_paging_enabled(iommu), 1);
	if (ret)
		for (i = 0; i < iommu->num_mmu; i++)
			dev_err(iommu->dev, "Disable paging request timed out, status: %#08x\n",
				rk_iommu_read(iommu->bases[i], RK_MMU_STATUS));

	return ret;
}

static int rk_iommu_force_reset(struct rk_iommu *iommu)
{
	int ret, i;
	u32 dte_addr;

	/*
	 * Check if register DTE_ADDR is working by writing DTE_ADDR_DUMMY
	 * and verifying that the upper 5 nybbles are read back.
	 */
	for (i = 0; i < iommu->num_mmu; i++) {
		rk_iommu_write(iommu->bases[i], RK_MMU_DTE_ADDR, DTE_ADDR_DUMMY);

		dte_addr = rk_iommu_read(iommu->bases[i], RK_MMU_DTE_ADDR);
		if (dte_addr != (DTE_ADDR_DUMMY & RK_DTE_PT_ADDRESS_MASK)) {
			dev_err(iommu->dev, "Error during raw reset. MMU_DTE_ADDR is not functioning\n");
			return -EFAULT;
		}
	}

	rk_iommu_command(iommu, RK_MMU_CMD_FORCE_RESET);

	for (i = 0; i < iommu->num_mmu; i++) {
		ret = rk_wait_for(rk_iommu_read(iommu->bases[i], RK_MMU_DTE_ADDR) == 0x00000000,
				  FORCE_RESET_TIMEOUT);
		if (ret) {
			dev_err(iommu->dev, "FORCE_RESET command timed out\n");
			return ret;
		}
	}

	return 0;
}

static void log_iova(struct rk_iommu *iommu, int index, dma_addr_t iova)
{
	void __iomem *base = iommu->bases[index];
	u32 dte_index, pte_index, page_offset;
	u32 mmu_dte_addr;
	phys_addr_t mmu_dte_addr_phys, dte_addr_phys;
	u32 *dte_addr;
	u32 dte;
	phys_addr_t pte_addr_phys = 0;
	u32 *pte_addr = NULL;
	u32 pte = 0;
	phys_addr_t page_addr_phys = 0;
	u32 page_flags = 0;

	dte_index = rk_iova_dte_index(iova);
	pte_index = rk_iova_pte_index(iova);
	page_offset = rk_iova_page_offset(iova);

	mmu_dte_addr = rk_iommu_read(base, RK_MMU_DTE_ADDR);
	mmu_dte_addr_phys = (phys_addr_t)mmu_dte_addr;

	dte_addr_phys = mmu_dte_addr_phys + (4 * dte_index);
	dte_addr = phys_to_virt(dte_addr_phys);
	dte = *dte_addr;

	if (!rk_dte_is_pt_valid(dte))
		goto print_it;

	pte_addr_phys = rk_dte_pt_address(dte) + (pte_index * 4);
	pte_addr = phys_to_virt(pte_addr_phys);
	pte = *pte_addr;

	if (!rk_pte_is_page_valid(pte))
		goto print_it;

	page_addr_phys = rk_pte_page_address(pte) + page_offset;
	page_flags = pte & RK_PTE_PAGE_FLAGS_MASK;

print_it:
	dev_err(iommu->dev, "iova = %pad: dte_index: %#03x pte_index: %#03x page_offset: %#03x\n",
		&iova, dte_index, pte_index, page_offset);
	dev_err(iommu->dev, "mmu_dte_addr: %pa dte@%pa: %#08x valid: %u pte@%pa: %#08x valid: %u page@%pa flags: %#03x\n",
		&mmu_dte_addr_phys, &dte_addr_phys, dte,
		rk_dte_is_pt_valid(dte), &pte_addr_phys, pte,
		rk_pte_is_page_valid(pte), &page_addr_phys, page_flags);
}

static irqreturn_t rk_iommu_irq(int irq, void *dev_id)
{
	struct rk_iommu *iommu = dev_id;
	u32 status;
	u32 int_status;
	dma_addr_t iova;
	irqreturn_t ret = IRQ_NONE;
	int i;

	for (i = 0; i < iommu->num_mmu; i++) {
		int_status = rk_iommu_read(iommu->bases[i], RK_MMU_INT_STATUS);
		if (int_status == 0)
			continue;

		ret = IRQ_HANDLED;
		iova = rk_iommu_read(iommu->bases[i], RK_MMU_PAGE_FAULT_ADDR);

		if (int_status & RK_MMU_IRQ_PAGE_FAULT) {
			int flags;

			status = rk_iommu_read(iommu->bases[i], RK_MMU_STATUS);
			flags = (status & RK_MMU_STATUS_PAGE_FAULT_IS_WRITE) ?
					IOMMU_FAULT_WRITE : IOMMU_FAULT_READ;

			dev_err(iommu->dev, "Page fault at %pad of type %s\n",
				&iova,
				(flags == IOMMU_FAULT_WRITE) ? "write" : "read");

			log_iova(iommu, i, iova);

			/*
			 * Report page fault to any installed handlers.
			 * Ignore the return code, though, since we always zap
			 * the cache and clear the page fault anyway.
			 */
			if (iommu->domain)
				report_iommu_fault(iommu->domain, iommu->dev,
						   iova, flags);
			else
				dev_err(iommu->dev, "Page fault while iommu not attached to domain?\n");

			rk_iommu_base_command(iommu->bases[i], RK_MMU_CMD_ZAP_CACHE);
			rk_iommu_base_command(iommu->bases[i], RK_MMU_CMD_PAGE_FAULT_DONE);
		}

		if (int_status & RK_MMU_IRQ_BUS_ERROR)
			dev_err(iommu->dev, "BUS_ERROR occurred at %pad\n", &iova);

		if (int_status & ~RK_MMU_IRQ_MASK)
			dev_err(iommu->dev, "unexpected int_status: %#08x\n",
				int_status);

		rk_iommu_write(iommu->bases[i], RK_MMU_INT_CLEAR, int_status);
	}

	return ret;
}

static phys_addr_t rk_iommu_iova_to_phys(struct iommu_domain *domain,
					 dma_addr_t iova)
{
	struct rk_iommu_domain *rk_domain = to_rk_domain(domain);
	unsigned long flags;
	phys_addr_t pt_phys, phys = 0;
	u32 dte, pte;
	u32 *page_table;

	spin_lock_irqsave(&rk_domain->dt_lock, flags);

	dte = rk_domain->dt[rk_iova_dte_index(iova)];
	if (!rk_dte_is_pt_valid(dte))
		goto out;

	pt_phys = rk_dte_pt_address(dte);
	page_table = (u32 *)phys_to_virt(pt_phys);
	pte = page_table[rk_iova_pte_index(iova)];
	if (!rk_pte_is_page_valid(pte))
		goto out;

	phys = rk_pte_page_address(pte) + rk_iova_page_offset(iova);
out:
	spin_unlock_irqrestore(&rk_domain->dt_lock, flags);

	return phys;
}

static void rk_iommu_zap_iova(struct rk_iommu_domain *rk_domain,
			      dma_addr_t iova, size_t size)
{
	struct list_head *pos;
	unsigned long flags;

	/* shootdown these iova from all iommus using this domain */
	spin_lock_irqsave(&rk_domain->iommus_lock, flags);
	list_for_each(pos, &rk_domain->iommus) {
		struct rk_iommu *iommu;

		iommu = list_entry(pos, struct rk_iommu, node);
		rk_iommu_zap_lines(iommu, iova, size);
	}
	spin_unlock_irqrestore(&rk_domain->iommus_lock, flags);
}

static void rk_iommu_zap_iova_first_last(struct rk_iommu_domain *rk_domain,
					 dma_addr_t iova, size_t size)
{
	rk_iommu_zap_iova(rk_domain, iova, SPAGE_SIZE);
	if (size > SPAGE_SIZE)
		rk_iommu_zap_iova(rk_domain, iova + size - SPAGE_SIZE,
				  SPAGE_SIZE);
}

static u32 *rk_dte_get_page_table(struct rk_iommu_domain *rk_domain,
				  dma_addr_t iova)
{
	struct device *dev = &rk_domain->pdev->dev;
	u32 *page_table, *dte_addr;
	u32 dte_index, dte;
	phys_addr_t pt_phys;
	dma_addr_t pt_dma;

	assert_spin_locked(&rk_domain->dt_lock);

	dte_index = rk_iova_dte_index(iova);
	dte_addr = &rk_domain->dt[dte_index];
	dte = *dte_addr;
	if (rk_dte_is_pt_valid(dte))
		goto done;

	page_table = (u32 *)get_zeroed_page(GFP_ATOMIC | GFP_DMA32);
	if (!page_table)
		return ERR_PTR(-ENOMEM);

	pt_dma = dma_map_single(dev, page_table, SPAGE_SIZE, DMA_TO_DEVICE);
	if (dma_mapping_error(dev, pt_dma)) {
		dev_err(dev, "DMA mapping error while allocating page table\n");
		free_page((unsigned long)page_table);
		return ERR_PTR(-ENOMEM);
	}

	dte = rk_mk_dte(pt_dma);
	*dte_addr = dte;

	rk_table_flush(rk_domain, pt_dma, NUM_PT_ENTRIES);
	rk_table_flush(rk_domain,
		       rk_domain->dt_dma + dte_index * sizeof(u32), 1);
done:
	pt_phys = rk_dte_pt_address(dte);
	return (u32 *)phys_to_virt(pt_phys);
}

static size_t rk_iommu_unmap_iova(struct rk_iommu_domain *rk_domain,
				  u32 *pte_addr, dma_addr_t pte_dma,
				  size_t size)
{
	unsigned int pte_count;
	unsigned int pte_total = size / SPAGE_SIZE;

	assert_spin_locked(&rk_domain->dt_lock);

	for (pte_count = 0; pte_count < pte_total; pte_count++) {
		u32 pte = pte_addr[pte_count];

		if (!rk_pte_is_page_valid(pte))
			break;

		pte_addr[pte_count] = rk_mk_pte_invalid(pte);
	}

	rk_table_flush(rk_domain, pte_dma, pte_count);

	return pte_count * SPAGE_SIZE;
}

static int rk_iommu_map_iova(struct rk_iommu_domain *rk_domain, u32 *pte_addr,
			     dma_addr_t pte_dma, dma_addr_t iova,
			     phys_addr_t paddr, size_t size, int prot)
{
	unsigned int pte_count;
	unsigned int pte_total = size / SPAGE_SIZE;
	phys_addr_t page_phys;

	assert_spin_locked(&rk_domain->dt_lock);

	for (pte_count = 0; pte_count < pte_total; pte_count++) {
		u32 pte = pte_addr[pte_count];

		if (rk_pte_is_page_valid(pte))
			goto unwind;

		pte_addr[pte_count] = rk_mk_pte(paddr, prot);

		paddr += SPAGE_SIZE;
	}

	rk_table_flush(rk_domain, pte_dma, pte_total);

	/*
	 * Zap the first and last iova to evict from iotlb any previously
	 * mapped cachelines holding stale values for its dte and pte.
	 * We only zap the first and last iova, since only they could have
	 * dte or pte shared with an existing mapping.
	 */
	rk_iommu_zap_iova_first_last(rk_domain, iova, size);

	return 0;
unwind:
	/* Unmap the range of iovas that we just mapped */
	rk_iommu_unmap_iova(rk_domain, pte_addr, pte_dma,
			    pte_count * SPAGE_SIZE);

	iova += pte_count * SPAGE_SIZE;
	page_phys = rk_pte_page_address(pte_addr[pte_count]);
	pr_err("iova: %pad already mapped to %pa cannot remap to phys: %pa prot: %#x\n",
	       &iova, &page_phys, &paddr, prot);

	return -EADDRINUSE;
}

static int rk_iommu_map(struct iommu_domain *domain, unsigned long _iova,
			phys_addr_t paddr, size_t size, int prot)
{
	struct rk_iommu_domain *rk_domain = to_rk_domain(domain);
	unsigned long flags;
	dma_addr_t pte_dma, iova = (dma_addr_t)_iova;
	u32 *page_table, *pte_addr;
	u32 dte_index, pte_index;
	int ret;

	spin_lock_irqsave(&rk_domain->dt_lock, flags);

	/*
	 * pgsize_bitmap specifies iova sizes that fit in one page table
	 * (1024 4-KiB pages = 4 MiB).
	 * So, size will always be 4096 <= size <= 4194304.
	 * Since iommu_map() guarantees that both iova and size will be
	 * aligned, we will always only be mapping from a single dte here.
	 */
	page_table = rk_dte_get_page_table(rk_domain, iova);
	if (IS_ERR(page_table)) {
		spin_unlock_irqrestore(&rk_domain->dt_lock, flags);
		return PTR_ERR(page_table);
	}

	dte_index = rk_domain->dt[rk_iova_dte_index(iova)];
	pte_index = rk_iova_pte_index(iova);
	pte_addr = &page_table[pte_index];
	pte_dma = rk_dte_pt_address(dte_index) + pte_index * sizeof(u32);
	ret = rk_iommu_map_iova(rk_domain, pte_addr, pte_dma, iova,
				paddr, size, prot);

	spin_unlock_irqrestore(&rk_domain->dt_lock, flags);

	return ret;
}
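
/*
 * Sketch (illustrative only, values assumed): a 16 KiB mapping made through
 * the generic API, e.g.
 *
 *   iommu_map(domain, 0x12c44000, 0x30000000, SZ_16K,
 *             IOMMU_READ | IOMMU_WRITE);
 *
 * lands entirely in the page table referenced by dt[0x04b] and fills four
 * consecutive PTEs (indices 0x044..0x047) with 0x30000007, 0x30001007,
 * 0x30002007 and 0x30003007 via rk_iommu_map_iova() above.  Afterwards only
 * the first and last iova of the range are zapped from the IOTLB, since only
 * those could share a dte/pte cacheline with an existing mapping.
 */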

static size_t rk_iommu_unmap(struct iommu_domain *domain, unsigned long _iova,
			     size_t size)
{
	struct rk_iommu_domain *rk_domain = to_rk_domain(domain);
	unsigned long flags;
	dma_addr_t pte_dma, iova = (dma_addr_t)_iova;
	phys_addr_t pt_phys;
	u32 dte;
	u32 *pte_addr;
	size_t unmap_size;

	spin_lock_irqsave(&rk_domain->dt_lock, flags);

	/*
	 * pgsize_bitmap specifies iova sizes that fit in one page table
	 * (1024 4-KiB pages = 4 MiB).
	 * So, size will always be 4096 <= size <= 4194304.
	 * Since iommu_unmap() guarantees that both iova and size will be
	 * aligned, we will always only be unmapping from a single dte here.
	 */
	dte = rk_domain->dt[rk_iova_dte_index(iova)];
	/* Just return 0 if iova is unmapped */
	if (!rk_dte_is_pt_valid(dte)) {
		spin_unlock_irqrestore(&rk_domain->dt_lock, flags);
		return 0;
	}

	pt_phys = rk_dte_pt_address(dte);
	pte_addr = (u32 *)phys_to_virt(pt_phys) + rk_iova_pte_index(iova);
	pte_dma = pt_phys + rk_iova_pte_index(iova) * sizeof(u32);
	unmap_size = rk_iommu_unmap_iova(rk_domain, pte_addr, pte_dma, size);

	spin_unlock_irqrestore(&rk_domain->dt_lock, flags);

	/* Shootdown iotlb entries for iova range that was just unmapped */
	rk_iommu_zap_iova(rk_domain, iova, unmap_size);

	return unmap_size;
}

static struct rk_iommu *rk_iommu_from_dev(struct device *dev)
{
	struct iommu_group *group;
	struct device *iommu_dev;
	struct rk_iommu *rk_iommu;

	group = iommu_group_get(dev);
	if (!group)
		return NULL;
	iommu_dev = iommu_group_get_iommudata(group);
	rk_iommu = dev_get_drvdata(iommu_dev);
	iommu_group_put(group);

	return rk_iommu;
}

static int rk_iommu_attach_device(struct iommu_domain *domain,
				  struct device *dev)
{
	struct rk_iommu *iommu;
	struct rk_iommu_domain *rk_domain = to_rk_domain(domain);
	unsigned long flags;
	int ret, i;

	/*
	 * Allow 'virtual devices' (e.g., drm) to attach to domain.
	 * Such a device does not belong to an iommu group.
	 */
	iommu = rk_iommu_from_dev(dev);
	if (!iommu)
		return 0;

	ret = rk_iommu_enable_stall(iommu);
	if (ret)
		return ret;

	ret = rk_iommu_force_reset(iommu);
	if (ret)
		return ret;

	iommu->domain = domain;

	ret = devm_request_irq(iommu->dev, iommu->irq, rk_iommu_irq,
			       IRQF_SHARED, dev_name(dev), iommu);
	if (ret)
		return ret;

	for (i = 0; i < iommu->num_mmu; i++) {
		rk_iommu_write(iommu->bases[i], RK_MMU_DTE_ADDR,
			       rk_domain->dt_dma);
		rk_iommu_base_command(iommu->bases[i], RK_MMU_CMD_ZAP_CACHE);
		rk_iommu_write(iommu->bases[i], RK_MMU_INT_MASK, RK_MMU_IRQ_MASK);
	}

	ret = rk_iommu_enable_paging(iommu);
	if (ret)
		return ret;

	spin_lock_irqsave(&rk_domain->iommus_lock, flags);
	list_add_tail(&iommu->node, &rk_domain->iommus);
	spin_unlock_irqrestore(&rk_domain->iommus_lock, flags);

	dev_dbg(dev, "Attached to iommu domain\n");

	rk_iommu_disable_stall(iommu);

	return 0;
}

static void rk_iommu_detach_device(struct iommu_domain *domain,
				   struct device *dev)
{
	struct rk_iommu *iommu;
	struct rk_iommu_domain *rk_domain = to_rk_domain(domain);
	unsigned long flags;
	int i;

	/* Allow 'virtual devices' (e.g., drm) to detach from domain */
	iommu = rk_iommu_from_dev(dev);
	if (!iommu)
		return;

	spin_lock_irqsave(&rk_domain->iommus_lock, flags);
	list_del_init(&iommu->node);
	spin_unlock_irqrestore(&rk_domain->iommus_lock, flags);

	/* Ignore error while disabling, just keep going */
	rk_iommu_enable_stall(iommu);
	rk_iommu_disable_paging(iommu);
	for (i = 0; i < iommu->num_mmu; i++) {
		rk_iommu_write(iommu->bases[i], RK_MMU_INT_MASK, 0);
		rk_iommu_write(iommu->bases[i], RK_MMU_DTE_ADDR, 0);
	}
	rk_iommu_disable_stall(iommu);

	devm_free_irq(iommu->dev, iommu->irq, iommu);

	iommu->domain = NULL;

	dev_dbg(dev, "Detached from iommu domain\n");
}

static struct iommu_domain *rk_iommu_domain_alloc(unsigned type)
{
	struct rk_iommu_domain *rk_domain;
	struct platform_device *pdev;
	struct device *iommu_dev;

	if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_DMA)
		return NULL;

	/*
	 * Register a pdev per domain, so the DMA API can base its operations
	 * on this *dev, even when a virtual master doesn't have an iommu
	 * slave of its own.
	 */
	pdev = platform_device_register_simple("rk_iommu_domain",
					       PLATFORM_DEVID_AUTO, NULL, 0);
	if (IS_ERR(pdev))
		return NULL;

	rk_domain = devm_kzalloc(&pdev->dev, sizeof(*rk_domain), GFP_KERNEL);
	if (!rk_domain)
		goto err_unreg_pdev;

	rk_domain->pdev = pdev;

	if (type == IOMMU_DOMAIN_DMA &&
	    iommu_get_dma_cookie(&rk_domain->domain))
		goto err_unreg_pdev;

	/*
	 * rk32xx iommus use a 2-level pagetable.
	 * Each level1 (dt) and level2 (pt) table has 1024 4-byte entries.
	 * Allocate one 4 KiB page for each table.
	 */
	rk_domain->dt = (u32 *)get_zeroed_page(GFP_KERNEL | GFP_DMA32);
	if (!rk_domain->dt)
		goto err_put_cookie;

	iommu_dev = &pdev->dev;
	rk_domain->dt_dma = dma_map_single(iommu_dev, rk_domain->dt,
					   SPAGE_SIZE, DMA_TO_DEVICE);
	if (dma_mapping_error(iommu_dev, rk_domain->dt_dma)) {
		dev_err(iommu_dev, "DMA map error for DT\n");
		goto err_free_dt;
	}

	rk_table_flush(rk_domain, rk_domain->dt_dma, NUM_DT_ENTRIES);

	spin_lock_init(&rk_domain->iommus_lock);
	spin_lock_init(&rk_domain->dt_lock);
	INIT_LIST_HEAD(&rk_domain->iommus);

	rk_domain->domain.geometry.aperture_start = 0;
	rk_domain->domain.geometry.aperture_end   = DMA_BIT_MASK(32);
	rk_domain->domain.geometry.force_aperture = true;

	return &rk_domain->domain;

err_free_dt:
	free_page((unsigned long)rk_domain->dt);
err_put_cookie:
	if (type == IOMMU_DOMAIN_DMA)
		iommu_put_dma_cookie(&rk_domain->domain);
err_unreg_pdev:
	platform_device_unregister(pdev);

	return NULL;
}

static void rk_iommu_domain_free(struct iommu_domain *domain)
{
	struct rk_iommu_domain *rk_domain = to_rk_domain(domain);
	int i;

	WARN_ON(!list_empty(&rk_domain->iommus));

	for (i = 0; i < NUM_DT_ENTRIES; i++) {
		u32 dte = rk_domain->dt[i];

		if (rk_dte_is_pt_valid(dte)) {
			phys_addr_t pt_phys = rk_dte_pt_address(dte);
			u32 *page_table = phys_to_virt(pt_phys);

			dma_unmap_single(&rk_domain->pdev->dev, pt_phys,
					 SPAGE_SIZE, DMA_TO_DEVICE);
			free_page((unsigned long)page_table);
		}
	}

	dma_unmap_single(&rk_domain->pdev->dev, rk_domain->dt_dma,
			 SPAGE_SIZE, DMA_TO_DEVICE);
	free_page((unsigned long)rk_domain->dt);

	if (domain->type == IOMMU_DOMAIN_DMA)
		iommu_put_dma_cookie(&rk_domain->domain);

	platform_device_unregister(rk_domain->pdev);
}

static bool rk_iommu_is_dev_iommu_master(struct device *dev)
{
	struct device_node *np = dev->of_node;
	int ret;

	/*
	 * An iommu master has an iommus property containing a list of phandles
	 * to iommu nodes, each with an #iommu-cells property with value 0.
	 */
	ret = of_count_phandle_with_args(np, "iommus", "#iommu-cells");
	return (ret > 0);
}
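
/*
 * Device tree sketch (abridged, node names assumed) of what the master check
 * above and the parsing below look for: the iommu node carries
 * "#iommu-cells = <0>", and a master references it with a bare phandle:
 *
 *   vopl_mmu: iommu@ff940300 {
 *           compatible = "rockchip,iommu";
 *           #iommu-cells = <0>;
 *   };
 *
 *   &vopl {
 *           iommus = <&vopl_mmu>;
 *   };
 *
 * Because #iommu-cells is 0, args.args_count below must also be 0.
 */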

static int rk_iommu_group_set_iommudata(struct iommu_group *group,
					struct device *dev)
{
	struct device_node *np = dev->of_node;
	struct platform_device *pd;
	int ret;
	struct of_phandle_args args;

	/*
	 * An iommu master has an iommus property containing a list of phandles
	 * to iommu nodes, each with an #iommu-cells property with value 0.
	 */
	ret = of_parse_phandle_with_args(np, "iommus", "#iommu-cells", 0,
					 &args);
	if (ret) {
		dev_err(dev, "of_parse_phandle_with_args(%s) => %d\n",
			np->full_name, ret);
		return ret;
	}
	if (args.args_count != 0) {
		dev_err(dev, "incorrect number of iommu params found for %s (found %d, expected 0)\n",
			args.np->full_name, args.args_count);
		return -EINVAL;
	}

	pd = of_find_device_by_node(args.np);
	of_node_put(args.np);
	if (!pd) {
		dev_err(dev, "iommu %s not found\n", args.np->full_name);
		return -EPROBE_DEFER;
	}

	/* TODO(djkurtz): handle multiple slave iommus for a single master */
	iommu_group_set_iommudata(group, &pd->dev, NULL);

	return 0;
}

static int rk_iommu_add_device(struct device *dev)
{
	struct iommu_group *group;
	struct rk_iommu *iommu;
	int ret;

	if (!rk_iommu_is_dev_iommu_master(dev))
		return -ENODEV;

	group = iommu_group_get(dev);
	if (!group) {
		group = iommu_group_alloc();
		if (IS_ERR(group)) {
			dev_err(dev, "Failed to allocate IOMMU group\n");
			return PTR_ERR(group);
		}
	}

	ret = iommu_group_add_device(group, dev);
	if (ret)
		goto err_put_group;

	ret = rk_iommu_group_set_iommudata(group, dev);
	if (ret)
		goto err_remove_device;

	iommu = rk_iommu_from_dev(dev);
	if (iommu)
		iommu_device_link(&iommu->iommu, dev);

	iommu_group_put(group);

	return 0;

err_remove_device:
	iommu_group_remove_device(dev);
err_put_group:
	iommu_group_put(group);
	return ret;
}

static void rk_iommu_remove_device(struct device *dev)
{
	struct rk_iommu *iommu;

	if (!rk_iommu_is_dev_iommu_master(dev))
		return;

	iommu = rk_iommu_from_dev(dev);
	if (iommu)
		iommu_device_unlink(&iommu->iommu, dev);

	iommu_group_remove_device(dev);
}

static const struct iommu_ops rk_iommu_ops = {
	.domain_alloc = rk_iommu_domain_alloc,
	.domain_free = rk_iommu_domain_free,
	.attach_dev = rk_iommu_attach_device,
	.detach_dev = rk_iommu_detach_device,
	.map = rk_iommu_map,
	.unmap = rk_iommu_unmap,
	.map_sg = default_iommu_map_sg,
	.add_device = rk_iommu_add_device,
	.remove_device = rk_iommu_remove_device,
	.iova_to_phys = rk_iommu_iova_to_phys,
	.pgsize_bitmap = RK_IOMMU_PGSIZE_BITMAP,
};
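
/*
 * Usage sketch (illustrative only): a master driver does not call the ops
 * above directly; it goes through the generic IOMMU API, which dispatches to
 * rk_iommu_ops once this driver has claimed the master's group, roughly:
 *
 *   struct iommu_domain *domain = iommu_domain_alloc(&platform_bus_type);
 *
 *   iommu_attach_device(domain, dev);              // -> rk_iommu_attach_device()
 *   iommu_map(domain, iova, paddr, size,
 *             IOMMU_READ | IOMMU_WRITE);           // -> rk_iommu_map()
 *   ...
 *   iommu_unmap(domain, iova, size);               // -> rk_iommu_unmap()
 *   iommu_detach_device(domain, dev);              // -> rk_iommu_detach_device()
 *   iommu_domain_free(domain);                     // -> rk_iommu_domain_free()
 *
 * DMA-API domains (IOMMU_DOMAIN_DMA) are handled transparently by dma-iommu
 * instead.
 */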

static int rk_iommu_domain_probe(struct platform_device *pdev)
{
	struct device *dev = &pdev->dev;

	dev->dma_parms = devm_kzalloc(dev, sizeof(*dev->dma_parms), GFP_KERNEL);
	if (!dev->dma_parms)
		return -ENOMEM;

	/* Set dma_ops for dev, otherwise it would be dummy_dma_ops */
	arch_setup_dma_ops(dev, 0, DMA_BIT_MASK(32), NULL, false);

	dma_set_max_seg_size(dev, DMA_BIT_MASK(32));
	dma_coerce_mask_and_coherent(dev, DMA_BIT_MASK(32));

	return 0;
}

static struct platform_driver rk_iommu_domain_driver = {
	.probe = rk_iommu_domain_probe,
	.driver = {
		   .name = "rk_iommu_domain",
	},
};

static int rk_iommu_probe(struct platform_device *pdev)
{
	struct device *dev = &pdev->dev;
	struct rk_iommu *iommu;
	struct resource *res;
	int num_res = pdev->num_resources;
	int err, i;

	iommu = devm_kzalloc(dev, sizeof(*iommu), GFP_KERNEL);
	if (!iommu)
		return -ENOMEM;

	platform_set_drvdata(pdev, iommu);
	iommu->dev = dev;
	iommu->num_mmu = 0;

	iommu->bases = devm_kzalloc(dev, sizeof(*iommu->bases) * num_res,
				    GFP_KERNEL);
	if (!iommu->bases)
		return -ENOMEM;

	for (i = 0; i < num_res; i++) {
		res = platform_get_resource(pdev, IORESOURCE_MEM, i);
		if (!res)
			continue;
		iommu->bases[i] = devm_ioremap_resource(&pdev->dev, res);
		if (IS_ERR(iommu->bases[i]))
			continue;
		iommu->num_mmu++;
	}
	if (iommu->num_mmu == 0)
		return PTR_ERR(iommu->bases[0]);

	iommu->irq = platform_get_irq(pdev, 0);
	if (iommu->irq < 0) {
		dev_err(dev, "Failed to get IRQ, %d\n", iommu->irq);
		return -ENXIO;
	}

	err = iommu_device_sysfs_add(&iommu->iommu, dev, NULL, dev_name(dev));
	if (err)
		return err;

	iommu_device_set_ops(&iommu->iommu, &rk_iommu_ops);
	err = iommu_device_register(&iommu->iommu);

	return err;
}

static int rk_iommu_remove(struct platform_device *pdev)
{
	struct rk_iommu *iommu = platform_get_drvdata(pdev);

	if (iommu) {
		iommu_device_sysfs_remove(&iommu->iommu);
		iommu_device_unregister(&iommu->iommu);
	}

	return 0;
}

static const struct of_device_id rk_iommu_dt_ids[] = {
	{ .compatible = "rockchip,iommu" },
	{ /* sentinel */ }
};
MODULE_DEVICE_TABLE(of, rk_iommu_dt_ids);

static struct platform_driver rk_iommu_driver = {
	.probe = rk_iommu_probe,
	.remove = rk_iommu_remove,
	.driver = {
		   .name = "rk_iommu",
		   .of_match_table = rk_iommu_dt_ids,
	},
};

static int __init rk_iommu_init(void)
{
	struct device_node *np;
	int ret;

	np = of_find_matching_node(NULL, rk_iommu_dt_ids);
	if (!np)
		return 0;

	of_node_put(np);

	ret = bus_set_iommu(&platform_bus_type, &rk_iommu_ops);
	if (ret)
		return ret;

	ret = platform_driver_register(&rk_iommu_domain_driver);
	if (ret)
		return ret;

	ret = platform_driver_register(&rk_iommu_driver);
	if (ret)
		platform_driver_unregister(&rk_iommu_domain_driver);
	return ret;
}

static void __exit rk_iommu_exit(void)
{
	platform_driver_unregister(&rk_iommu_driver);
	platform_driver_unregister(&rk_iommu_domain_driver);
}

subsys_initcall(rk_iommu_init);
module_exit(rk_iommu_exit);

MODULE_DESCRIPTION("IOMMU API for Rockchip");
MODULE_AUTHOR("Simon Xue <xxm@rock-chips.com> and Daniel Kurtz <djkurtz@chromium.org>");
MODULE_ALIAS("platform:rockchip-iommu");
MODULE_LICENSE("GPL v2");