1b2441318SGreg Kroah-Hartman /* SPDX-License-Identifier: GPL-2.0 */ 2650275dbSCong Wang #include <linux/prefetch.h> 3650275dbSCong Wang 41da177e4SLinus Torvalds /** 51da177e4SLinus Torvalds * iommu_fill_pdir - Insert coalesced scatter/gather chunks into the I/O Pdir. 61da177e4SLinus Torvalds * @ioc: The I/O Controller. 71da177e4SLinus Torvalds * @startsg: The scatter/gather list of coalesced chunks. 81da177e4SLinus Torvalds * @nents: The number of entries in the scatter/gather list. 91da177e4SLinus Torvalds * @hint: The DMA Hint. 101da177e4SLinus Torvalds * 111da177e4SLinus Torvalds * This function inserts the coalesced scatter/gather list chunks into the 121da177e4SLinus Torvalds * I/O Controller's I/O Pdir. 131da177e4SLinus Torvalds */ 141da177e4SLinus Torvalds static inline unsigned int 151da177e4SLinus Torvalds iommu_fill_pdir(struct ioc *ioc, struct scatterlist *startsg, int nents, 161da177e4SLinus Torvalds unsigned long hint, 17*c1ebb940SHelge Deller void (*iommu_io_pdir_entry)(__le64 *, space_t, unsigned long, 181da177e4SLinus Torvalds unsigned long)) 191da177e4SLinus Torvalds { 201da177e4SLinus Torvalds struct scatterlist *dma_sg = startsg; /* pointer to current DMA */ 211da177e4SLinus Torvalds unsigned int n_mappings = 0; 221da177e4SLinus Torvalds unsigned long dma_offset = 0, dma_len = 0; 23*c1ebb940SHelge Deller __le64 *pdirp = NULL; 241da177e4SLinus Torvalds 251da177e4SLinus Torvalds /* Horrible hack. 
For efficiency's sake, dma_sg starts one 261da177e4SLinus Torvalds * entry below the true start (it is immediately incremented 271da177e4SLinus Torvalds * in the loop) */ 281da177e4SLinus Torvalds dma_sg--; 291da177e4SLinus Torvalds 301da177e4SLinus Torvalds while (nents-- > 0) { 311da177e4SLinus Torvalds unsigned long vaddr; 321da177e4SLinus Torvalds long size; 331da177e4SLinus Torvalds 348bf8a1d1SMatthew Wilcox DBG_RUN_SG(" %d : %08lx/%05x %p/%05x\n", nents, 351da177e4SLinus Torvalds (unsigned long)sg_dma_address(startsg), cnt, 368bf8a1d1SMatthew Wilcox sg_virt(startsg), startsg->length 371da177e4SLinus Torvalds ); 381da177e4SLinus Torvalds 391da177e4SLinus Torvalds 401da177e4SLinus Torvalds /* 411da177e4SLinus Torvalds ** Look for the start of a new DMA stream 421da177e4SLinus Torvalds */ 431da177e4SLinus Torvalds 441da177e4SLinus Torvalds if (sg_dma_address(startsg) & PIDE_FLAG) { 451da177e4SLinus Torvalds u32 pide = sg_dma_address(startsg) & ~PIDE_FLAG; 461da177e4SLinus Torvalds 471da177e4SLinus Torvalds BUG_ON(pdirp && (dma_len != sg_dma_len(dma_sg))); 481da177e4SLinus Torvalds 491da177e4SLinus Torvalds dma_sg++; 501da177e4SLinus Torvalds 511da177e4SLinus Torvalds dma_len = sg_dma_len(startsg); 521da177e4SLinus Torvalds sg_dma_len(startsg) = 0; 531da177e4SLinus Torvalds dma_offset = (unsigned long) pide & ~IOVP_MASK; 541da177e4SLinus Torvalds n_mappings++; 551da177e4SLinus Torvalds #if defined(ZX1_SUPPORT) 561da177e4SLinus Torvalds /* Pluto IOMMU IO Virt Address is not zero based */ 571da177e4SLinus Torvalds sg_dma_address(dma_sg) = pide | ioc->ibase; 581da177e4SLinus Torvalds #else 591da177e4SLinus Torvalds /* SBA, ccio, and dino are zero based. 601da177e4SLinus Torvalds * Trying to save a few CPU cycles for most users. 
611da177e4SLinus Torvalds */ 621da177e4SLinus Torvalds sg_dma_address(dma_sg) = pide; 631da177e4SLinus Torvalds #endif 641da177e4SLinus Torvalds pdirp = &(ioc->pdir_base[pide >> IOVP_SHIFT]); 651da177e4SLinus Torvalds prefetchw(pdirp); 661da177e4SLinus Torvalds } 671da177e4SLinus Torvalds 681da177e4SLinus Torvalds BUG_ON(pdirp == NULL); 691da177e4SLinus Torvalds 708bf8a1d1SMatthew Wilcox vaddr = (unsigned long)sg_virt(startsg); 711da177e4SLinus Torvalds sg_dma_len(dma_sg) += startsg->length; 721da177e4SLinus Torvalds size = startsg->length + dma_offset; 731da177e4SLinus Torvalds dma_offset = 0; 741da177e4SLinus Torvalds #ifdef IOMMU_MAP_STATS 751da177e4SLinus Torvalds ioc->msg_pages += startsg->length >> IOVP_SHIFT; 761da177e4SLinus Torvalds #endif 771da177e4SLinus Torvalds do { 781da177e4SLinus Torvalds iommu_io_pdir_entry(pdirp, KERNEL_SPACE, 791da177e4SLinus Torvalds vaddr, hint); 801da177e4SLinus Torvalds vaddr += IOVP_SIZE; 811da177e4SLinus Torvalds size -= IOVP_SIZE; 821da177e4SLinus Torvalds pdirp++; 831da177e4SLinus Torvalds } while(unlikely(size > 0)); 841da177e4SLinus Torvalds startsg++; 851da177e4SLinus Torvalds } 861da177e4SLinus Torvalds return(n_mappings); 871da177e4SLinus Torvalds } 881da177e4SLinus Torvalds 891da177e4SLinus Torvalds 901da177e4SLinus Torvalds /* 911da177e4SLinus Torvalds ** First pass is to walk the SG list and determine where the breaks are 921da177e4SLinus Torvalds ** in the DMA stream. Allocates PDIR entries but does not fill them. 931da177e4SLinus Torvalds ** Returns the number of DMA chunks. 941da177e4SLinus Torvalds ** 951da177e4SLinus Torvalds ** Doing the fill separate from the coalescing/allocation keeps the 961da177e4SLinus Torvalds ** code simpler. Future enhancement could make one pass through 971da177e4SLinus Torvalds ** the sglist do both. 
*/

/*
 * iommu_coalesce_chunks - walk @startsg and group virtually-contiguous
 * entries into DMA streams.
 * @ioc: the I/O Controller.
 * @dev: device the mapping is for (limits segment size/boundary).
 * @startsg: the scatter/gather list to coalesce; mutated in place.
 * @nents: number of entries in @startsg.
 * @iommu_alloc_range: driver callback that reserves pdir space and returns
 *	the starting pdir index for a stream of the given byte length.
 *
 * Each stream head gets sg_dma_address() set to PIDE_FLAG | pdir-offset |
 * intra-page offset, and sg_dma_len() set to the stream's total length;
 * all other entries have both cleared.  iommu_fill_pdir() consumes this
 * encoding in the second pass.  Returns the number of streams.
 */
static inline unsigned int
iommu_coalesce_chunks(struct ioc *ioc, struct device *dev,
		struct scatterlist *startsg, int nents,
		int (*iommu_alloc_range)(struct ioc *, struct device *, size_t))
{
	struct scatterlist *contig_sg;	   /* contig chunk head */
	unsigned long dma_offset, dma_len; /* start/len of DMA stream */
	unsigned int n_mappings = 0;
	unsigned int max_seg_size = min(dma_get_max_seg_size(dev),
					(unsigned)DMA_CHUNK_SIZE);
	unsigned int max_seg_boundary = dma_get_seg_boundary(dev) + 1;
	if (max_seg_boundary)	/* check if the addition above didn't overflow */
		max_seg_size = min(max_seg_size, max_seg_boundary);

	while (nents > 0) {

		/*
		** Prepare for first/next DMA stream
		*/
		contig_sg = startsg;
		dma_len = startsg->length;
		dma_offset = startsg->offset;

		/* PARANOID: clear entries */
		sg_dma_address(startsg) = 0;
		sg_dma_len(startsg) = 0;

		/*
		** This loop terminates one iteration "early" since
		** it's always looking one "ahead".
		*/
		while(--nents > 0) {
			unsigned long prev_end, sg_start;

			prev_end = (unsigned long)sg_virt(startsg) +
							startsg->length;

			startsg++;
			sg_start = (unsigned long)sg_virt(startsg);

			/* PARANOID: clear entries */
			sg_dma_address(startsg) = 0;
			sg_dma_len(startsg) = 0;

			/*
			** First make sure current dma stream won't
			** exceed max_seg_size if we coalesce the
			** next entry.
			*/
			if (unlikely(ALIGN(dma_len + dma_offset + startsg->length, IOVP_SIZE) >
				     max_seg_size))
				break;

			/*
			 * Next see if we can append the next chunk (i.e.
			 * it must end on one page and begin on another, or
			 * it must start on the same address as the previous
			 * entry ended.
			 */
			/* Both conditions require prev_end == sg_start; the
			 * second test additionally rejects a join point that
			 * is not page-aligned unless addresses are truly
			 * contiguous (prev_end != sg_start already broke). */
			if (unlikely((prev_end != sg_start) ||
				((prev_end | sg_start) & ~PAGE_MASK)))
				break;

			dma_len += startsg->length;
		}

		/*
		** End of DMA Stream
		** Terminate last VCONTIG block.
		** Allocate space for DMA stream.
		*/
		/* Round the pdir reservation up to whole IOVP pages,
		 * including the head's intra-page offset. */
		sg_dma_len(contig_sg) = dma_len;
		dma_len = ALIGN(dma_len + dma_offset, IOVP_SIZE);
		sg_dma_address(contig_sg) =
			PIDE_FLAG
			| (iommu_alloc_range(ioc, dev, dma_len) << IOVP_SHIFT)
			| dma_offset;
		n_mappings++;
	}

	return n_mappings;
}