1650275dbSCong Wang #include <linux/prefetch.h> 2650275dbSCong Wang 31da177e4SLinus Torvalds /** 41da177e4SLinus Torvalds * iommu_fill_pdir - Insert coalesced scatter/gather chunks into the I/O Pdir. 51da177e4SLinus Torvalds * @ioc: The I/O Controller. 61da177e4SLinus Torvalds * @startsg: The scatter/gather list of coalesced chunks. 71da177e4SLinus Torvalds * @nents: The number of entries in the scatter/gather list. 81da177e4SLinus Torvalds * @hint: The DMA Hint. 91da177e4SLinus Torvalds * 101da177e4SLinus Torvalds * This function inserts the coalesced scatter/gather list chunks into the 111da177e4SLinus Torvalds * I/O Controller's I/O Pdir. 121da177e4SLinus Torvalds */ 131da177e4SLinus Torvalds static inline unsigned int 141da177e4SLinus Torvalds iommu_fill_pdir(struct ioc *ioc, struct scatterlist *startsg, int nents, 151da177e4SLinus Torvalds unsigned long hint, 161da177e4SLinus Torvalds void (*iommu_io_pdir_entry)(u64 *, space_t, unsigned long, 171da177e4SLinus Torvalds unsigned long)) 181da177e4SLinus Torvalds { 191da177e4SLinus Torvalds struct scatterlist *dma_sg = startsg; /* pointer to current DMA */ 201da177e4SLinus Torvalds unsigned int n_mappings = 0; 211da177e4SLinus Torvalds unsigned long dma_offset = 0, dma_len = 0; 221da177e4SLinus Torvalds u64 *pdirp = NULL; 231da177e4SLinus Torvalds 241da177e4SLinus Torvalds /* Horrible hack. 
For efficiency's sake, dma_sg starts one 251da177e4SLinus Torvalds * entry below the true start (it is immediately incremented 261da177e4SLinus Torvalds * in the loop) */ 271da177e4SLinus Torvalds dma_sg--; 281da177e4SLinus Torvalds 291da177e4SLinus Torvalds while (nents-- > 0) { 301da177e4SLinus Torvalds unsigned long vaddr; 311da177e4SLinus Torvalds long size; 321da177e4SLinus Torvalds 338bf8a1d1SMatthew Wilcox DBG_RUN_SG(" %d : %08lx/%05x %p/%05x\n", nents, 341da177e4SLinus Torvalds (unsigned long)sg_dma_address(startsg), cnt, 358bf8a1d1SMatthew Wilcox sg_virt(startsg), startsg->length 361da177e4SLinus Torvalds ); 371da177e4SLinus Torvalds 381da177e4SLinus Torvalds 391da177e4SLinus Torvalds /* 401da177e4SLinus Torvalds ** Look for the start of a new DMA stream 411da177e4SLinus Torvalds */ 421da177e4SLinus Torvalds 431da177e4SLinus Torvalds if (sg_dma_address(startsg) & PIDE_FLAG) { 441da177e4SLinus Torvalds u32 pide = sg_dma_address(startsg) & ~PIDE_FLAG; 451da177e4SLinus Torvalds 461da177e4SLinus Torvalds BUG_ON(pdirp && (dma_len != sg_dma_len(dma_sg))); 471da177e4SLinus Torvalds 481da177e4SLinus Torvalds dma_sg++; 491da177e4SLinus Torvalds 501da177e4SLinus Torvalds dma_len = sg_dma_len(startsg); 511da177e4SLinus Torvalds sg_dma_len(startsg) = 0; 521da177e4SLinus Torvalds dma_offset = (unsigned long) pide & ~IOVP_MASK; 531da177e4SLinus Torvalds n_mappings++; 541da177e4SLinus Torvalds #if defined(ZX1_SUPPORT) 551da177e4SLinus Torvalds /* Pluto IOMMU IO Virt Address is not zero based */ 561da177e4SLinus Torvalds sg_dma_address(dma_sg) = pide | ioc->ibase; 571da177e4SLinus Torvalds #else 581da177e4SLinus Torvalds /* SBA, ccio, and dino are zero based. 591da177e4SLinus Torvalds * Trying to save a few CPU cycles for most users. 
601da177e4SLinus Torvalds */ 611da177e4SLinus Torvalds sg_dma_address(dma_sg) = pide; 621da177e4SLinus Torvalds #endif 631da177e4SLinus Torvalds pdirp = &(ioc->pdir_base[pide >> IOVP_SHIFT]); 641da177e4SLinus Torvalds prefetchw(pdirp); 651da177e4SLinus Torvalds } 661da177e4SLinus Torvalds 671da177e4SLinus Torvalds BUG_ON(pdirp == NULL); 681da177e4SLinus Torvalds 698bf8a1d1SMatthew Wilcox vaddr = (unsigned long)sg_virt(startsg); 701da177e4SLinus Torvalds sg_dma_len(dma_sg) += startsg->length; 711da177e4SLinus Torvalds size = startsg->length + dma_offset; 721da177e4SLinus Torvalds dma_offset = 0; 731da177e4SLinus Torvalds #ifdef IOMMU_MAP_STATS 741da177e4SLinus Torvalds ioc->msg_pages += startsg->length >> IOVP_SHIFT; 751da177e4SLinus Torvalds #endif 761da177e4SLinus Torvalds do { 771da177e4SLinus Torvalds iommu_io_pdir_entry(pdirp, KERNEL_SPACE, 781da177e4SLinus Torvalds vaddr, hint); 791da177e4SLinus Torvalds vaddr += IOVP_SIZE; 801da177e4SLinus Torvalds size -= IOVP_SIZE; 811da177e4SLinus Torvalds pdirp++; 821da177e4SLinus Torvalds } while(unlikely(size > 0)); 831da177e4SLinus Torvalds startsg++; 841da177e4SLinus Torvalds } 851da177e4SLinus Torvalds return(n_mappings); 861da177e4SLinus Torvalds } 871da177e4SLinus Torvalds 881da177e4SLinus Torvalds 891da177e4SLinus Torvalds /* 901da177e4SLinus Torvalds ** First pass is to walk the SG list and determine where the breaks are 911da177e4SLinus Torvalds ** in the DMA stream. Allocates PDIR entries but does not fill them. 921da177e4SLinus Torvalds ** Returns the number of DMA chunks. 931da177e4SLinus Torvalds ** 941da177e4SLinus Torvalds ** Doing the fill separate from the coalescing/allocation keeps the 951da177e4SLinus Torvalds ** code simpler. Future enhancement could make one pass through 961da177e4SLinus Torvalds ** the sglist do both. 
*/

static inline unsigned int
iommu_coalesce_chunks(struct ioc *ioc, struct device *dev,
		struct scatterlist *startsg, int nents,
		int (*iommu_alloc_range)(struct ioc *, struct device *, size_t))
{
	struct scatterlist *contig_sg;	   /* contig chunk head */
	unsigned long dma_offset, dma_len; /* start/len of DMA stream */
	unsigned int n_mappings = 0;
	/* Segment length is capped by both the device's limit and the
	 * platform DMA_CHUNK_SIZE, whichever is smaller. */
	unsigned int max_seg_size = min(dma_get_max_seg_size(dev),
					(unsigned)DMA_CHUNK_SIZE);
	/* A segment must also not cross the device's boundary mask;
	 * since streams start boundary-aligned, capping the length at
	 * (boundary + 1) is sufficient. */
	unsigned int max_seg_boundary = dma_get_seg_boundary(dev) + 1;
	if (max_seg_boundary)	/* check if the addition above didn't overflow */
		max_seg_size = min(max_seg_size, max_seg_boundary);

	while (nents > 0) {

		/*
		** Prepare for first/next DMA stream
		*/
		contig_sg = startsg;
		dma_len = startsg->length;
		dma_offset = startsg->offset;

		/* PARANOID: clear entries */
		sg_dma_address(startsg) = 0;
		sg_dma_len(startsg) = 0;

		/*
		** This loop terminates one iteration "early" since
		** it's always looking one "ahead".
		*/
		while(--nents > 0) {
			unsigned long prev_end, sg_start;

			/* Kernel-virtual end of the current entry ... */
			prev_end = (unsigned long)sg_virt(startsg) +
							startsg->length;

			startsg++;
			/* ... and start of the lookahead entry. */
			sg_start = (unsigned long)sg_virt(startsg);

			/* PARANOID: clear entries */
			sg_dma_address(startsg) = 0;
			sg_dma_len(startsg) = 0;

			/*
			** First make sure current dma stream won't
			** exceed max_seg_size if we coalesce the
			** next entry.
			*/
			if (unlikely(ALIGN(dma_len + dma_offset + startsg->length, IOVP_SIZE) >
				     max_seg_size))
				break;

			/*
			 * Next see if we can append the next chunk (i.e.
			 * it must end on one page and begin on another, or
			 * it must start on the same address as the previous
			 * entry ended.
			 */
			if (unlikely((prev_end != sg_start) ||
				((prev_end | sg_start) & ~PAGE_MASK)))
				break;
			
			dma_len += startsg->length;
		}

		/*
		** End of DMA Stream
		** Terminate last VCONTIG block.
		** Allocate space for DMA stream.
		*/
		/* Stream head records total byte length; DMA address is the
		 * allocated pdir index (tagged with PIDE_FLAG for the second
		 * pass) plus the intra-page offset of the first entry. */
		sg_dma_len(contig_sg) = dma_len;
		dma_len = ALIGN(dma_len + dma_offset, IOVP_SIZE);
		sg_dma_address(contig_sg) =
			PIDE_FLAG 
			| (iommu_alloc_range(ioc, dev, dma_len) << IOVP_SHIFT)
			| dma_offset;
		n_mappings++;
	}

	return n_mappings;
}
