// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2023 Advanced Micro Devices, Inc. */

#include <linux/interval_tree.h>
#include <linux/vfio.h>

#include <linux/pds/pds_common.h>
#include <linux/pds/pds_core_if.h>
#include <linux/pds/pds_adminq.h>

#include "vfio_dev.h"
#include "cmds.h"
#include "dirty.h"

#define READ_SEQ true
#define WRITE_ACK false

bool pds_vfio_dirty_is_enabled(struct pds_vfio_pci_device *pds_vfio)
{
	return pds_vfio->dirty.is_enabled;
}

void pds_vfio_dirty_set_enabled(struct pds_vfio_pci_device *pds_vfio)
{
	pds_vfio->dirty.is_enabled = true;
}

void pds_vfio_dirty_set_disabled(struct pds_vfio_pci_device *pds_vfio)
{
	pds_vfio->dirty.is_enabled = false;
}

static void
pds_vfio_print_guest_region_info(struct pds_vfio_pci_device *pds_vfio,
				 u8 max_regions)
{
	int len = max_regions * sizeof(struct pds_lm_dirty_region_info);
	struct pci_dev *pdev = pds_vfio->vfio_coredev.pdev;
	struct device *pdsc_dev = &pci_physfn(pdev)->dev;
	struct pds_lm_dirty_region_info *region_info;
	dma_addr_t regions_dma;
	u8 num_regions;
	int err;

	region_info = kcalloc(max_regions,
			      sizeof(struct pds_lm_dirty_region_info),
			      GFP_KERNEL);
	if (!region_info)
		return;

	regions_dma =
		dma_map_single(pdsc_dev, region_info, len, DMA_FROM_DEVICE);
	if (dma_mapping_error(pdsc_dev, regions_dma))
		goto out_free_region_info;

	err = pds_vfio_dirty_status_cmd(pds_vfio, regions_dma, &max_regions,
					&num_regions);
	dma_unmap_single(pdsc_dev, regions_dma, len, DMA_FROM_DEVICE);
	if (err)
		goto out_free_region_info;

	for (unsigned int i = 0; i < num_regions; i++)
		dev_dbg(&pdev->dev,
			"region_info[%u]: dma_base 0x%llx page_count %u page_size_log2 %u\n",
			i, le64_to_cpu(region_info[i].dma_base),
			le32_to_cpu(region_info[i].page_count),
			region_info[i].page_size_log2);

out_free_region_info:
	kfree(region_info);
}
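
/*
 * The device reports dirtiness through a "seq" bitmap and the host records
 * what it has already reported to userspace in a matching "ack" bitmap;
 * dirty pages are the bits that differ between the two. At one bit per
 * region page these bitmaps can be large, so they come from vmalloc space
 * rather than kmalloc.
 */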
static int pds_vfio_dirty_alloc_bitmaps(struct pds_vfio_dirty *dirty,
					unsigned long bytes)
{
	unsigned long *host_seq_bmp, *host_ack_bmp;

	host_seq_bmp = vzalloc(bytes);
	if (!host_seq_bmp)
		return -ENOMEM;

	host_ack_bmp = vzalloc(bytes);
	if (!host_ack_bmp) {
		/* vzalloc'd memory must be vfree'd; bitmap_free() is kfree() based */
		vfree(host_seq_bmp);
		return -ENOMEM;
	}

	dirty->host_seq.bmp = host_seq_bmp;
	dirty->host_ack.bmp = host_ack_bmp;

	return 0;
}

static void pds_vfio_dirty_free_bitmaps(struct pds_vfio_dirty *dirty)
{
	vfree(dirty->host_seq.bmp);
	vfree(dirty->host_ack.bmp);
	dirty->host_seq.bmp = NULL;
	dirty->host_ack.bmp = NULL;
}

static void __pds_vfio_dirty_free_sgl(struct pds_vfio_pci_device *pds_vfio,
				      struct pds_vfio_bmp_info *bmp_info)
{
	struct pci_dev *pdev = pds_vfio->vfio_coredev.pdev;
	struct device *pdsc_dev = &pci_physfn(pdev)->dev;

	dma_unmap_single(pdsc_dev, bmp_info->sgl_addr,
			 bmp_info->num_sge * sizeof(struct pds_lm_sg_elem),
			 DMA_BIDIRECTIONAL);
	kfree(bmp_info->sgl);

	bmp_info->num_sge = 0;
	bmp_info->sgl = NULL;
	bmp_info->sgl_addr = 0;
}

static void pds_vfio_dirty_free_sgl(struct pds_vfio_pci_device *pds_vfio)
{
	if (pds_vfio->dirty.host_seq.sgl)
		__pds_vfio_dirty_free_sgl(pds_vfio, &pds_vfio->dirty.host_seq);
	if (pds_vfio->dirty.host_ack.sgl)
		__pds_vfio_dirty_free_sgl(pds_vfio, &pds_vfio->dirty.host_ack);
}
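
/*
 * Each scatter-gather element maps at most one page of bitmap memory, and
 * each bitmap byte tracks 8 region pages, so one element covers up to
 * PAGE_SIZE * 8 region pages; size the SG list accordingly.
 */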
static int __pds_vfio_dirty_alloc_sgl(struct pds_vfio_pci_device *pds_vfio,
				      struct pds_vfio_bmp_info *bmp_info,
				      u32 page_count)
{
	struct pci_dev *pdev = pds_vfio->vfio_coredev.pdev;
	struct device *pdsc_dev = &pci_physfn(pdev)->dev;
	struct pds_lm_sg_elem *sgl;
	dma_addr_t sgl_addr;
	size_t sgl_size;
	u32 max_sge;

	max_sge = DIV_ROUND_UP(page_count, PAGE_SIZE * 8);
	sgl_size = max_sge * sizeof(struct pds_lm_sg_elem);

	sgl = kzalloc(sgl_size, GFP_KERNEL);
	if (!sgl)
		return -ENOMEM;

	sgl_addr = dma_map_single(pdsc_dev, sgl, sgl_size, DMA_BIDIRECTIONAL);
	if (dma_mapping_error(pdsc_dev, sgl_addr)) {
		kfree(sgl);
		return -EIO;
	}

	bmp_info->sgl = sgl;
	bmp_info->num_sge = max_sge;
	bmp_info->sgl_addr = sgl_addr;

	return 0;
}

static int pds_vfio_dirty_alloc_sgl(struct pds_vfio_pci_device *pds_vfio,
				    u32 page_count)
{
	struct pds_vfio_dirty *dirty = &pds_vfio->dirty;
	int err;

	err = __pds_vfio_dirty_alloc_sgl(pds_vfio, &dirty->host_seq,
					 page_count);
	if (err)
		return err;

	err = __pds_vfio_dirty_alloc_sgl(pds_vfio, &dirty->host_ack,
					 page_count);
	if (err) {
		__pds_vfio_dirty_free_sgl(pds_vfio, &dirty->host_seq);
		return err;
	}

	return 0;
}
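
/*
 * Enable device dirty page tracking: verify tracking is currently off,
 * collapse the requested IOVA ranges into a single region, program that
 * region into the device, then allocate the host-side seq/ack bitmaps and
 * the SG lists used to transfer them.
 */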
static int pds_vfio_dirty_enable(struct pds_vfio_pci_device *pds_vfio,
				 struct rb_root_cached *ranges, u32 nnodes,
				 u64 *page_size)
{
	struct pci_dev *pdev = pds_vfio->vfio_coredev.pdev;
	struct device *pdsc_dev = &pci_physfn(pdev)->dev;
	struct pds_vfio_dirty *dirty = &pds_vfio->dirty;
	u64 region_start, region_size, region_page_size;
	struct pds_lm_dirty_region_info *region_info;
	struct interval_tree_node *node = NULL;
	u8 max_regions = 0, num_regions;
	dma_addr_t regions_dma = 0;
	u32 num_ranges = nnodes;
	u32 page_count;
	u16 len;
	int err;

	dev_dbg(&pdev->dev, "vf%u: Start dirty page tracking\n",
		pds_vfio->vf_id);

	if (pds_vfio_dirty_is_enabled(pds_vfio))
		return -EINVAL;

	/* find if dirty tracking is disabled, i.e. num_regions == 0 */
	err = pds_vfio_dirty_status_cmd(pds_vfio, 0, &max_regions,
					&num_regions);
	if (err < 0) {
		dev_err(&pdev->dev, "Failed to get dirty status, err %pe\n",
			ERR_PTR(err));
		return err;
	} else if (num_regions) {
		dev_err(&pdev->dev,
			"Dirty tracking already enabled for %d regions\n",
			num_regions);
		return -EEXIST;
	} else if (!max_regions) {
		dev_err(&pdev->dev,
			"Device doesn't support dirty tracking, max_regions %d\n",
			max_regions);
		return -EOPNOTSUPP;
	}

	/*
	 * Only support 1 region for now. If there are any large gaps in the
	 * VM's address regions, then this would be a waste of memory as we are
	 * generating 2 bitmaps (ack/seq) from the min address to the max
	 * address of the VM's address regions. In the future, if we support
	 * more than one region in the device/driver we can split the bitmaps
	 * on the largest address region gaps. We can do this split up to the
	 * max_regions times returned from the dirty_status command.
	 */
	max_regions = 1;
	if (num_ranges > max_regions) {
		vfio_combine_iova_ranges(ranges, nnodes, max_regions);
		num_ranges = max_regions;
	}

	node = interval_tree_iter_first(ranges, 0, ULONG_MAX);
	if (!node)
		return -EINVAL;

	region_size = node->last - node->start + 1;
	region_start = node->start;
	region_page_size = *page_size;

	len = sizeof(*region_info);
	region_info = kzalloc(len, GFP_KERNEL);
	if (!region_info)
		return -ENOMEM;

	page_count = DIV_ROUND_UP(region_size, region_page_size);

	region_info->dma_base = cpu_to_le64(region_start);
	region_info->page_count = cpu_to_le32(page_count);
	region_info->page_size_log2 = ilog2(region_page_size);

	regions_dma = dma_map_single(pdsc_dev, (void *)region_info, len,
				     DMA_BIDIRECTIONAL);
	if (dma_mapping_error(pdsc_dev, regions_dma)) {
		err = -ENOMEM;
		goto out_free_region_info;
	}

	err = pds_vfio_dirty_enable_cmd(pds_vfio, regions_dma, max_regions);
	dma_unmap_single(pdsc_dev, regions_dma, len, DMA_BIDIRECTIONAL);
	if (err)
		goto out_free_region_info;

	/*
	 * page_count might be adjusted by the device,
	 * update it before freeing region_info DMA
	 */
	page_count = le32_to_cpu(region_info->page_count);

	dev_dbg(&pdev->dev,
		"region_info: regions_dma 0x%llx dma_base 0x%llx page_count %u page_size_log2 %u\n",
		regions_dma, region_start, page_count,
		(u8)ilog2(region_page_size));

	err = pds_vfio_dirty_alloc_bitmaps(dirty, page_count / BITS_PER_BYTE);
	if (err) {
		dev_err(&pdev->dev, "Failed to alloc dirty bitmaps: %pe\n",
			ERR_PTR(err));
		goto out_free_region_info;
	}

	err = pds_vfio_dirty_alloc_sgl(pds_vfio, page_count);
	if (err) {
		dev_err(&pdev->dev, "Failed to alloc dirty sg lists: %pe\n",
			ERR_PTR(err));
		goto out_free_bitmaps;
	}

	dirty->region_start = region_start;
	dirty->region_size = region_size;
	dirty->region_page_size = region_page_size;
	pds_vfio_dirty_set_enabled(pds_vfio);

	pds_vfio_print_guest_region_info(pds_vfio, max_regions);

	kfree(region_info);

	return 0;

out_free_bitmaps:
	pds_vfio_dirty_free_bitmaps(dirty);
out_free_region_info:
	kfree(region_info);
	return err;
}
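
/*
 * send_cmd distinguishes a normal stop, where the device must be told to
 * quit tracking, from teardown paths where the device state is already
 * gone and only the host-side bitmaps and SG lists need to be released.
 */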
void pds_vfio_dirty_disable(struct pds_vfio_pci_device *pds_vfio, bool send_cmd)
{
	if (pds_vfio_dirty_is_enabled(pds_vfio)) {
		pds_vfio_dirty_set_disabled(pds_vfio);
		if (send_cmd)
			pds_vfio_dirty_disable_cmd(pds_vfio);
		pds_vfio_dirty_free_sgl(pds_vfio);
		pds_vfio_dirty_free_bitmaps(&pds_vfio->dirty);
	}

	if (send_cmd)
		pds_vfio_send_host_vf_lm_status_cmd(pds_vfio, PDS_LM_STA_NONE);
}
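
/*
 * Transfer a section of the host seq or ack bitmap to or from the device.
 * The bitmap lives in vmalloc space, so walk it page by page with
 * vmalloc_to_page(), build an sg_table over those pages, and hand the
 * DMA-mapped SG list to the device with the seq/ack command.
 */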
static int pds_vfio_dirty_seq_ack(struct pds_vfio_pci_device *pds_vfio,
				  struct pds_vfio_bmp_info *bmp_info,
				  u32 offset, u32 bmp_bytes, bool read_seq)
{
	const char *bmp_type_str = read_seq ? "read_seq" : "write_ack";
	enum dma_data_direction dma_dir =
		read_seq ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
	struct pci_dev *pdev = pds_vfio->vfio_coredev.pdev;
	struct device *pdsc_dev = &pci_physfn(pdev)->dev;
	unsigned long long npages;
	struct sg_table sg_table;
	struct scatterlist *sg;
	struct page **pages;
	u32 page_offset;
	const void *bmp;
	size_t size;
	u16 num_sge;
	int err;
	int i;

	bmp = (void *)((u64)bmp_info->bmp + offset);
	page_offset = offset_in_page(bmp);
	bmp -= page_offset;

	/*
	 * Start and end of bitmap section to seq/ack might not be page
	 * aligned, so use the page_offset to account for that so there
	 * will be enough pages to represent the bmp_bytes
	 */
	npages = DIV_ROUND_UP_ULL(bmp_bytes + page_offset, PAGE_SIZE);
	pages = kmalloc_array(npages, sizeof(*pages), GFP_KERNEL);
	if (!pages)
		return -ENOMEM;

	for (unsigned long long i = 0; i < npages; i++) {
		struct page *page = vmalloc_to_page(bmp);

		if (!page) {
			err = -EFAULT;
			goto out_free_pages;
		}

		pages[i] = page;
		bmp += PAGE_SIZE;
	}

	err = sg_alloc_table_from_pages(&sg_table, pages, npages, page_offset,
					bmp_bytes, GFP_KERNEL);
	if (err)
		goto out_free_pages;

	err = dma_map_sgtable(pdsc_dev, &sg_table, dma_dir, 0);
	if (err)
		goto out_free_sg_table;

	for_each_sgtable_dma_sg(&sg_table, sg, i) {
		struct pds_lm_sg_elem *sg_elem = &bmp_info->sgl[i];

		sg_elem->addr = cpu_to_le64(sg_dma_address(sg));
		sg_elem->len = cpu_to_le32(sg_dma_len(sg));
	}

	num_sge = sg_table.nents;
	size = num_sge * sizeof(struct pds_lm_sg_elem);
	dma_sync_single_for_device(pdsc_dev, bmp_info->sgl_addr, size, dma_dir);
	err = pds_vfio_dirty_seq_ack_cmd(pds_vfio, bmp_info->sgl_addr, num_sge,
					 offset, bmp_bytes, read_seq);
	if (err)
		dev_err(&pdev->dev,
			"Dirty bitmap %s failed offset %u bmp_bytes %u num_sge %u DMA 0x%llx: %pe\n",
			bmp_type_str, offset, bmp_bytes,
			num_sge, bmp_info->sgl_addr, ERR_PTR(err));
	dma_sync_single_for_cpu(pdsc_dev, bmp_info->sgl_addr, size, dma_dir);

	dma_unmap_sgtable(pdsc_dev, &sg_table, dma_dir, 0);
out_free_sg_table:
	sg_free_table(&sg_table);
out_free_pages:
	kfree(pages);

	return err;
}

static int pds_vfio_dirty_write_ack(struct pds_vfio_pci_device *pds_vfio,
				    u32 offset, u32 len)
{
	return pds_vfio_dirty_seq_ack(pds_vfio, &pds_vfio->dirty.host_ack,
				      offset, len, WRITE_ACK);
}

static int pds_vfio_dirty_read_seq(struct pds_vfio_pci_device *pds_vfio,
				   u32 offset, u32 len)
{
	return pds_vfio_dirty_seq_ack(pds_vfio, &pds_vfio->dirty.host_seq,
				      offset, len, READ_SEQ);
}
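
/*
 * Pages whose bits differ between the seq and ack bitmaps were dirtied
 * since the last sync; report those through the IOVA bitmap and copy seq
 * into ack so the next write_ack acknowledges everything seen so far.
 */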
static int pds_vfio_dirty_process_bitmaps(struct pds_vfio_pci_device *pds_vfio,
					  struct iova_bitmap *dirty_bitmap,
					  u32 bmp_offset, u32 len_bytes)
{
	u64 page_size = pds_vfio->dirty.region_page_size;
	u64 region_start = pds_vfio->dirty.region_start;
	u32 bmp_offset_bit;
	__le64 *seq, *ack;
	int dword_count;

	dword_count = len_bytes / sizeof(u64);
	seq = (__le64 *)((u64)pds_vfio->dirty.host_seq.bmp + bmp_offset);
	ack = (__le64 *)((u64)pds_vfio->dirty.host_ack.bmp + bmp_offset);
	bmp_offset_bit = bmp_offset * 8;

	for (int i = 0; i < dword_count; i++) {
		u64 xor = le64_to_cpu(seq[i]) ^ le64_to_cpu(ack[i]);

		/* prepare for next write_ack call */
		ack[i] = seq[i];

		for (u8 bit_i = 0; bit_i < BITS_PER_TYPE(u64); ++bit_i) {
			if (xor & BIT(bit_i)) {
				u64 abs_bit_i = bmp_offset_bit +
						i * BITS_PER_TYPE(u64) + bit_i;
				u64 addr = abs_bit_i * page_size + region_start;

				iova_bitmap_set(dirty_bitmap, addr, page_size);
			}
		}
	}

	return 0;
}
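
/*
 * Sync one IOVA range: pull the seq bitmap section covering the range from
 * the device, fold seq XOR ack into the caller's iova_bitmap, then push the
 * updated ack section back to the device. Offsets and lengths stay aligned
 * to u64 because the bitmaps are consumed in 64-bit chunks.
 */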
static int pds_vfio_dirty_sync(struct pds_vfio_pci_device *pds_vfio,
			       struct iova_bitmap *dirty_bitmap,
			       unsigned long iova, unsigned long length)
{
	struct device *dev = &pds_vfio->vfio_coredev.pdev->dev;
	struct pds_vfio_dirty *dirty = &pds_vfio->dirty;
	u64 bmp_offset, bmp_bytes;
	u64 bitmap_size, pages;
	int err;

	dev_dbg(dev, "vf%u: Get dirty page bitmap\n", pds_vfio->vf_id);

	if (!pds_vfio_dirty_is_enabled(pds_vfio)) {
		dev_err(dev, "vf%u: Sync failed, dirty tracking is disabled\n",
			pds_vfio->vf_id);
		return -EINVAL;
	}

	pages = DIV_ROUND_UP(length, pds_vfio->dirty.region_page_size);
	bitmap_size =
		round_up(pages, sizeof(u64) * BITS_PER_BYTE) / BITS_PER_BYTE;

	dev_dbg(dev,
		"vf%u: iova 0x%lx length %lu page_size %llu pages %llu bitmap_size %llu\n",
		pds_vfio->vf_id, iova, length, pds_vfio->dirty.region_page_size,
		pages, bitmap_size);

	if (!length || ((iova - dirty->region_start + length) > dirty->region_size)) {
		dev_err(dev, "Invalid iova 0x%lx and/or length 0x%lx to sync\n",
			iova, length);
		return -EINVAL;
	}

	/* bitmap is modified in 64 bit chunks */
	bmp_bytes = ALIGN(DIV_ROUND_UP(length / dirty->region_page_size,
				       sizeof(u64)),
			  sizeof(u64));
	if (bmp_bytes != bitmap_size) {
		dev_err(dev,
			"Calculated bitmap bytes %llu not equal to bitmap size %llu\n",
			bmp_bytes, bitmap_size);
		return -EINVAL;
	}

	bmp_offset = DIV_ROUND_UP((iova - dirty->region_start) /
				  dirty->region_page_size, sizeof(u64));

	dev_dbg(dev,
		"Syncing dirty bitmap, iova 0x%lx length 0x%lx, bmp_offset %llu bmp_bytes %llu\n",
		iova, length, bmp_offset, bmp_bytes);

	err = pds_vfio_dirty_read_seq(pds_vfio, bmp_offset, bmp_bytes);
	if (err)
		return err;

	err = pds_vfio_dirty_process_bitmaps(pds_vfio, dirty_bitmap, bmp_offset,
					     bmp_bytes);
	if (err)
		return err;

	err = pds_vfio_dirty_write_ack(pds_vfio, bmp_offset, bmp_bytes);
	if (err)
		return err;

	return 0;
}
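
/*
 * Entry points wired into this device's vfio_log_ops; state_mutex
 * serializes them against migration state transitions, and
 * pds_vfio_state_mutex_unlock() also handles any deferred reset.
 */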
int pds_vfio_dma_logging_report(struct vfio_device *vdev, unsigned long iova,
				unsigned long length, struct iova_bitmap *dirty)
{
	struct pds_vfio_pci_device *pds_vfio =
		container_of(vdev, struct pds_vfio_pci_device,
			     vfio_coredev.vdev);
	int err;

	mutex_lock(&pds_vfio->state_mutex);
	err = pds_vfio_dirty_sync(pds_vfio, dirty, iova, length);
	pds_vfio_state_mutex_unlock(pds_vfio);

	return err;
}

int pds_vfio_dma_logging_start(struct vfio_device *vdev,
			       struct rb_root_cached *ranges, u32 nnodes,
			       u64 *page_size)
{
	struct pds_vfio_pci_device *pds_vfio =
		container_of(vdev, struct pds_vfio_pci_device,
			     vfio_coredev.vdev);
	int err;

	mutex_lock(&pds_vfio->state_mutex);
	pds_vfio_send_host_vf_lm_status_cmd(pds_vfio, PDS_LM_STA_IN_PROGRESS);
	err = pds_vfio_dirty_enable(pds_vfio, ranges, nnodes, page_size);
	pds_vfio_state_mutex_unlock(pds_vfio);

	return err;
}

int pds_vfio_dma_logging_stop(struct vfio_device *vdev)
{
	struct pds_vfio_pci_device *pds_vfio =
		container_of(vdev, struct pds_vfio_pci_device,
			     vfio_coredev.vdev);

	mutex_lock(&pds_vfio->state_mutex);
	pds_vfio_dirty_disable(pds_vfio, true);
	pds_vfio_state_mutex_unlock(pds_vfio);

	return 0;
}