// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2023 Advanced Micro Devices, Inc. */

#include <linux/interval_tree.h>
#include <linux/vfio.h>

#include <linux/pds/pds_common.h>
#include <linux/pds/pds_core_if.h>
#include <linux/pds/pds_adminq.h>

#include "vfio_dev.h"
#include "cmds.h"
#include "dirty.h"

#define READ_SEQ true
#define WRITE_ACK false

bool pds_vfio_dirty_is_enabled(struct pds_vfio_pci_device *pds_vfio)
{
	return pds_vfio->dirty.is_enabled;
}

void pds_vfio_dirty_set_enabled(struct pds_vfio_pci_device *pds_vfio)
{
	pds_vfio->dirty.is_enabled = true;
}

void pds_vfio_dirty_set_disabled(struct pds_vfio_pci_device *pds_vfio)
{
	pds_vfio->dirty.is_enabled = false;
}

static void
pds_vfio_print_guest_region_info(struct pds_vfio_pci_device *pds_vfio,
				 u8 max_regions)
{
	int len = max_regions * sizeof(struct pds_lm_dirty_region_info);
	struct pci_dev *pdev = pds_vfio->vfio_coredev.pdev;
	struct device *pdsc_dev = &pci_physfn(pdev)->dev;
	struct pds_lm_dirty_region_info *region_info;
	dma_addr_t regions_dma;
	u8 num_regions;
	int err;

	region_info = kcalloc(max_regions,
			      sizeof(struct pds_lm_dirty_region_info),
			      GFP_KERNEL);
	if (!region_info)
		return;

	regions_dma =
		dma_map_single(pdsc_dev, region_info, len, DMA_FROM_DEVICE);
	if (dma_mapping_error(pdsc_dev, regions_dma))
		goto out_free_region_info;

	err = pds_vfio_dirty_status_cmd(pds_vfio, regions_dma, &max_regions,
					&num_regions);
	dma_unmap_single(pdsc_dev, regions_dma, len, DMA_FROM_DEVICE);
	if (err)
		goto out_free_region_info;

	for (unsigned int i = 0; i < num_regions; i++)
		dev_dbg(&pdev->dev,
			"region_info[%d]: dma_base 0x%llx page_count %u page_size_log2 %u\n",
			i, le64_to_cpu(region_info[i].dma_base),
			le32_to_cpu(region_info[i].page_count),
			region_info[i].page_size_log2);

out_free_region_info:
	kfree(region_info);
}
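
/*
 * The seq/ack bitmaps cover one bit per tracked page, so they can be
 * large; allocate them with vzalloc() rather than kmalloc().
 */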
static int pds_vfio_dirty_alloc_bitmaps(struct pds_vfio_dirty *dirty,
					unsigned long bytes)
{
	unsigned long *host_seq_bmp, *host_ack_bmp;

	host_seq_bmp = vzalloc(bytes);
	if (!host_seq_bmp)
		return -ENOMEM;

	host_ack_bmp = vzalloc(bytes);
	if (!host_ack_bmp) {
		/* vzalloc'd memory must be released with vfree, not bitmap_free */
		vfree(host_seq_bmp);
		return -ENOMEM;
	}

	dirty->host_seq.bmp = host_seq_bmp;
	dirty->host_ack.bmp = host_ack_bmp;

	return 0;
}

static void pds_vfio_dirty_free_bitmaps(struct pds_vfio_dirty *dirty)
{
	vfree(dirty->host_seq.bmp);
	vfree(dirty->host_ack.bmp);
	dirty->host_seq.bmp = NULL;
	dirty->host_ack.bmp = NULL;
}

static void __pds_vfio_dirty_free_sgl(struct pds_vfio_pci_device *pds_vfio,
				      struct pds_vfio_bmp_info *bmp_info)
{
	struct pci_dev *pdev = pds_vfio->vfio_coredev.pdev;
	struct device *pdsc_dev = &pci_physfn(pdev)->dev;

	dma_unmap_single(pdsc_dev, bmp_info->sgl_addr,
			 bmp_info->num_sge * sizeof(struct pds_lm_sg_elem),
			 DMA_BIDIRECTIONAL);
	kfree(bmp_info->sgl);

	bmp_info->num_sge = 0;
	bmp_info->sgl = NULL;
	bmp_info->sgl_addr = 0;
}

static void pds_vfio_dirty_free_sgl(struct pds_vfio_pci_device *pds_vfio)
{
	if (pds_vfio->dirty.host_seq.sgl)
		__pds_vfio_dirty_free_sgl(pds_vfio, &pds_vfio->dirty.host_seq);
	if (pds_vfio->dirty.host_ack.sgl)
		__pds_vfio_dirty_free_sgl(pds_vfio, &pds_vfio->dirty.host_ack);
}
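
/*
 * Each SG element can describe up to PAGE_SIZE bytes of bitmap, and each
 * bitmap byte tracks 8 pages, so covering page_count tracked pages needs
 * at most DIV_ROUND_UP(page_count, PAGE_SIZE * 8) elements.
 */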
static int __pds_vfio_dirty_alloc_sgl(struct pds_vfio_pci_device *pds_vfio,
				      struct pds_vfio_bmp_info *bmp_info,
				      u32 page_count)
{
	struct pci_dev *pdev = pds_vfio->vfio_coredev.pdev;
	struct device *pdsc_dev = &pci_physfn(pdev)->dev;
	struct pds_lm_sg_elem *sgl;
	dma_addr_t sgl_addr;
	size_t sgl_size;
	u32 max_sge;

	max_sge = DIV_ROUND_UP(page_count, PAGE_SIZE * 8);
	sgl_size = max_sge * sizeof(struct pds_lm_sg_elem);

	sgl = kzalloc(sgl_size, GFP_KERNEL);
	if (!sgl)
		return -ENOMEM;

	sgl_addr = dma_map_single(pdsc_dev, sgl, sgl_size, DMA_BIDIRECTIONAL);
	if (dma_mapping_error(pdsc_dev, sgl_addr)) {
		kfree(sgl);
		return -EIO;
	}

	bmp_info->sgl = sgl;
	bmp_info->num_sge = max_sge;
	bmp_info->sgl_addr = sgl_addr;

	return 0;
}

static int pds_vfio_dirty_alloc_sgl(struct pds_vfio_pci_device *pds_vfio,
				    u32 page_count)
{
	struct pds_vfio_dirty *dirty = &pds_vfio->dirty;
	int err;

	err = __pds_vfio_dirty_alloc_sgl(pds_vfio, &dirty->host_seq,
					 page_count);
	if (err)
		return err;

	err = __pds_vfio_dirty_alloc_sgl(pds_vfio, &dirty->host_ack,
					 page_count);
	if (err) {
		__pds_vfio_dirty_free_sgl(pds_vfio, &dirty->host_seq);
		return err;
	}

	return 0;
}
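
/*
 * Enable device dirty tracking: verify tracking is currently off, collapse
 * the requested ranges into the single region the device is asked to track,
 * program the device with that region, then allocate the host seq/ack
 * bitmaps and SG lists used to transfer the device's bitmap.
 */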
static int pds_vfio_dirty_enable(struct pds_vfio_pci_device *pds_vfio,
				 struct rb_root_cached *ranges, u32 nnodes,
				 u64 *page_size)
{
	struct pci_dev *pdev = pds_vfio->vfio_coredev.pdev;
	struct device *pdsc_dev = &pci_physfn(pdev)->dev;
	struct pds_vfio_dirty *dirty = &pds_vfio->dirty;
	u64 region_start, region_size, region_page_size;
	struct pds_lm_dirty_region_info *region_info;
	struct interval_tree_node *node = NULL;
	u8 max_regions = 0, num_regions;
	dma_addr_t regions_dma = 0;
	u32 num_ranges = nnodes;
	u32 page_count;
	u16 len;
	int err;

	dev_dbg(&pdev->dev, "vf%u: Start dirty page tracking\n",
		pds_vfio->vf_id);

	if (pds_vfio_dirty_is_enabled(pds_vfio))
		return -EINVAL;

	/* find if dirty tracking is disabled, i.e. num_regions == 0 */
	err = pds_vfio_dirty_status_cmd(pds_vfio, 0, &max_regions,
					&num_regions);
	if (err < 0) {
		dev_err(&pdev->dev, "Failed to get dirty status, err %pe\n",
			ERR_PTR(err));
		return err;
	} else if (num_regions) {
		dev_err(&pdev->dev,
			"Dirty tracking already enabled for %d regions\n",
			num_regions);
		return -EEXIST;
	} else if (!max_regions) {
		dev_err(&pdev->dev,
			"Device doesn't support dirty tracking, max_regions %d\n",
			max_regions);
		return -EOPNOTSUPP;
	}

	/*
	 * Only support 1 region for now. If there are any large gaps in the
	 * VM's address regions, then this would be a waste of memory as we are
	 * generating 2 bitmaps (ack/seq) from the min address to the max
	 * address of the VM's address regions. In the future, if we support
	 * more than one region in the device/driver we can split the bitmaps
	 * on the largest address region gaps. We can do this split up to the
	 * max_regions times returned from the dirty_status command.
	 */
	max_regions = 1;
	if (num_ranges > max_regions) {
		vfio_combine_iova_ranges(ranges, nnodes, max_regions);
		num_ranges = max_regions;
	}

	node = interval_tree_iter_first(ranges, 0, ULONG_MAX);
	if (!node)
		return -EINVAL;

	region_size = node->last - node->start + 1;
	region_start = node->start;
	region_page_size = *page_size;

	len = sizeof(*region_info);
	region_info = kzalloc(len, GFP_KERNEL);
	if (!region_info)
		return -ENOMEM;

	page_count = DIV_ROUND_UP(region_size, region_page_size);

	region_info->dma_base = cpu_to_le64(region_start);
	region_info->page_count = cpu_to_le32(page_count);
	region_info->page_size_log2 = ilog2(region_page_size);

	regions_dma = dma_map_single(pdsc_dev, (void *)region_info, len,
				     DMA_BIDIRECTIONAL);
	if (dma_mapping_error(pdsc_dev, regions_dma)) {
		err = -ENOMEM;
		goto out_free_region_info;
	}

	err = pds_vfio_dirty_enable_cmd(pds_vfio, regions_dma, max_regions);
	dma_unmap_single(pdsc_dev, regions_dma, len, DMA_BIDIRECTIONAL);
	if (err)
		goto out_free_region_info;

	/*
	 * page_count might be adjusted by the device,
	 * update it before freeing region_info DMA
	 */
	page_count = le32_to_cpu(region_info->page_count);

	dev_dbg(&pdev->dev,
		"region_info: regions_dma 0x%llx dma_base 0x%llx page_count %u page_size_log2 %u\n",
		regions_dma, region_start, page_count,
		(u8)ilog2(region_page_size));

	err = pds_vfio_dirty_alloc_bitmaps(dirty, page_count / BITS_PER_BYTE);
	if (err) {
		dev_err(&pdev->dev, "Failed to alloc dirty bitmaps: %pe\n",
			ERR_PTR(err));
		goto out_free_region_info;
	}

	err = pds_vfio_dirty_alloc_sgl(pds_vfio, page_count);
	if (err) {
		dev_err(&pdev->dev, "Failed to alloc dirty sg lists: %pe\n",
			ERR_PTR(err));
		goto out_free_bitmaps;
	}

	dirty->region_start = region_start;
	dirty->region_size = region_size;
	dirty->region_page_size = region_page_size;
	pds_vfio_dirty_set_enabled(pds_vfio);

	pds_vfio_print_guest_region_info(pds_vfio, max_regions);

	kfree(region_info);

	return 0;

out_free_bitmaps:
	pds_vfio_dirty_free_bitmaps(dirty);
out_free_region_info:
	kfree(region_info);
	return err;
}

void pds_vfio_dirty_disable(struct pds_vfio_pci_device *pds_vfio, bool send_cmd)
{
	if (pds_vfio_dirty_is_enabled(pds_vfio)) {
		pds_vfio_dirty_set_disabled(pds_vfio);
		if (send_cmd)
			pds_vfio_dirty_disable_cmd(pds_vfio);
		pds_vfio_dirty_free_sgl(pds_vfio);
		pds_vfio_dirty_free_bitmaps(&pds_vfio->dirty);
	}

	if (send_cmd)
		pds_vfio_send_host_vf_lm_status_cmd(pds_vfio, PDS_LM_STA_NONE);
}
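
/*
 * Transfer one window of a host bitmap to/from the device: pin the
 * vmalloc'd bitmap pages backing [offset, offset + bmp_bytes), build an
 * SG table over them, and issue either a "read seq" (device to host) or
 * "write ack" (host to device) command for that window.
 */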
static int pds_vfio_dirty_seq_ack(struct pds_vfio_pci_device *pds_vfio,
				  struct pds_vfio_bmp_info *bmp_info,
				  u32 offset, u32 bmp_bytes, bool read_seq)
{
	const char *bmp_type_str = read_seq ? "read_seq" : "write_ack";
	u8 dma_dir = read_seq ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
	struct pci_dev *pdev = pds_vfio->vfio_coredev.pdev;
	struct device *pdsc_dev = &pci_physfn(pdev)->dev;
	unsigned long long npages;
	struct sg_table sg_table;
	struct scatterlist *sg;
	struct page **pages;
	u32 page_offset;
	const void *bmp;
	size_t size;
	u16 num_sge;
	int err;
	int i;

	bmp = (void *)((u64)bmp_info->bmp + offset);
	page_offset = offset_in_page(bmp);
	bmp -= page_offset;

	/*
	 * Start and end of bitmap section to seq/ack might not be page
	 * aligned, so use the page_offset to account for that so there
	 * will be enough pages to represent the bmp_bytes
	 */
	npages = DIV_ROUND_UP_ULL(bmp_bytes + page_offset, PAGE_SIZE);
	pages = kmalloc_array(npages, sizeof(*pages), GFP_KERNEL);
	if (!pages)
		return -ENOMEM;

	for (unsigned long long i = 0; i < npages; i++) {
		struct page *page = vmalloc_to_page(bmp);

		if (!page) {
			err = -EFAULT;
			goto out_free_pages;
		}

		pages[i] = page;
		bmp += PAGE_SIZE;
	}

	err = sg_alloc_table_from_pages(&sg_table, pages, npages, page_offset,
					bmp_bytes, GFP_KERNEL);
	if (err)
		goto out_free_pages;

	err = dma_map_sgtable(pdsc_dev, &sg_table, dma_dir, 0);
	if (err)
		goto out_free_sg_table;

	for_each_sgtable_dma_sg(&sg_table, sg, i) {
		struct pds_lm_sg_elem *sg_elem = &bmp_info->sgl[i];

		sg_elem->addr = cpu_to_le64(sg_dma_address(sg));
		sg_elem->len = cpu_to_le32(sg_dma_len(sg));
	}

	num_sge = sg_table.nents;
	size = num_sge * sizeof(struct pds_lm_sg_elem);
	dma_sync_single_for_device(pdsc_dev, bmp_info->sgl_addr, size, dma_dir);
	err = pds_vfio_dirty_seq_ack_cmd(pds_vfio, bmp_info->sgl_addr, num_sge,
					 offset, bmp_bytes, read_seq);
	if (err)
		dev_err(&pdev->dev,
			"Dirty bitmap %s failed offset %u bmp_bytes %u num_sge %u DMA 0x%llx: %pe\n",
			bmp_type_str, offset, bmp_bytes,
			num_sge, bmp_info->sgl_addr, ERR_PTR(err));
	dma_sync_single_for_cpu(pdsc_dev, bmp_info->sgl_addr, size, dma_dir);

	dma_unmap_sgtable(pdsc_dev, &sg_table, dma_dir, 0);
out_free_sg_table:
	sg_free_table(&sg_table);
out_free_pages:
	kfree(pages);

	return err;
}

static int pds_vfio_dirty_write_ack(struct pds_vfio_pci_device *pds_vfio,
				    u32 offset, u32 len)
{
	return pds_vfio_dirty_seq_ack(pds_vfio, &pds_vfio->dirty.host_ack,
				      offset, len, WRITE_ACK);
}

static int pds_vfio_dirty_read_seq(struct pds_vfio_pci_device *pds_vfio,
				   u32 offset, u32 len)
{
	return pds_vfio_dirty_seq_ack(pds_vfio, &pds_vfio->dirty.host_seq,
				      offset, len, READ_SEQ);
}

/*
 * Pages whose seq and ack bits differ were dirtied by the device since the
 * last ack; report their IOVAs via the caller's iova_bitmap and copy seq
 * over ack so the next write_ack acknowledges them to the device.
 */
static int pds_vfio_dirty_process_bitmaps(struct pds_vfio_pci_device *pds_vfio,
					  struct iova_bitmap *dirty_bitmap,
					  u32 bmp_offset, u32 len_bytes)
{
	u64 page_size = pds_vfio->dirty.region_page_size;
	u64 region_start = pds_vfio->dirty.region_start;
	u32 bmp_offset_bit;
	__le64 *seq, *ack;
	int dword_count;

	dword_count = len_bytes / sizeof(u64);
	seq = (__le64 *)((u64)pds_vfio->dirty.host_seq.bmp + bmp_offset);
	ack = (__le64 *)((u64)pds_vfio->dirty.host_ack.bmp + bmp_offset);
	bmp_offset_bit = bmp_offset * 8;

	for (int i = 0; i < dword_count; i++) {
		u64 xor = le64_to_cpu(seq[i]) ^ le64_to_cpu(ack[i]);

		/* prepare for next write_ack call */
		ack[i] = seq[i];

		for (u8 bit_i = 0; bit_i < BITS_PER_TYPE(u64); ++bit_i) {
			if (xor & BIT(bit_i)) {
				u64 abs_bit_i = bmp_offset_bit +
						i * BITS_PER_TYPE(u64) + bit_i;
				u64 addr = abs_bit_i * page_size + region_start;

				iova_bitmap_set(dirty_bitmap, addr, page_size);
			}
		}
	}

	return 0;
}
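
/*
 * Sync one IOVA range with userspace: validate it against the tracked
 * region, convert it into a 64-bit aligned window of the host bitmaps,
 * then read_seq from the device, report seq ^ ack differences, and
 * write_ack the result back.
 */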
static int pds_vfio_dirty_sync(struct pds_vfio_pci_device *pds_vfio,
			       struct iova_bitmap *dirty_bitmap,
			       unsigned long iova, unsigned long length)
{
	struct device *dev = &pds_vfio->vfio_coredev.pdev->dev;
	struct pds_vfio_dirty *dirty = &pds_vfio->dirty;
	u64 bmp_offset, bmp_bytes;
	u64 bitmap_size, pages;
	int err;

	dev_dbg(dev, "vf%u: Get dirty page bitmap\n", pds_vfio->vf_id);

	if (!pds_vfio_dirty_is_enabled(pds_vfio)) {
		dev_err(dev, "vf%u: Sync failed, dirty tracking is disabled\n",
			pds_vfio->vf_id);
		return -EINVAL;
	}

	pages = DIV_ROUND_UP(length, pds_vfio->dirty.region_page_size);
	bitmap_size =
		round_up(pages, sizeof(u64) * BITS_PER_BYTE) / BITS_PER_BYTE;

	dev_dbg(dev,
		"vf%u: iova 0x%lx length %lu page_size %llu pages %llu bitmap_size %llu\n",
		pds_vfio->vf_id, iova, length, pds_vfio->dirty.region_page_size,
		pages, bitmap_size);

	if (!length || ((dirty->region_start + iova + length) >
			(dirty->region_start + dirty->region_size))) {
		dev_err(dev, "Invalid iova 0x%lx and/or length 0x%lx to sync\n",
			iova, length);
		return -EINVAL;
	}

	/* bitmap is modified in 64 bit chunks */
	bmp_bytes = ALIGN(DIV_ROUND_UP(length / dirty->region_page_size,
				       sizeof(u64)),
			  sizeof(u64));
	if (bmp_bytes != bitmap_size) {
		dev_err(dev,
			"Calculated bitmap bytes %llu not equal to bitmap size %llu\n",
			bmp_bytes, bitmap_size);
		return -EINVAL;
	}

	bmp_offset = DIV_ROUND_UP(iova / dirty->region_page_size, sizeof(u64));

	dev_dbg(dev,
		"Syncing dirty bitmap, iova 0x%lx length 0x%lx, bmp_offset %llu bmp_bytes %llu\n",
		iova, length, bmp_offset, bmp_bytes);

	err = pds_vfio_dirty_read_seq(pds_vfio, bmp_offset, bmp_bytes);
	if (err)
		return err;

	err = pds_vfio_dirty_process_bitmaps(pds_vfio, dirty_bitmap, bmp_offset,
					     bmp_bytes);
	if (err)
		return err;

	err = pds_vfio_dirty_write_ack(pds_vfio, bmp_offset, bmp_bytes);
	if (err)
		return err;

	return 0;
}
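
/*
 * VFIO DMA logging entry points; all serialize on the device's
 * state_mutex.
 */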
int pds_vfio_dma_logging_report(struct vfio_device *vdev, unsigned long iova,
				unsigned long length, struct iova_bitmap *dirty)
{
	struct pds_vfio_pci_device *pds_vfio =
		container_of(vdev, struct pds_vfio_pci_device,
			     vfio_coredev.vdev);
	int err;

	mutex_lock(&pds_vfio->state_mutex);
	err = pds_vfio_dirty_sync(pds_vfio, dirty, iova, length);
	pds_vfio_state_mutex_unlock(pds_vfio);

	return err;
}

int pds_vfio_dma_logging_start(struct vfio_device *vdev,
			       struct rb_root_cached *ranges, u32 nnodes,
			       u64 *page_size)
{
	struct pds_vfio_pci_device *pds_vfio =
		container_of(vdev, struct pds_vfio_pci_device,
			     vfio_coredev.vdev);
	int err;

	mutex_lock(&pds_vfio->state_mutex);
	pds_vfio_send_host_vf_lm_status_cmd(pds_vfio, PDS_LM_STA_IN_PROGRESS);
	err = pds_vfio_dirty_enable(pds_vfio, ranges, nnodes, page_size);
	pds_vfio_state_mutex_unlock(pds_vfio);

	return err;
}

int pds_vfio_dma_logging_stop(struct vfio_device *vdev)
{
	struct pds_vfio_pci_device *pds_vfio =
		container_of(vdev, struct pds_vfio_pci_device,
			     vfio_coredev.vdev);

	mutex_lock(&pds_vfio->state_mutex);
	pds_vfio_dirty_disable(pds_vfio, true);
	pds_vfio_state_mutex_unlock(pds_vfio);

	return 0;
}