xref: /openbmc/linux/drivers/gpu/drm/i915/gvt/gtt.c (revision a90c367e)
12707e444SZhi Wang /*
22707e444SZhi Wang  * GTT virtualization
32707e444SZhi Wang  *
42707e444SZhi Wang  * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
52707e444SZhi Wang  *
62707e444SZhi Wang  * Permission is hereby granted, free of charge, to any person obtaining a
72707e444SZhi Wang  * copy of this software and associated documentation files (the "Software"),
82707e444SZhi Wang  * to deal in the Software without restriction, including without limitation
92707e444SZhi Wang  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
102707e444SZhi Wang  * and/or sell copies of the Software, and to permit persons to whom the
112707e444SZhi Wang  * Software is furnished to do so, subject to the following conditions:
122707e444SZhi Wang  *
132707e444SZhi Wang  * The above copyright notice and this permission notice (including the next
142707e444SZhi Wang  * paragraph) shall be included in all copies or substantial portions of the
152707e444SZhi Wang  * Software.
162707e444SZhi Wang  *
172707e444SZhi Wang  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
182707e444SZhi Wang  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
192707e444SZhi Wang  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
202707e444SZhi Wang  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
212707e444SZhi Wang  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
222707e444SZhi Wang  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
232707e444SZhi Wang  * SOFTWARE.
242707e444SZhi Wang  *
252707e444SZhi Wang  * Authors:
262707e444SZhi Wang  *    Zhi Wang <zhi.a.wang@intel.com>
272707e444SZhi Wang  *    Zhenyu Wang <zhenyuw@linux.intel.com>
282707e444SZhi Wang  *    Xiao Zheng <xiao.zheng@intel.com>
292707e444SZhi Wang  *
302707e444SZhi Wang  * Contributors:
312707e444SZhi Wang  *    Min He <min.he@intel.com>
322707e444SZhi Wang  *    Bing Niu <bing.niu@intel.com>
332707e444SZhi Wang  *
342707e444SZhi Wang  */
352707e444SZhi Wang 
362707e444SZhi Wang #include "i915_drv.h"
37feddf6e8SZhenyu Wang #include "gvt.h"
38feddf6e8SZhenyu Wang #include "i915_pvinfo.h"
392707e444SZhi Wang #include "trace.h"
402707e444SZhi Wang 
410d6419e9SMatt Roper #include "gt/intel_gt_regs.h"
420d6419e9SMatt Roper 
/*
 * gvt_vdbg_mm() is a very-verbose variant of gvt_dbg_mm(): it expands to
 * real debug output only when VERBOSE_DEBUG is defined at build time,
 * and compiles away to nothing otherwise.
 */
#if defined(VERBOSE_DEBUG)
#define gvt_vdbg_mm(fmt, args...) gvt_dbg_mm(fmt, ##args)
#else
#define gvt_vdbg_mm(fmt, args...)
#endif
48bc37ab56SChangbin Du 
/* Out-of-sync page-table shadowing is disabled by default. */
static bool enable_out_of_sync = false;
/* Number of out-of-sync tracking pages preallocated up front. */
static int preallocated_oos_pages = 8192;
512707e444SZhi Wang 
522707e444SZhi Wang /*
532707e444SZhi Wang  * validate a gm address and related range size,
542707e444SZhi Wang  * translate it to host gm address
552707e444SZhi Wang  */
intel_gvt_ggtt_validate_range(struct intel_vgpu * vgpu,u64 addr,u32 size)562707e444SZhi Wang bool intel_gvt_ggtt_validate_range(struct intel_vgpu *vgpu, u64 addr, u32 size)
572707e444SZhi Wang {
585e0b3f3bSXiong Zhang 	if (size == 0)
595e0b3f3bSXiong Zhang 		return vgpu_gmadr_is_valid(vgpu, addr);
605e0b3f3bSXiong Zhang 
615e0b3f3bSXiong Zhang 	if (vgpu_gmadr_is_aperture(vgpu, addr) &&
625e0b3f3bSXiong Zhang 	    vgpu_gmadr_is_aperture(vgpu, addr + size - 1))
635e0b3f3bSXiong Zhang 		return true;
645e0b3f3bSXiong Zhang 	else if (vgpu_gmadr_is_hidden(vgpu, addr) &&
655e0b3f3bSXiong Zhang 		 vgpu_gmadr_is_hidden(vgpu, addr + size - 1))
665e0b3f3bSXiong Zhang 		return true;
675e0b3f3bSXiong Zhang 
685e0b3f3bSXiong Zhang 	gvt_dbg_mm("Invalid ggtt range at 0x%llx, size: 0x%x\n",
69695fbc08STina Zhang 		     addr, size);
702707e444SZhi Wang 	return false;
712707e444SZhi Wang }
722707e444SZhi Wang 
732707e444SZhi Wang /* translate a guest gmadr to host gmadr */
/*
 * Translate a guest graphics memory address to the host view by
 * rebasing it from the vGPU's aperture/hidden window onto the host's.
 * Returns 0 on success, -EACCES (with a WARN) for an invalid address.
 */
int intel_gvt_ggtt_gmadr_g2h(struct intel_vgpu *vgpu, u64 g_addr, u64 *h_addr)
{
	struct drm_i915_private *i915 = vgpu->gvt->gt->i915;
	u64 offset;

	if (drm_WARN(&i915->drm, !vgpu_gmadr_is_valid(vgpu, g_addr),
		     "invalid guest gmadr %llx\n", g_addr))
		return -EACCES;

	if (vgpu_gmadr_is_aperture(vgpu, g_addr)) {
		offset = g_addr - vgpu_aperture_offset(vgpu);
		*h_addr = vgpu_aperture_gmadr_base(vgpu) + offset;
	} else {
		offset = g_addr - vgpu_hidden_offset(vgpu);
		*h_addr = vgpu_hidden_gmadr_base(vgpu) + offset;
	}
	return 0;
}
902707e444SZhi Wang 
912707e444SZhi Wang /* translate a host gmadr to guest gmadr */
/*
 * Translate a host graphics memory address back to the guest view —
 * the inverse of intel_gvt_ggtt_gmadr_g2h().  Returns 0 on success,
 * -EACCES (with a WARN) for an address outside the host GM space.
 */
int intel_gvt_ggtt_gmadr_h2g(struct intel_vgpu *vgpu, u64 h_addr, u64 *g_addr)
{
	struct drm_i915_private *i915 = vgpu->gvt->gt->i915;
	u64 offset;

	if (drm_WARN(&i915->drm, !gvt_gmadr_is_valid(vgpu->gvt, h_addr),
		     "invalid host gmadr %llx\n", h_addr))
		return -EACCES;

	if (gvt_gmadr_is_aperture(vgpu->gvt, h_addr)) {
		offset = h_addr - gvt_aperture_gmadr_base(vgpu->gvt);
		*g_addr = vgpu_aperture_gmadr_base(vgpu) + offset;
	} else {
		offset = h_addr - gvt_hidden_gmadr_base(vgpu->gvt);
		*g_addr = vgpu_hidden_gmadr_base(vgpu) + offset;
	}
	return 0;
}
1082707e444SZhi Wang 
intel_gvt_ggtt_index_g2h(struct intel_vgpu * vgpu,unsigned long g_index,unsigned long * h_index)1092707e444SZhi Wang int intel_gvt_ggtt_index_g2h(struct intel_vgpu *vgpu, unsigned long g_index,
1102707e444SZhi Wang 			     unsigned long *h_index)
1112707e444SZhi Wang {
1122707e444SZhi Wang 	u64 h_addr;
1132707e444SZhi Wang 	int ret;
1142707e444SZhi Wang 
1159556e118SZhi Wang 	ret = intel_gvt_ggtt_gmadr_g2h(vgpu, g_index << I915_GTT_PAGE_SHIFT,
1162707e444SZhi Wang 				       &h_addr);
1172707e444SZhi Wang 	if (ret)
1182707e444SZhi Wang 		return ret;
1192707e444SZhi Wang 
1209556e118SZhi Wang 	*h_index = h_addr >> I915_GTT_PAGE_SHIFT;
1212707e444SZhi Wang 	return 0;
1222707e444SZhi Wang }
1232707e444SZhi Wang 
intel_gvt_ggtt_h2g_index(struct intel_vgpu * vgpu,unsigned long h_index,unsigned long * g_index)1242707e444SZhi Wang int intel_gvt_ggtt_h2g_index(struct intel_vgpu *vgpu, unsigned long h_index,
1252707e444SZhi Wang 			     unsigned long *g_index)
1262707e444SZhi Wang {
1272707e444SZhi Wang 	u64 g_addr;
1282707e444SZhi Wang 	int ret;
1292707e444SZhi Wang 
1309556e118SZhi Wang 	ret = intel_gvt_ggtt_gmadr_h2g(vgpu, h_index << I915_GTT_PAGE_SHIFT,
1312707e444SZhi Wang 				       &g_addr);
1322707e444SZhi Wang 	if (ret)
1332707e444SZhi Wang 		return ret;
1342707e444SZhi Wang 
1359556e118SZhi Wang 	*g_index = g_addr >> I915_GTT_PAGE_SHIFT;
1362707e444SZhi Wang 	return 0;
1372707e444SZhi Wang }
1382707e444SZhi Wang 
/* True for entry types (excluding the generic PTE/root entry markers). */
#define gtt_type_is_entry(type) \
	(type > GTT_TYPE_INVALID && type < GTT_TYPE_PPGTT_ENTRY \
	 && type != GTT_TYPE_PPGTT_PTE_ENTRY \
	 && type != GTT_TYPE_PPGTT_ROOT_ENTRY)

/* True for types that describe a page table of some level. */
#define gtt_type_is_pt(type) \
	(type >= GTT_TYPE_PPGTT_PTE_PT && type < GTT_TYPE_MAX)

/* True only for the last-level (PTE) page-table type. */
#define gtt_type_is_pte_pt(type) \
	(type == GTT_TYPE_PPGTT_PTE_PT)

/* True for entry types that act as PPGTT root pointers. */
#define gtt_type_is_root_pointer(type) \
	(gtt_type_is_entry(type) && type > GTT_TYPE_PPGTT_ROOT_ENTRY)

/* Initialize a GTT entry descriptor with type, pdev and raw 64-bit value. */
#define gtt_init_entry(e, t, p, v) do { \
	(e)->type = t; \
	(e)->pdev = p; \
	memcpy(&(e)->val64, &v, sizeof(v)); \
} while (0)
1582707e444SZhi Wang 
1592707e444SZhi Wang /*
1602707e444SZhi Wang  * Mappings between GTT_TYPE* enumerations.
1612707e444SZhi Wang  * Following information can be found according to the given type:
1622707e444SZhi Wang  * - type of next level page table
1632707e444SZhi Wang  * - type of entry inside this level page table
1642707e444SZhi Wang  * - type of entry with PSE set
1652707e444SZhi Wang  *
1662707e444SZhi Wang  * If the given type doesn't have such a kind of information,
1672707e444SZhi Wang  * e.g. give a l4 root entry type, then request to get its PSE type,
1682707e444SZhi Wang  * give a PTE page table type, then request to get its next level page
1692707e444SZhi Wang  * table type, as we know l4 root entry doesn't have a PSE bit,
1702707e444SZhi Wang  * and a PTE page table doesn't have a next level page table type,
1712707e444SZhi Wang  * GTT_TYPE_INVALID will be returned. This is useful when traversing a
1722707e444SZhi Wang  * page table.
1732707e444SZhi Wang  */
1742707e444SZhi Wang 
/*
 * One record of the GTT_TYPE_* mapping table described above.  Every
 * field holds a GTT_TYPE_* value; GTT_TYPE_INVALID marks "not applicable".
 */
struct gtt_type_table_entry {
	int entry_type;		/* type of an entry inside this page table */
	int pt_type;		/* type of this page table itself */
	int next_pt_type;	/* type of the next-level page table */
	int pse_entry_type;	/* entry type when the PSE/IPS bit is set */
};

/* Designated-initializer helper for one gtt_type_table[] slot. */
#define GTT_TYPE_TABLE_ENTRY(type, e_type, cpt_type, npt_type, pse_type) \
	[type] = { \
		.entry_type = e_type, \
		.pt_type = cpt_type, \
		.next_pt_type = npt_type, \
		.pse_entry_type = pse_type, \
	}
1892707e444SZhi Wang 
/*
 * Mapping table indexed by GTT_TYPE_*; see the block comment above for
 * the meaning of each column.  Looked up via the get_*_type() helpers.
 */
static const struct gtt_type_table_entry gtt_type_table[] = {
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_ROOT_L4_ENTRY,
			GTT_TYPE_PPGTT_ROOT_L4_ENTRY,
			GTT_TYPE_INVALID,
			GTT_TYPE_PPGTT_PML4_PT,
			GTT_TYPE_INVALID),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PML4_PT,
			GTT_TYPE_PPGTT_PML4_ENTRY,
			GTT_TYPE_PPGTT_PML4_PT,
			GTT_TYPE_PPGTT_PDP_PT,
			GTT_TYPE_INVALID),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PML4_ENTRY,
			GTT_TYPE_PPGTT_PML4_ENTRY,
			GTT_TYPE_PPGTT_PML4_PT,
			GTT_TYPE_PPGTT_PDP_PT,
			GTT_TYPE_INVALID),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDP_PT,
			GTT_TYPE_PPGTT_PDP_ENTRY,
			GTT_TYPE_PPGTT_PDP_PT,
			GTT_TYPE_PPGTT_PDE_PT,
			GTT_TYPE_PPGTT_PTE_1G_ENTRY),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_ROOT_L3_ENTRY,
			GTT_TYPE_PPGTT_ROOT_L3_ENTRY,
			GTT_TYPE_INVALID,
			GTT_TYPE_PPGTT_PDE_PT,
			GTT_TYPE_PPGTT_PTE_1G_ENTRY),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDP_ENTRY,
			GTT_TYPE_PPGTT_PDP_ENTRY,
			GTT_TYPE_PPGTT_PDP_PT,
			GTT_TYPE_PPGTT_PDE_PT,
			GTT_TYPE_PPGTT_PTE_1G_ENTRY),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDE_PT,
			GTT_TYPE_PPGTT_PDE_ENTRY,
			GTT_TYPE_PPGTT_PDE_PT,
			GTT_TYPE_PPGTT_PTE_PT,
			GTT_TYPE_PPGTT_PTE_2M_ENTRY),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDE_ENTRY,
			GTT_TYPE_PPGTT_PDE_ENTRY,
			GTT_TYPE_PPGTT_PDE_PT,
			GTT_TYPE_PPGTT_PTE_PT,
			GTT_TYPE_PPGTT_PTE_2M_ENTRY),
	/* We take IPS bit as 'PSE' for PTE level. */
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_PT,
			GTT_TYPE_PPGTT_PTE_4K_ENTRY,
			GTT_TYPE_PPGTT_PTE_PT,
			GTT_TYPE_INVALID,
			GTT_TYPE_PPGTT_PTE_64K_ENTRY),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_4K_ENTRY,
			GTT_TYPE_PPGTT_PTE_4K_ENTRY,
			GTT_TYPE_PPGTT_PTE_PT,
			GTT_TYPE_INVALID,
			GTT_TYPE_PPGTT_PTE_64K_ENTRY),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_64K_ENTRY,
			GTT_TYPE_PPGTT_PTE_4K_ENTRY,
			GTT_TYPE_PPGTT_PTE_PT,
			GTT_TYPE_INVALID,
			GTT_TYPE_PPGTT_PTE_64K_ENTRY),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_2M_ENTRY,
			GTT_TYPE_PPGTT_PDE_ENTRY,
			GTT_TYPE_PPGTT_PDE_PT,
			GTT_TYPE_INVALID,
			GTT_TYPE_PPGTT_PTE_2M_ENTRY),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_1G_ENTRY,
			GTT_TYPE_PPGTT_PDP_ENTRY,
			GTT_TYPE_PPGTT_PDP_PT,
			GTT_TYPE_INVALID,
			GTT_TYPE_PPGTT_PTE_1G_ENTRY),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_GGTT_PTE,
			GTT_TYPE_GGTT_PTE,
			GTT_TYPE_INVALID,
			GTT_TYPE_INVALID,
			GTT_TYPE_INVALID),
};
2632707e444SZhi Wang 
/* Next-level page-table type for @type, or GTT_TYPE_INVALID. */
static inline int get_next_pt_type(int type)
{
	return gtt_type_table[type].next_pt_type;
}
2682707e444SZhi Wang 
/* Type of an entry held inside a page table of @type. */
static inline int get_entry_type(int type)
{
	return gtt_type_table[type].entry_type;
}
2732707e444SZhi Wang 
/* Entry type for @type when its PSE/IPS bit is set, or GTT_TYPE_INVALID. */
static inline int get_pse_type(int type)
{
	return gtt_type_table[type].pse_entry_type;
}
2782707e444SZhi Wang 
read_pte64(struct i915_ggtt * ggtt,unsigned long index)279a61ac1e7SChris Wilson static u64 read_pte64(struct i915_ggtt *ggtt, unsigned long index)
2802707e444SZhi Wang {
281a61ac1e7SChris Wilson 	void __iomem *addr = (gen8_pte_t __iomem *)ggtt->gsm + index;
2822707e444SZhi Wang 
283905a5035SChangbin Du 	return readq(addr);
2842707e444SZhi Wang }
2852707e444SZhi Wang 
/*
 * Flush the global GTT by writing GFX_FLSH_CNTL_GEN6, bracketed by the
 * pre/post helpers required around raw MMIO access.
 */
static void ggtt_invalidate(struct intel_gt *gt)
{
	mmio_hw_access_pre(gt);
	intel_uncore_write(gt->uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
	mmio_hw_access_post(gt);
}
292af2c6399SChuanxiao Dong 
/* Write one 64-bit PTE straight into the HW GGTT mapping at @index. */
static void write_pte64(struct i915_ggtt *ggtt, unsigned long index, u64 pte)
{
	writeq(pte, (gen8_pte_t __iomem *)ggtt->gsm + index);
}
2992707e444SZhi Wang 
/*
 * Load a 64-bit GTT entry into @e->val64 from one of three sources:
 * guest memory at @gpa (when @hypervisor_access), a shadow page-table
 * page at @pt, or — when @pt is NULL — the HW GGTT itself.
 * Returns 0 on success or a negative errno (with a WARN) on failure.
 */
static inline int gtt_get_entry64(void *pt,
		struct intel_gvt_gtt_entry *e,
		unsigned long index, bool hypervisor_access, unsigned long gpa,
		struct intel_vgpu *vgpu)
{
	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;

	/* This code path only supports 8-byte GTT entries. */
	if (WARN_ON(info->gtt_entry_size != 8))
		return -EINVAL;

	if (hypervisor_access) {
		int ret = intel_gvt_read_gpa(vgpu,
				gpa + (index << info->gtt_entry_size_shift),
				&e->val64, 8);

		if (WARN_ON(ret))
			return ret;
	} else if (pt) {
		e->val64 = *((u64 *)pt + index);
	} else {
		e->val64 = read_pte64(vgpu->gvt->gt->ggtt, index);
	}
	return 0;
}
3242707e444SZhi Wang 
/*
 * Store the 64-bit entry @e->val64 to one of three destinations: guest
 * memory at @gpa (when @hypervisor_access), a shadow page-table page at
 * @pt, or — when @pt is NULL — the HW GGTT itself.
 * Returns 0 on success or a negative errno (with a WARN) on failure.
 */
static inline int gtt_set_entry64(void *pt,
		struct intel_gvt_gtt_entry *e,
		unsigned long index, bool hypervisor_access, unsigned long gpa,
		struct intel_vgpu *vgpu)
{
	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;

	/* This code path only supports 8-byte GTT entries. */
	if (WARN_ON(info->gtt_entry_size != 8))
		return -EINVAL;

	if (hypervisor_access) {
		int ret = intel_gvt_write_gpa(vgpu,
				gpa + (index << info->gtt_entry_size_shift),
				&e->val64, 8);

		if (WARN_ON(ret))
			return ret;
	} else if (pt) {
		*((u64 *)pt + index) = e->val64;
	} else {
		write_pte64(vgpu->gvt->gt->ggtt, index, e->val64);
	}
	return 0;
}
3492707e444SZhi Wang 
/* Guest address width assumed by the address-field masks below. */
#define GTT_HAW 46

/* PTE address-field masks, one per supported page size. */
#define ADDR_1G_MASK	GENMASK_ULL(GTT_HAW - 1, 30)
#define ADDR_2M_MASK	GENMASK_ULL(GTT_HAW - 1, 21)
#define ADDR_64K_MASK	GENMASK_ULL(GTT_HAW - 1, 16)
#define ADDR_4K_MASK	GENMASK_ULL(GTT_HAW - 1, 12)

/* Upper PTE bits GVT reuses for its own shadow-PTE bookkeeping. */
#define GTT_SPTE_FLAG_MASK GENMASK_ULL(62, 52)
#define GTT_SPTE_FLAG_64K_SPLITED BIT(52) /* splited 64K gtt entry */

/* A 64K entry occupies 16 consecutive 4K slots in the PTE table. */
#define GTT_64K_PTE_STRIDE 16
3614c9414d7SChangbin Du 
gen8_gtt_get_pfn(struct intel_gvt_gtt_entry * e)3622707e444SZhi Wang static unsigned long gen8_gtt_get_pfn(struct intel_gvt_gtt_entry *e)
3632707e444SZhi Wang {
3642707e444SZhi Wang 	unsigned long pfn;
3652707e444SZhi Wang 
3662707e444SZhi Wang 	if (e->type == GTT_TYPE_PPGTT_PTE_1G_ENTRY)
367d861ca23SChangbin Du 		pfn = (e->val64 & ADDR_1G_MASK) >> PAGE_SHIFT;
3682707e444SZhi Wang 	else if (e->type == GTT_TYPE_PPGTT_PTE_2M_ENTRY)
369d861ca23SChangbin Du 		pfn = (e->val64 & ADDR_2M_MASK) >> PAGE_SHIFT;
370b294657dSChangbin Du 	else if (e->type == GTT_TYPE_PPGTT_PTE_64K_ENTRY)
371b294657dSChangbin Du 		pfn = (e->val64 & ADDR_64K_MASK) >> PAGE_SHIFT;
3722707e444SZhi Wang 	else
373d861ca23SChangbin Du 		pfn = (e->val64 & ADDR_4K_MASK) >> PAGE_SHIFT;
3742707e444SZhi Wang 	return pfn;
3752707e444SZhi Wang }
3762707e444SZhi Wang 
gen8_gtt_set_pfn(struct intel_gvt_gtt_entry * e,unsigned long pfn)3772707e444SZhi Wang static void gen8_gtt_set_pfn(struct intel_gvt_gtt_entry *e, unsigned long pfn)
3782707e444SZhi Wang {
3792707e444SZhi Wang 	if (e->type == GTT_TYPE_PPGTT_PTE_1G_ENTRY) {
3802707e444SZhi Wang 		e->val64 &= ~ADDR_1G_MASK;
381d861ca23SChangbin Du 		pfn &= (ADDR_1G_MASK >> PAGE_SHIFT);
3822707e444SZhi Wang 	} else if (e->type == GTT_TYPE_PPGTT_PTE_2M_ENTRY) {
3832707e444SZhi Wang 		e->val64 &= ~ADDR_2M_MASK;
384d861ca23SChangbin Du 		pfn &= (ADDR_2M_MASK >> PAGE_SHIFT);
385b294657dSChangbin Du 	} else if (e->type == GTT_TYPE_PPGTT_PTE_64K_ENTRY) {
386b294657dSChangbin Du 		e->val64 &= ~ADDR_64K_MASK;
387b294657dSChangbin Du 		pfn &= (ADDR_64K_MASK >> PAGE_SHIFT);
3882707e444SZhi Wang 	} else {
3892707e444SZhi Wang 		e->val64 &= ~ADDR_4K_MASK;
390d861ca23SChangbin Du 		pfn &= (ADDR_4K_MASK >> PAGE_SHIFT);
3912707e444SZhi Wang 	}
3922707e444SZhi Wang 
393d861ca23SChangbin Du 	e->val64 |= (pfn << PAGE_SHIFT);
3942707e444SZhi Wang }
3952707e444SZhi Wang 
/* True when the PSE (large-page) bit is set in @e. */
static bool gen8_gtt_test_pse(struct intel_gvt_gtt_entry *e)
{
	return !!(e->val64 & _PAGE_PSE);
}
4002707e444SZhi Wang 
gen8_gtt_clear_pse(struct intel_gvt_gtt_entry * e)401c3e69763SChangbin Du static void gen8_gtt_clear_pse(struct intel_gvt_gtt_entry *e)
402c3e69763SChangbin Du {
403c3e69763SChangbin Du 	if (gen8_gtt_test_pse(e)) {
404c3e69763SChangbin Du 		switch (e->type) {
405c3e69763SChangbin Du 		case GTT_TYPE_PPGTT_PTE_2M_ENTRY:
406c3e69763SChangbin Du 			e->val64 &= ~_PAGE_PSE;
407c3e69763SChangbin Du 			e->type = GTT_TYPE_PPGTT_PDE_ENTRY;
408c3e69763SChangbin Du 			break;
409c3e69763SChangbin Du 		case GTT_TYPE_PPGTT_PTE_1G_ENTRY:
410c3e69763SChangbin Du 			e->type = GTT_TYPE_PPGTT_PDP_ENTRY;
411c3e69763SChangbin Du 			e->val64 &= ~_PAGE_PSE;
412c3e69763SChangbin Du 			break;
413c3e69763SChangbin Du 		default:
414c3e69763SChangbin Du 			WARN_ON(1);
415c3e69763SChangbin Du 		}
416c3e69763SChangbin Du 	}
417c3e69763SChangbin Du }
418c3e69763SChangbin Du 
gen8_gtt_test_ips(struct intel_gvt_gtt_entry * e)4196fd79378SChangbin Du static bool gen8_gtt_test_ips(struct intel_gvt_gtt_entry *e)
4206fd79378SChangbin Du {
4216fd79378SChangbin Du 	if (GEM_WARN_ON(e->type != GTT_TYPE_PPGTT_PDE_ENTRY))
4222707e444SZhi Wang 		return false;
4232707e444SZhi Wang 
4246fd79378SChangbin Du 	return !!(e->val64 & GEN8_PDE_IPS_64K);
4256fd79378SChangbin Du }
4262707e444SZhi Wang 
gen8_gtt_clear_ips(struct intel_gvt_gtt_entry * e)4276fd79378SChangbin Du static void gen8_gtt_clear_ips(struct intel_gvt_gtt_entry *e)
4286fd79378SChangbin Du {
4296fd79378SChangbin Du 	if (GEM_WARN_ON(e->type != GTT_TYPE_PPGTT_PDE_ENTRY))
4306fd79378SChangbin Du 		return;
4316fd79378SChangbin Du 
4326fd79378SChangbin Du 	e->val64 &= ~GEN8_PDE_IPS_64K;
4332707e444SZhi Wang }
4342707e444SZhi Wang 
gen8_gtt_test_present(struct intel_gvt_gtt_entry * e)4352707e444SZhi Wang static bool gen8_gtt_test_present(struct intel_gvt_gtt_entry *e)
4362707e444SZhi Wang {
4372707e444SZhi Wang 	/*
4382707e444SZhi Wang 	 * i915 writes PDP root pointer registers without present bit,
4392707e444SZhi Wang 	 * it also works, so we need to treat root pointer entry
4402707e444SZhi Wang 	 * specifically.
4412707e444SZhi Wang 	 */
4422707e444SZhi Wang 	if (e->type == GTT_TYPE_PPGTT_ROOT_L3_ENTRY
4432707e444SZhi Wang 			|| e->type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY)
4442707e444SZhi Wang 		return (e->val64 != 0);
4452707e444SZhi Wang 	else
4465f978167SMichael Cheng 		return (e->val64 & GEN8_PAGE_PRESENT);
4472707e444SZhi Wang }
4482707e444SZhi Wang 
gtt_entry_clear_present(struct intel_gvt_gtt_entry * e)4492707e444SZhi Wang static void gtt_entry_clear_present(struct intel_gvt_gtt_entry *e)
4502707e444SZhi Wang {
4515f978167SMichael Cheng 	e->val64 &= ~GEN8_PAGE_PRESENT;
4522707e444SZhi Wang }
4532707e444SZhi Wang 
gtt_entry_set_present(struct intel_gvt_gtt_entry * e)454655c64efSZhi Wang static void gtt_entry_set_present(struct intel_gvt_gtt_entry *e)
455655c64efSZhi Wang {
4565f978167SMichael Cheng 	e->val64 |= GEN8_PAGE_PRESENT;
4572707e444SZhi Wang }
4582707e444SZhi Wang 
gen8_gtt_test_64k_splited(struct intel_gvt_gtt_entry * e)45971634848SChangbin Du static bool gen8_gtt_test_64k_splited(struct intel_gvt_gtt_entry *e)
46071634848SChangbin Du {
46171634848SChangbin Du 	return !!(e->val64 & GTT_SPTE_FLAG_64K_SPLITED);
46271634848SChangbin Du }
46371634848SChangbin Du 
gen8_gtt_set_64k_splited(struct intel_gvt_gtt_entry * e)46471634848SChangbin Du static void gen8_gtt_set_64k_splited(struct intel_gvt_gtt_entry *e)
46571634848SChangbin Du {
46671634848SChangbin Du 	e->val64 |= GTT_SPTE_FLAG_64K_SPLITED;
46771634848SChangbin Du }
46871634848SChangbin Du 
gen8_gtt_clear_64k_splited(struct intel_gvt_gtt_entry * e)46971634848SChangbin Du static void gen8_gtt_clear_64k_splited(struct intel_gvt_gtt_entry *e)
47071634848SChangbin Du {
47171634848SChangbin Du 	e->val64 &= ~GTT_SPTE_FLAG_64K_SPLITED;
47271634848SChangbin Du }
47371634848SChangbin Du 
4742707e444SZhi Wang /*
4752707e444SZhi Wang  * Per-platform GMA routines.
4762707e444SZhi Wang  */
/* Convert a graphics memory address to its GGTT PTE index, with tracing. */
static unsigned long gma_to_ggtt_pte_index(unsigned long gma)
{
	unsigned long x = (gma >> I915_GTT_PAGE_SHIFT);

	trace_gma_index(__func__, gma, x);
	return x;
}
4842707e444SZhi Wang 
/*
 * Generate a helper that extracts the page-table index for one PPGTT
 * level from a graphics memory address, emitting a trace event.
 */
#define DEFINE_PPGTT_GMA_TO_INDEX(prefix, ename, exp) \
static unsigned long prefix##_gma_to_##ename##_index(unsigned long gma) \
{ \
	unsigned long x = (exp); \
	trace_gma_index(__func__, gma, x); \
	return x; \
}

/* gen8 4-level layout: PTE[20:12], PDE[29:21], PDP[38:30], PML4[47:39]. */
DEFINE_PPGTT_GMA_TO_INDEX(gen8, pte, (gma >> 12 & 0x1ff));
DEFINE_PPGTT_GMA_TO_INDEX(gen8, pde, (gma >> 21 & 0x1ff));
DEFINE_PPGTT_GMA_TO_INDEX(gen8, l3_pdp, (gma >> 30 & 0x3));
DEFINE_PPGTT_GMA_TO_INDEX(gen8, l4_pdp, (gma >> 30 & 0x1ff));
DEFINE_PPGTT_GMA_TO_INDEX(gen8, pml4, (gma >> 39 & 0x1ff));
4982707e444SZhi Wang 
/* gen8 PTE accessor vtable, wired into the per-device gtt.pte_ops. */
static const struct intel_gvt_gtt_pte_ops gen8_gtt_pte_ops = {
	.get_entry = gtt_get_entry64,
	.set_entry = gtt_set_entry64,
	.clear_present = gtt_entry_clear_present,
	.set_present = gtt_entry_set_present,
	.test_present = gen8_gtt_test_present,
	.test_pse = gen8_gtt_test_pse,
	.clear_pse = gen8_gtt_clear_pse,
	.clear_ips = gen8_gtt_clear_ips,
	.test_ips = gen8_gtt_test_ips,
	.clear_64k_splited = gen8_gtt_clear_64k_splited,
	.set_64k_splited = gen8_gtt_set_64k_splited,
	.test_64k_splited = gen8_gtt_test_64k_splited,
	.get_pfn = gen8_gtt_get_pfn,
	.set_pfn = gen8_gtt_set_pfn,
};
5152707e444SZhi Wang 
/* gen8 gma-to-index vtable for walking GGTT/PPGTT page tables. */
static const struct intel_gvt_gtt_gma_ops gen8_gtt_gma_ops = {
	.gma_to_ggtt_pte_index = gma_to_ggtt_pte_index,
	.gma_to_pte_index = gen8_gma_to_pte_index,
	.gma_to_pde_index = gen8_gma_to_pde_index,
	.gma_to_l3_pdp_index = gen8_gma_to_l3_pdp_index,
	.gma_to_l4_pdp_index = gen8_gma_to_l4_pdp_index,
	.gma_to_pml4_index = gen8_gma_to_pml4_index,
};
5242707e444SZhi Wang 
52540b27176SChangbin Du /* Update entry type per pse and ips bit. */
/*
 * Refine @entry->type using its PSE bit (PDE/PDP levels) or the
 * table-wide @ips flag (PTE level), turning it into the matching
 * huge/64K entry type when set.  BUGs if the result is invalid.
 */
static void update_entry_type_for_real(const struct intel_gvt_gtt_pte_ops *pte_ops,
	struct intel_gvt_gtt_entry *entry, bool ips)
{
	if (entry->type == GTT_TYPE_PPGTT_PDE_ENTRY ||
	    entry->type == GTT_TYPE_PPGTT_PDP_ENTRY) {
		if (pte_ops->test_pse(entry))
			entry->type = get_pse_type(entry->type);
	} else if (entry->type == GTT_TYPE_PPGTT_PTE_4K_ENTRY) {
		if (ips)
			entry->type = get_pse_type(entry->type);
	} else {
		GEM_BUG_ON(!gtt_type_is_entry(entry->type));
	}

	GEM_BUG_ON(entry->type == GTT_TYPE_INVALID);
}
54540b27176SChangbin Du 
5462707e444SZhi Wang /*
5472707e444SZhi Wang  * MM helpers.
5482707e444SZhi Wang  */
/*
 * Read one PPGTT root entry at @index from either the guest or the
 * shadow root-pointer array, tagging it with the mm's root entry type
 * and refining that type from the entry's own bits.
 */
static void _ppgtt_get_root_entry(struct intel_vgpu_mm *mm,
		struct intel_gvt_gtt_entry *entry, unsigned long index,
		bool guest)
{
	const struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;
	void *pdps;

	GEM_BUG_ON(mm->type != INTEL_GVT_MM_PPGTT);

	pdps = guest ? mm->ppgtt_mm.guest_pdps : mm->ppgtt_mm.shadow_pdps;

	entry->type = mm->ppgtt_mm.root_entry_type;
	pte_ops->get_entry(pdps, entry, index, false, 0, mm->vgpu);
	update_entry_type_for_real(pte_ops, entry, false);
}
5632707e444SZhi Wang 
/* Read a root entry from the guest's root-pointer array. */
static inline void ppgtt_get_guest_root_entry(struct intel_vgpu_mm *mm,
		struct intel_gvt_gtt_entry *entry, unsigned long index)
{
	_ppgtt_get_root_entry(mm, entry, index, true);
}
5692707e444SZhi Wang 
/* Read a root entry from the shadow root-pointer array. */
static inline void ppgtt_get_shadow_root_entry(struct intel_vgpu_mm *mm,
		struct intel_gvt_gtt_entry *entry, unsigned long index)
{
	_ppgtt_get_root_entry(mm, entry, index, false);
}
5753aff3512SChangbin Du 
/*
 * Write one PPGTT root entry at @index into either the guest or the
 * shadow root-pointer array.
 */
static void _ppgtt_set_root_entry(struct intel_vgpu_mm *mm,
		struct intel_gvt_gtt_entry *entry, unsigned long index,
		bool guest)
{
	const struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;
	void *pdps = guest ? mm->ppgtt_mm.guest_pdps : mm->ppgtt_mm.shadow_pdps;

	pte_ops->set_entry(pdps, entry, index, false, 0, mm->vgpu);
}
5863aff3512SChangbin Du 
/* Write a root entry into the shadow root-pointer array. */
static inline void ppgtt_set_shadow_root_entry(struct intel_vgpu_mm *mm,
		struct intel_gvt_gtt_entry *entry, unsigned long index)
{
	_ppgtt_set_root_entry(mm, entry, index, false);
}
5923aff3512SChangbin Du 
/* Read the guest's (virtual) GGTT PTE at @index into @entry. */
static void ggtt_get_guest_entry(struct intel_vgpu_mm *mm,
		struct intel_gvt_gtt_entry *entry, unsigned long index)
{
	const struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;

	GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT);

	entry->type = GTT_TYPE_GGTT_PTE;
	pte_ops->get_entry(mm->ggtt_mm.virtual_ggtt, entry, index,
			   false, 0, mm->vgpu);
}
6043aff3512SChangbin Du 
/* Write a guest GGTT PTE at @index into the vGPU's virtual GGTT. */
static void ggtt_set_guest_entry(struct intel_vgpu_mm *mm,
		struct intel_gvt_gtt_entry *entry, unsigned long index)
{
	const struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;

	GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT);

	pte_ops->set_entry(mm->ggtt_mm.virtual_ggtt, entry, index,
			   false, 0, mm->vgpu);
}
6153aff3512SChangbin Du 
/*
 * Read a host GGTT PTE at @index.  A NULL page table tells the pte_ops
 * to read from the real (host) GGTT rather than a shadow page.
 */
static void ggtt_get_host_entry(struct intel_vgpu_mm *mm,
		struct intel_gvt_gtt_entry *entry, unsigned long index)
{
	const struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;

	GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT);

	pte_ops->get_entry(NULL, entry, index, false, 0, mm->vgpu);
}
6257598e870SChangbin Du 
/*
 * Write a host GGTT PTE at @index and mirror the raw value into the
 * vGPU's saved host-GGTT copy (aperture or hidden range) so it can be
 * restored later.  Indices outside both vGPU ranges are written to the
 * hardware GGTT only.
 */
static void ggtt_set_host_entry(struct intel_vgpu_mm *mm,
		struct intel_gvt_gtt_entry *entry, unsigned long index)
{
	const struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;
	unsigned long offset = index;

	GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT);

	if (vgpu_gmadr_is_aperture(mm->vgpu, index << I915_GTT_PAGE_SHIFT)) {
		/* Rebase the GGTT index to the start of the aperture range. */
		offset -= (vgpu_aperture_gmadr_base(mm->vgpu) >> PAGE_SHIFT);
		mm->ggtt_mm.host_ggtt_aperture[offset] = entry->val64;
	} else if (vgpu_gmadr_is_hidden(mm->vgpu, index << I915_GTT_PAGE_SHIFT)) {
		offset -= (vgpu_hidden_gmadr_base(mm->vgpu) >> PAGE_SHIFT);
		mm->ggtt_mm.host_ggtt_hidden[offset] = entry->val64;
	}

	/* NULL page table directs the write at the real host GGTT. */
	pte_ops->set_entry(NULL, entry, index, false, 0, mm->vgpu);
}
6442707e444SZhi Wang 
6452707e444SZhi Wang /*
6462707e444SZhi Wang  * PPGTT shadow page table helpers.
6472707e444SZhi Wang  */
/*
 * Read one entry of a shadow page table.
 *
 * @page_table: backing memory to read from; NULL means read from the
 *              guest page identified by spt->guest_page.gfn.
 * @type:       page-table type used to derive the entry type.
 * @guest:      whether this is a guest-side read (also selects whether
 *              the 64K-IPS hint from the guest PDE applies).
 *
 * Returns 0 on success or a negative error code from the pte_ops.
 */
static inline int ppgtt_spt_get_entry(
		struct intel_vgpu_ppgtt_spt *spt,
		void *page_table, int type,
		struct intel_gvt_gtt_entry *e, unsigned long index,
		bool guest)
{
	struct intel_gvt *gvt = spt->vgpu->gvt;
	const struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
	int ret;

	e->type = get_entry_type(type);

	if (WARN(!gtt_type_is_entry(e->type), "invalid entry type\n"))
		return -EINVAL;

	ret = ops->get_entry(page_table, e, index, guest,
			spt->guest_page.gfn << I915_GTT_PAGE_SHIFT,
			spt->vgpu);
	if (ret)
		return ret;

	/* Refine e->type (e.g. 64K/2M variants) from the PTE bits just read. */
	update_entry_type_for_real(ops, e, guest ?
				   spt->guest_page.pde_ips : false);

	gvt_vdbg_mm("read ppgtt entry, spt type %d, entry type %d, index %lu, value %llx\n",
		    type, e->type, index, e->val64);
	return 0;
}
6762707e444SZhi Wang 
/*
 * Write one entry of a shadow page table.
 *
 * @page_table: backing memory to write to; NULL means write to the
 *              guest page identified by spt->guest_page.gfn.
 *
 * Returns the pte_ops result (0 on success, negative on error).
 */
static inline int ppgtt_spt_set_entry(
		struct intel_vgpu_ppgtt_spt *spt,
		void *page_table, int type,
		struct intel_gvt_gtt_entry *e, unsigned long index,
		bool guest)
{
	struct intel_gvt *gvt = spt->vgpu->gvt;
	const struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;

	if (WARN(!gtt_type_is_entry(e->type), "invalid entry type\n"))
		return -EINVAL;

	gvt_vdbg_mm("set ppgtt entry, spt type %d, entry type %d, index %lu, value %llx\n",
		    type, e->type, index, e->val64);

	return ops->set_entry(page_table, e, index, guest,
			spt->guest_page.gfn << I915_GTT_PAGE_SHIFT,
			spt->vgpu);
}
6962707e444SZhi Wang 
/*
 * Convenience accessors over an SPT: the guest variants read/write the
 * guest page table (page_table == NULL -> go through the guest gfn),
 * the shadow variants operate on the shadow page's kernel mapping.
 */
#define ppgtt_get_guest_entry(spt, e, index) \
	ppgtt_spt_get_entry(spt, NULL, \
		spt->guest_page.type, e, index, true)

#define ppgtt_set_guest_entry(spt, e, index) \
	ppgtt_spt_set_entry(spt, NULL, \
		spt->guest_page.type, e, index, true)

#define ppgtt_get_shadow_entry(spt, e, index) \
	ppgtt_spt_get_entry(spt, spt->shadow_page.vaddr, \
		spt->shadow_page.type, e, index, false)

#define ppgtt_set_shadow_entry(spt, e, index) \
	ppgtt_spt_set_entry(spt, spt->shadow_page.vaddr, \
		spt->shadow_page.type, e, index, false)
7122707e444SZhi Wang 
/*
 * Allocate a shadow page table descriptor together with its backing
 * page.  Returns NULL if either allocation fails; the partially
 * allocated descriptor is freed on the failure path.
 */
static void *alloc_spt(gfp_t gfp_mask)
{
	struct intel_vgpu_ppgtt_spt *spt = kzalloc(sizeof(*spt), gfp_mask);

	if (spt) {
		spt->shadow_page.page = alloc_page(gfp_mask);
		if (spt->shadow_page.page)
			return spt;
		kfree(spt);
	}
	return NULL;
}
7287d1e5cdfSZhi Wang 
/* Release an SPT descriptor and its backing shadow page. */
static void free_spt(struct intel_vgpu_ppgtt_spt *spt)
{
	__free_page(spt->shadow_page.page);
	kfree(spt);
}
7347d1e5cdfSZhi Wang 
7352707e444SZhi Wang static int detach_oos_page(struct intel_vgpu *vgpu,
7362707e444SZhi Wang 		struct intel_vgpu_oos_page *oos_page);
7372707e444SZhi Wang 
/*
 * Tear down one shadow page table: unmap its DMA mapping, drop it from
 * the per-vGPU mfn radix tree, detach any out-of-sync page and remove
 * write protection from the guest page (when one is associated), then
 * free the descriptor and backing page.
 */
static void ppgtt_free_spt(struct intel_vgpu_ppgtt_spt *spt)
{
	struct device *kdev = spt->vgpu->gvt->gt->i915->drm.dev;

	trace_spt_free(spt->vgpu->id, spt, spt->guest_page.type);

	dma_unmap_page(kdev, spt->shadow_page.mfn << I915_GTT_PAGE_SHIFT, 4096,
		       DMA_BIDIRECTIONAL);

	radix_tree_delete(&spt->vgpu->gtt.spt_tree, spt->shadow_page.mfn);

	/* gfn == 0 means the SPT was allocated without a guest page. */
	if (spt->guest_page.gfn) {
		if (spt->guest_page.oos_page)
			detach_oos_page(spt->vgpu, spt->guest_page.oos_page);

		intel_vgpu_unregister_page_track(spt->vgpu, spt->guest_page.gfn);
	}

	list_del_init(&spt->post_shadow_list);
	free_spt(spt);
}
7592707e444SZhi Wang 
/*
 * Free every shadow page table of a vGPU.  The radix tree is walked
 * under rcu_read_lock() only to collect the SPTs (reusing their
 * post_shadow_list links onto a local list); the actual freeing — which
 * may sleep (dma_unmap_page etc.) — happens outside the RCU section.
 */
static void ppgtt_free_all_spt(struct intel_vgpu *vgpu)
{
	struct intel_vgpu_ppgtt_spt *spt, *spn;
	struct radix_tree_iter iter;
	LIST_HEAD(all_spt);
	void __rcu **slot;

	rcu_read_lock();
	radix_tree_for_each_slot(slot, &vgpu->gtt.spt_tree, &iter, 0) {
		spt = radix_tree_deref_slot(slot);
		list_move(&spt->post_shadow_list, &all_spt);
	}
	rcu_read_unlock();

	list_for_each_entry_safe(spt, spn, &all_spt, post_shadow_list)
		ppgtt_free_spt(spt);
}
7772707e444SZhi Wang 
7787d1e5cdfSZhi Wang static int ppgtt_handle_guest_write_page_table_bytes(
77944b46733SChangbin Du 		struct intel_vgpu_ppgtt_spt *spt,
7802707e444SZhi Wang 		u64 pa, void *p_data, int bytes);
7812707e444SZhi Wang 
/*
 * Page-track handler invoked when the guest writes a write-protected
 * PPGTT page table page.  Only naturally sized GTT-entry writes (4 or
 * 8 bytes) are accepted; everything else is rejected with -EINVAL.
 *
 * Returns 0 on success or a negative error code.
 */
static int ppgtt_write_protection_handler(
		struct intel_vgpu_page_track *page_track,
		u64 gpa, void *data, int bytes)
{
	struct intel_vgpu_ppgtt_spt *spt = page_track->priv_data;

	if (bytes != 4 && bytes != 8)
		return -EINVAL;

	/*
	 * The original tail was "if (ret) return ret; return ret;" —
	 * both branches return ret, so pass the result straight through.
	 */
	return ppgtt_handle_guest_write_page_table_bytes(spt, gpa, data, bytes);
}
7982707e444SZhi Wang 
79944b46733SChangbin Du /* Find a spt by guest gfn. */
intel_vgpu_find_spt_by_gfn(struct intel_vgpu * vgpu,unsigned long gfn)80044b46733SChangbin Du static struct intel_vgpu_ppgtt_spt *intel_vgpu_find_spt_by_gfn(
80144b46733SChangbin Du 		struct intel_vgpu *vgpu, unsigned long gfn)
80244b46733SChangbin Du {
80344b46733SChangbin Du 	struct intel_vgpu_page_track *track;
80444b46733SChangbin Du 
805e502a2afSChangbin Du 	track = intel_vgpu_find_page_track(vgpu, gfn);
806e502a2afSChangbin Du 	if (track && track->handler == ppgtt_write_protection_handler)
807e502a2afSChangbin Du 		return track->priv_data;
80844b46733SChangbin Du 
80944b46733SChangbin Du 	return NULL;
81044b46733SChangbin Du }
81144b46733SChangbin Du 
/* Find the spt by shadow page mfn (key of the per-vGPU radix tree). */
static inline struct intel_vgpu_ppgtt_spt *intel_vgpu_find_spt_by_mfn(
		struct intel_vgpu *vgpu, unsigned long mfn)
{
	return radix_tree_lookup(&vgpu->gtt.spt_tree, mfn);
}
81844b46733SChangbin Du 
819ede9d0cfSChangbin Du static int reclaim_one_ppgtt_mm(struct intel_gvt *gvt);
8202707e444SZhi Wang 
/* Allocate shadow page table without guest page. */
static struct intel_vgpu_ppgtt_spt *ppgtt_alloc_spt(
		struct intel_vgpu *vgpu, enum intel_gvt_gtt_type type)
{
	struct device *kdev = vgpu->gvt->gt->i915->drm.dev;
	struct intel_vgpu_ppgtt_spt *spt = NULL;
	dma_addr_t daddr;
	int ret;

retry:
	spt = alloc_spt(GFP_KERNEL | __GFP_ZERO);
	if (!spt) {
		/* Under memory pressure, try to reclaim an idle PPGTT mm. */
		if (reclaim_one_ppgtt_mm(vgpu->gvt))
			goto retry;

		gvt_vgpu_err("fail to allocate ppgtt shadow page\n");
		return ERR_PTR(-ENOMEM);
	}

	spt->vgpu = vgpu;
	atomic_set(&spt->refcount, 1);
	INIT_LIST_HEAD(&spt->post_shadow_list);

	/*
	 * Init shadow_page: DMA-map the backing page so the GPU can walk
	 * it, and index the SPT by its mfn in the per-vGPU radix tree.
	 */
	spt->shadow_page.type = type;
	daddr = dma_map_page(kdev, spt->shadow_page.page,
			     0, 4096, DMA_BIDIRECTIONAL);
	if (dma_mapping_error(kdev, daddr)) {
		gvt_vgpu_err("fail to map dma addr\n");
		ret = -EINVAL;
		goto err_free_spt;
	}
	spt->shadow_page.vaddr = page_address(spt->shadow_page.page);
	spt->shadow_page.mfn = daddr >> I915_GTT_PAGE_SHIFT;

	ret = radix_tree_insert(&vgpu->gtt.spt_tree, spt->shadow_page.mfn, spt);
	if (ret)
		goto err_unmap_dma;

	return spt;

err_unmap_dma:
	dma_unmap_page(kdev, daddr, PAGE_SIZE, DMA_BIDIRECTIONAL);
err_free_spt:
	free_spt(spt);
	return ERR_PTR(ret);
}
8702707e444SZhi Wang 
/* Allocate shadow page table associated with specific gfn. */
static struct intel_vgpu_ppgtt_spt *ppgtt_alloc_spt_gfn(
		struct intel_vgpu *vgpu, enum intel_gvt_gtt_type type,
		unsigned long gfn, bool guest_pde_ips)
{
	struct intel_vgpu_ppgtt_spt *spt;
	int ret;

	spt = ppgtt_alloc_spt(vgpu, type);
	if (IS_ERR(spt))
		return spt;

	/*
	 * Init guest_page: write-protect the guest page table page so
	 * guest updates are intercepted and mirrored into the shadow.
	 */
	ret = intel_vgpu_register_page_track(vgpu, gfn,
			ppgtt_write_protection_handler, spt);
	if (ret) {
		ppgtt_free_spt(spt);
		return ERR_PTR(ret);
	}

	spt->guest_page.type = type;
	spt->guest_page.gfn = gfn;
	spt->guest_page.pde_ips = guest_pde_ips;

	trace_spt_alloc(vgpu->id, spt, type, spt->shadow_page.mfn, gfn);

	return spt;
}
901155521c9SChangbin Du 
/* Size (as a shift) of one GTT entry on this device. */
#define pt_entry_size_shift(spt) \
	((spt)->vgpu->gvt->device_info.gtt_entry_size_shift)

/* Number of entries held by one page-table page. */
#define pt_entries(spt) \
	(I915_GTT_PAGE_SIZE >> pt_entry_size_shift(spt))

/*
 * Iterate the present entries of a page table.  When the PDE has IPS
 * (64K pages) set, entries come in strides of GTT_64K_PTE_STRIDE, so
 * only the first slot of each stride is visited.
 */
#define for_each_present_guest_entry(spt, e, i) \
	for (i = 0; i < pt_entries(spt); \
	     i += spt->guest_page.pde_ips ? GTT_64K_PTE_STRIDE : 1) \
		if (!ppgtt_get_guest_entry(spt, e, i) && \
		    spt->vgpu->gvt->gtt.pte_ops->test_present(e))

#define for_each_present_shadow_entry(spt, e, i) \
	for (i = 0; i < pt_entries(spt); \
	     i += spt->shadow_page.pde_ips ? GTT_64K_PTE_STRIDE : 1) \
		if (!ppgtt_get_shadow_entry(spt, e, i) && \
		    spt->vgpu->gvt->gtt.pte_ops->test_present(e))

/* Same stride rules, but visits non-present entries too. */
#define for_each_shadow_entry(spt, e, i) \
	for (i = 0; i < pt_entries(spt); \
	     i += (spt->shadow_page.pde_ips ? GTT_64K_PTE_STRIDE : 1)) \
		if (!ppgtt_get_shadow_entry(spt, e, i))
924b901b252SChangbin Du 
ppgtt_get_spt(struct intel_vgpu_ppgtt_spt * spt)92580e76ea6SChangbin Du static inline void ppgtt_get_spt(struct intel_vgpu_ppgtt_spt *spt)
9262707e444SZhi Wang {
9272707e444SZhi Wang 	int v = atomic_read(&spt->refcount);
9282707e444SZhi Wang 
9292707e444SZhi Wang 	trace_spt_refcount(spt->vgpu->id, "inc", spt, v, (v + 1));
9302707e444SZhi Wang 	atomic_inc(&spt->refcount);
9312707e444SZhi Wang }
9322707e444SZhi Wang 
ppgtt_put_spt(struct intel_vgpu_ppgtt_spt * spt)93380e76ea6SChangbin Du static inline int ppgtt_put_spt(struct intel_vgpu_ppgtt_spt *spt)
93480e76ea6SChangbin Du {
93580e76ea6SChangbin Du 	int v = atomic_read(&spt->refcount);
93680e76ea6SChangbin Du 
93780e76ea6SChangbin Du 	trace_spt_refcount(spt->vgpu->id, "dec", spt, v, (v - 1));
93880e76ea6SChangbin Du 	return atomic_dec_return(&spt->refcount);
93980e76ea6SChangbin Du }
94080e76ea6SChangbin Du 
941d87f5ff3SChangbin Du static int ppgtt_invalidate_spt(struct intel_vgpu_ppgtt_spt *spt);
9422707e444SZhi Wang 
/*
 * Drop a reference on the child SPT that a shadow entry points to.
 * Entries pointing at the scratch page table of the next level are
 * not backed by an SPT and are skipped.  Returns 0 on success, -EINVAL
 * for an invalid page-table type, -ENXIO if no SPT is found for the mfn.
 */
static int ppgtt_invalidate_spt_by_shadow_entry(struct intel_vgpu *vgpu,
		struct intel_gvt_gtt_entry *e)
{
	struct drm_i915_private *i915 = vgpu->gvt->gt->i915;
	const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
	struct intel_vgpu_ppgtt_spt *s;
	enum intel_gvt_gtt_type cur_pt_type;

	GEM_BUG_ON(!gtt_type_is_pt(get_next_pt_type(e->type)));

	if (e->type != GTT_TYPE_PPGTT_ROOT_L3_ENTRY
		&& e->type != GTT_TYPE_PPGTT_ROOT_L4_ENTRY) {
		cur_pt_type = get_next_pt_type(e->type);

		/* cur_pt_type + 1 is used below; validate both levels. */
		if (!gtt_type_is_pt(cur_pt_type) ||
				!gtt_type_is_pt(cur_pt_type + 1)) {
			drm_WARN(&i915->drm, 1,
				 "Invalid page table type, cur_pt_type is: %d\n",
				 cur_pt_type);
			return -EINVAL;
		}

		cur_pt_type += 1;

		/* Scratch-backed entry: nothing shadowed, nothing to drop. */
		if (ops->get_pfn(e) ==
			vgpu->gtt.scratch_pt[cur_pt_type].page_mfn)
			return 0;
	}
	s = intel_vgpu_find_spt_by_mfn(vgpu, ops->get_pfn(e));
	if (!s) {
		gvt_vgpu_err("fail to find shadow page: mfn: 0x%lx\n",
				ops->get_pfn(e));
		return -ENXIO;
	}
	return ppgtt_invalidate_spt(s);
}
9792707e444SZhi Wang 
/*
 * Release the host DMA mapping behind one shadow PTE.  Entries that
 * were never shadowed (pfn 0) or that point at the scratch page of
 * this table's type are left alone.
 */
static inline void ppgtt_invalidate_pte(struct intel_vgpu_ppgtt_spt *spt,
		struct intel_gvt_gtt_entry *entry)
{
	struct intel_vgpu *vgpu = spt->vgpu;
	const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
	unsigned long pfn;
	int type;

	pfn = ops->get_pfn(entry);
	type = spt->shadow_page.type;

	/* Uninitialized spte or unshadowed spte. */
	if (!pfn || pfn == vgpu->gtt.scratch_pt[type].page_mfn)
		return;

	intel_gvt_dma_unmap_guest_page(vgpu, pfn << PAGE_SHIFT);
}
997cf4ee73fSChangbin Du 
/*
 * Drop a reference on an SPT; when the last reference goes away, walk
 * its present shadow entries releasing leaf mappings and recursing into
 * child page tables, then free the SPT itself.  Returns 0 on success or
 * the first error from a recursive invalidation.
 */
static int ppgtt_invalidate_spt(struct intel_vgpu_ppgtt_spt *spt)
{
	struct intel_vgpu *vgpu = spt->vgpu;
	struct intel_gvt_gtt_entry e;
	unsigned long index;
	int ret;

	trace_spt_change(spt->vgpu->id, "die", spt,
			spt->guest_page.gfn, spt->shadow_page.type);

	/* Still referenced by other shadow entries: keep it alive. */
	if (ppgtt_put_spt(spt) > 0)
		return 0;

	for_each_present_shadow_entry(spt, &e, index) {
		switch (e.type) {
		case GTT_TYPE_PPGTT_PTE_4K_ENTRY:
			gvt_vdbg_mm("invalidate 4K entry\n");
			ppgtt_invalidate_pte(spt, &e);
			break;
		case GTT_TYPE_PPGTT_PTE_64K_ENTRY:
			/* We don't setup 64K shadow entry so far. */
			WARN(1, "suspicious 64K gtt entry\n");
			continue;
		case GTT_TYPE_PPGTT_PTE_2M_ENTRY:
			/* 2M PDEs are shadowed via split 4K sub-tables;
			 * nothing to unmap at this level. */
			gvt_vdbg_mm("invalidate 2M entry\n");
			continue;
		case GTT_TYPE_PPGTT_PTE_1G_ENTRY:
			WARN(1, "GVT doesn't support 1GB page\n");
			continue;
		case GTT_TYPE_PPGTT_PML4_ENTRY:
		case GTT_TYPE_PPGTT_PDP_ENTRY:
		case GTT_TYPE_PPGTT_PDE_ENTRY:
			gvt_vdbg_mm("invalidate PMUL4/PDP/PDE entry\n");
			ret = ppgtt_invalidate_spt_by_shadow_entry(
					spt->vgpu, &e);
			if (ret)
				goto fail;
			break;
		default:
			GEM_BUG_ON(1);
		}
	}

	trace_spt_change(spt->vgpu->id, "release", spt,
			 spt->guest_page.gfn, spt->shadow_page.type);
	ppgtt_free_spt(spt);
	return 0;
fail:
	gvt_vgpu_err("fail: shadow page %p shadow entry 0x%llx type %d\n",
			spt, e.val64, e.type);
	return ret;
}
10502707e444SZhi Wang 
vgpu_ips_enabled(struct intel_vgpu * vgpu)105140b27176SChangbin Du static bool vgpu_ips_enabled(struct intel_vgpu *vgpu)
105240b27176SChangbin Du {
1053a61ac1e7SChris Wilson 	struct drm_i915_private *dev_priv = vgpu->gvt->gt->i915;
105440b27176SChangbin Du 
10555dae69a9SLucas De Marchi 	if (GRAPHICS_VER(dev_priv) == 9) {
105640b27176SChangbin Du 		u32 ips = vgpu_vreg_t(vgpu, GEN8_GAMW_ECO_DEV_RW_IA) &
105740b27176SChangbin Du 			GAMW_ECO_ENABLE_64K_IPS_FIELD;
105840b27176SChangbin Du 
105940b27176SChangbin Du 		return ips == GAMW_ECO_ENABLE_64K_IPS_FIELD;
1060d8d12312SLucas De Marchi 	} else if (GRAPHICS_VER(dev_priv) >= 11) {
106140b27176SChangbin Du 		/* 64K paging only controlled by IPS bit in PTE now. */
106240b27176SChangbin Du 		return true;
106340b27176SChangbin Du 	} else
106440b27176SChangbin Du 		return false;
106540b27176SChangbin Du }
106640b27176SChangbin Du 
1067d87f5ff3SChangbin Du static int ppgtt_populate_spt(struct intel_vgpu_ppgtt_spt *spt);
10682707e444SZhi Wang 
/*
 * Get (or create) the shadow page table for the page table that a
 * guest entry points to, taking a reference.  An existing SPT whose
 * 64K-IPS state changed since it was shadowed is re-populated from
 * scratch.  Returns the referenced SPT or an ERR_PTR on failure.
 */
static struct intel_vgpu_ppgtt_spt *ppgtt_populate_spt_by_guest_entry(
		struct intel_vgpu *vgpu, struct intel_gvt_gtt_entry *we)
{
	const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
	struct intel_vgpu_ppgtt_spt *spt = NULL;
	bool ips = false;
	int ret;

	GEM_BUG_ON(!gtt_type_is_pt(get_next_pt_type(we->type)));

	if (we->type == GTT_TYPE_PPGTT_PDE_ENTRY)
		ips = vgpu_ips_enabled(vgpu) && ops->test_ips(we);

	spt = intel_vgpu_find_spt_by_gfn(vgpu, ops->get_pfn(we));
	if (spt) {
		ppgtt_get_spt(spt);

		if (ips != spt->guest_page.pde_ips) {
			spt->guest_page.pde_ips = ips;

			gvt_dbg_mm("reshadow PDE since ips changed\n");
			clear_page(spt->shadow_page.vaddr);
			ret = ppgtt_populate_spt(spt);
			if (ret) {
				/* Drop the reference taken above. */
				ppgtt_put_spt(spt);
				goto err;
			}
		}
	} else {
		int type = get_next_pt_type(we->type);

		if (!gtt_type_is_pt(type)) {
			ret = -EINVAL;
			goto err;
		}

		spt = ppgtt_alloc_spt_gfn(vgpu, type, ops->get_pfn(we), ips);
		if (IS_ERR(spt)) {
			ret = PTR_ERR(spt);
			goto err;
		}

		ret = intel_vgpu_enable_page_track(vgpu, spt->guest_page.gfn);
		if (ret)
			goto err_free_spt;

		ret = ppgtt_populate_spt(spt);
		if (ret)
			goto err_free_spt;

		trace_spt_change(vgpu->id, "new", spt, spt->guest_page.gfn,
				 spt->shadow_page.type);
	}
	return spt;

err_free_spt:
	ppgtt_free_spt(spt);
	spt = NULL;
err:
	gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d\n",
		     spt, we->val64, we->type);
	return ERR_PTR(ret);
}
11322707e444SZhi Wang 
/*
 * Build the shadow entry @se from guest entry @ge, redirecting it to
 * the shadow page table @s (same flags, shadow mfn instead of the
 * guest pfn).
 */
static inline void ppgtt_generate_shadow_entry(struct intel_gvt_gtt_entry *se,
		struct intel_vgpu_ppgtt_spt *s, struct intel_gvt_gtt_entry *ge)
{
	const struct intel_gvt_gtt_pte_ops *ops = s->vgpu->gvt->gtt.pte_ops;

	se->type = ge->type;
	se->val64 = ge->val64;

	/* Because we always split 64KB pages, so clear IPS in shadow PDE. */
	if (se->type == GTT_TYPE_PPGTT_PDE_ENTRY)
		ops->clear_ips(se);

	ops->set_pfn(se, s->shadow_page.mfn);
}
11472707e444SZhi Wang 
split_2MB_gtt_entry(struct intel_vgpu * vgpu,struct intel_vgpu_ppgtt_spt * spt,unsigned long index,struct intel_gvt_gtt_entry * se)1148b901b252SChangbin Du static int split_2MB_gtt_entry(struct intel_vgpu *vgpu,
1149b901b252SChangbin Du 	struct intel_vgpu_ppgtt_spt *spt, unsigned long index,
1150b901b252SChangbin Du 	struct intel_gvt_gtt_entry *se)
1151b901b252SChangbin Du {
11525512445cSRikard Falkeborn 	const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
1153b901b252SChangbin Du 	struct intel_vgpu_ppgtt_spt *sub_spt;
1154b901b252SChangbin Du 	struct intel_gvt_gtt_entry sub_se;
1155b901b252SChangbin Du 	unsigned long start_gfn;
1156b901b252SChangbin Du 	dma_addr_t dma_addr;
1157b901b252SChangbin Du 	unsigned long sub_index;
1158b901b252SChangbin Du 	int ret;
1159b901b252SChangbin Du 
1160b901b252SChangbin Du 	gvt_dbg_mm("Split 2M gtt entry, index %lu\n", index);
1161b901b252SChangbin Du 
1162b901b252SChangbin Du 	start_gfn = ops->get_pfn(se);
1163b901b252SChangbin Du 
1164b901b252SChangbin Du 	sub_spt = ppgtt_alloc_spt(vgpu, GTT_TYPE_PPGTT_PTE_PT);
1165b901b252SChangbin Du 	if (IS_ERR(sub_spt))
1166b901b252SChangbin Du 		return PTR_ERR(sub_spt);
1167b901b252SChangbin Du 
1168b901b252SChangbin Du 	for_each_shadow_entry(sub_spt, &sub_se, sub_index) {
11698398eee8SChristoph Hellwig 		ret = intel_gvt_dma_map_guest_page(vgpu, start_gfn + sub_index,
11708398eee8SChristoph Hellwig 						   PAGE_SIZE, &dma_addr);
11714a61648aSZheng Wang 		if (ret)
11724a61648aSZheng Wang 			goto err;
1173b901b252SChangbin Du 		sub_se.val64 = se->val64;
1174b901b252SChangbin Du 
1175b901b252SChangbin Du 		/* Copy the PAT field from PDE. */
1176b901b252SChangbin Du 		sub_se.val64 &= ~_PAGE_PAT;
1177b901b252SChangbin Du 		sub_se.val64 |= (se->val64 & _PAGE_PAT_LARGE) >> 5;
1178b901b252SChangbin Du 
1179b901b252SChangbin Du 		ops->set_pfn(&sub_se, dma_addr >> PAGE_SHIFT);
1180b901b252SChangbin Du 		ppgtt_set_shadow_entry(sub_spt, &sub_se, sub_index);
1181b901b252SChangbin Du 	}
1182b901b252SChangbin Du 
1183b901b252SChangbin Du 	/* Clear dirty field. */
1184b901b252SChangbin Du 	se->val64 &= ~_PAGE_DIRTY;
1185b901b252SChangbin Du 
1186b901b252SChangbin Du 	ops->clear_pse(se);
1187b901b252SChangbin Du 	ops->clear_ips(se);
1188b901b252SChangbin Du 	ops->set_pfn(se, sub_spt->shadow_page.mfn);
1189b901b252SChangbin Du 	ppgtt_set_shadow_entry(spt, se, index);
1190b901b252SChangbin Du 	return 0;
11914a61648aSZheng Wang err:
11924a61648aSZheng Wang 	/* Cancel the existing address mappings of DMA addr. */
11934a61648aSZheng Wang 	for_each_present_shadow_entry(sub_spt, &sub_se, sub_index) {
11944a61648aSZheng Wang 		gvt_vdbg_mm("invalidate 4K entry\n");
11954a61648aSZheng Wang 		ppgtt_invalidate_pte(sub_spt, &sub_se);
11964a61648aSZheng Wang 	}
11974a61648aSZheng Wang 	/* Release the new allocated spt. */
11984a61648aSZheng Wang 	trace_spt_change(sub_spt->vgpu->id, "release", sub_spt,
11994a61648aSZheng Wang 		sub_spt->guest_page.gfn, sub_spt->shadow_page.type);
12004a61648aSZheng Wang 	ppgtt_free_spt(sub_spt);
12014a61648aSZheng Wang 	return ret;
1202b901b252SChangbin Du }
1203b901b252SChangbin Du 
/*
 * Split a guest 64K PTE into 16 consecutive 4K shadow PTEs.
 *
 * The 64K page layout is controlled by the upper-level PDE, so instead of
 * shadowing a real 64K mapping we DMA-map each of the 16 backing 4K guest
 * pages and write one 4K shadow entry per page, marking each shadow entry
 * as part of a split 64K entry.
 *
 * @vgpu:  the vGPU owning the page table
 * @spt:   shadow page table that receives the 16 entries
 * @index: first shadow entry slot; must be 64K-stride aligned
 * @se:    the guest 64K entry being split
 *
 * Returns 0 on success, negative error code on DMA-mapping failure.
 * NOTE(review): entries shadowed before a mid-loop failure appear to be
 * cleaned up later via normal spt invalidation — confirm against callers.
 */
static int split_64KB_gtt_entry(struct intel_vgpu *vgpu,
	struct intel_vgpu_ppgtt_spt *spt, unsigned long index,
	struct intel_gvt_gtt_entry *se)
{
	const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
	struct intel_gvt_gtt_entry entry = *se;
	unsigned long start_gfn;
	dma_addr_t dma_addr;
	int i, ret;

	gvt_vdbg_mm("Split 64K gtt entry, index %lu\n", index);

	GEM_BUG_ON(index % GTT_64K_PTE_STRIDE);

	start_gfn = ops->get_pfn(se);

	/* Template entry: downgrade to 4K type, tag as a 64K split member. */
	entry.type = GTT_TYPE_PPGTT_PTE_4K_ENTRY;
	ops->set_64k_splited(&entry);

	for (i = 0; i < GTT_64K_PTE_STRIDE; i++) {
		ret = intel_gvt_dma_map_guest_page(vgpu, start_gfn + i,
						   PAGE_SIZE, &dma_addr);
		if (ret)
			return ret;

		ops->set_pfn(&entry, dma_addr >> PAGE_SHIFT);
		ppgtt_set_shadow_entry(spt, &entry, index + i);
	}
	return 0;
}
1234eb3a3530SChangbin Du 
/*
 * Shadow a single leaf guest PTE into the shadow page table.
 *
 * 4K entries are DMA-mapped directly.  64K entries are always split into
 * 4K shadow entries.  2M entries are shadowed as huge pages only when the
 * hardware supports 2M GTT pages and a 2M DMA mapping succeeds; otherwise
 * they are split into a sub page table of 4K entries.  1G entries are not
 * supported.
 *
 * Returns 0 on success, negative error code on failure.
 */
static int ppgtt_populate_shadow_entry(struct intel_vgpu *vgpu,
	struct intel_vgpu_ppgtt_spt *spt, unsigned long index,
	struct intel_gvt_gtt_entry *ge)
{
	const struct intel_gvt_gtt_pte_ops *pte_ops = vgpu->gvt->gtt.pte_ops;
	struct intel_gvt_gtt_entry se = *ge;
	unsigned long gfn;
	dma_addr_t dma_addr;
	int ret;

	/* A non-present guest entry needs no shadow. */
	if (!pte_ops->test_present(ge))
		return 0;

	gfn = pte_ops->get_pfn(ge);

	switch (ge->type) {
	case GTT_TYPE_PPGTT_PTE_4K_ENTRY:
		gvt_vdbg_mm("shadow 4K gtt entry\n");
		ret = intel_gvt_dma_map_guest_page(vgpu, gfn, PAGE_SIZE, &dma_addr);
		if (ret)
			return -ENXIO;
		break;
	case GTT_TYPE_PPGTT_PTE_64K_ENTRY:
		gvt_vdbg_mm("shadow 64K gtt entry\n");
		/*
		 * The layout of 64K page is special, the page size is
		 * controlled by upper PDE. To be simple, we always split
		 * 64K page to smaller 4K pages in shadow PT.
		 */
		return split_64KB_gtt_entry(vgpu, spt, index, &se);
	case GTT_TYPE_PPGTT_PTE_2M_ENTRY:
		gvt_vdbg_mm("shadow 2M gtt entry\n");
		/*
		 * Fall back to splitting when 2M GTT pages are not
		 * supported by the hardware or the 2M DMA mapping fails.
		 */
		if (!HAS_PAGE_SIZES(vgpu->gvt->gt->i915, I915_GTT_PAGE_SIZE_2M) ||
		    intel_gvt_dma_map_guest_page(vgpu, gfn,
						 I915_GTT_PAGE_SIZE_2M, &dma_addr))
			return split_2MB_gtt_entry(vgpu, spt, index, &se);
		break;
	case GTT_TYPE_PPGTT_PTE_1G_ENTRY:
		gvt_vgpu_err("GVT doesn't support 1GB entry\n");
		return -EINVAL;
	default:
		GEM_BUG_ON(1);
		return -EINVAL;
	}

	/* Successfully shadowed a 4K or 2M page (without splitting). */
	pte_ops->set_pfn(&se, dma_addr >> PAGE_SHIFT);
	ppgtt_set_shadow_entry(spt, &se, index);
	return 0;
}
128572f03d7eSChangbin Du 
/*
 * Populate a whole shadow page table from its guest counterpart.
 *
 * Walks every present guest entry in @spt: entries that point at a
 * lower-level page table are shadowed recursively (via
 * ppgtt_populate_spt_by_guest_entry) and linked into the shadow table;
 * leaf entries are shadowed directly via ppgtt_populate_shadow_entry().
 *
 * Returns 0 on success, negative error code on failure.
 */
static int ppgtt_populate_spt(struct intel_vgpu_ppgtt_spt *spt)
{
	struct intel_vgpu *vgpu = spt->vgpu;
	struct intel_vgpu_ppgtt_spt *s;
	struct intel_gvt_gtt_entry se, ge;
	unsigned long i;
	int ret;

	trace_spt_change(spt->vgpu->id, "born", spt,
			 spt->guest_page.gfn, spt->shadow_page.type);

	for_each_present_guest_entry(spt, &ge, i) {
		if (gtt_type_is_pt(get_next_pt_type(ge.type))) {
			/* Points at a page table: shadow the subtree. */
			s = ppgtt_populate_spt_by_guest_entry(vgpu, &ge);
			if (IS_ERR(s)) {
				ret = PTR_ERR(s);
				goto fail;
			}
			ppgtt_get_shadow_entry(spt, &se, i);
			ppgtt_generate_shadow_entry(&se, s, &ge);
			ppgtt_set_shadow_entry(spt, &se, i);
		} else {
			/* Leaf PTE: map the guest page and shadow it. */
			ret = ppgtt_populate_shadow_entry(vgpu, spt, i, &ge);
			if (ret)
				goto fail;
		}
	}
	return 0;
fail:
	gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d\n",
			spt, ge.val64, ge.type);
	return ret;
}
13192707e444SZhi Wang 
/*
 * Tear down the shadow state behind a guest entry that is being removed.
 *
 * @se is the OLD shadow entry at @index.  If it references a lower-level
 * shadow page table, that table's refcount is dropped (possibly freeing
 * it); if it is a leaf entry, the DMA mapping behind it is released.
 * Entries that are non-present or that already point at the per-type
 * scratch page need no work.
 *
 * Returns 0 on success, negative error code on failure.
 */
static int ppgtt_handle_guest_entry_removal(struct intel_vgpu_ppgtt_spt *spt,
		struct intel_gvt_gtt_entry *se, unsigned long index)
{
	struct intel_vgpu *vgpu = spt->vgpu;
	const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
	int ret;

	trace_spt_guest_change(spt->vgpu->id, "remove", spt,
			       spt->shadow_page.type, se->val64, index);

	gvt_vdbg_mm("destroy old shadow entry, type %d, index %lu, value %llx\n",
		    se->type, index, se->val64);

	if (!ops->test_present(se))
		return 0;

	/* Entries already pointing at the scratch page carry no resources. */
	if (ops->get_pfn(se) ==
	    vgpu->gtt.scratch_pt[spt->shadow_page.type].page_mfn)
		return 0;

	if (gtt_type_is_pt(get_next_pt_type(se->type))) {
		/* Drop the lower-level shadow page table this entry links. */
		struct intel_vgpu_ppgtt_spt *s =
			intel_vgpu_find_spt_by_mfn(vgpu, ops->get_pfn(se));
		if (!s) {
			gvt_vgpu_err("fail to find guest page\n");
			ret = -ENXIO;
			goto fail;
		}
		ret = ppgtt_invalidate_spt(s);
		if (ret)
			goto fail;
	} else {
		/* We don't setup 64K shadow entry so far. */
		WARN(se->type == GTT_TYPE_PPGTT_PTE_64K_ENTRY,
		     "suspicious 64K entry\n");
		/* Leaf entry: unmap the DMA mapping behind it. */
		ppgtt_invalidate_pte(spt, se);
	}

	return 0;
fail:
	gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d\n",
			spt, se->val64, se->type);
	return ret;
}
13642707e444SZhi Wang 
/*
 * Build shadow state for a guest entry that just became present.
 *
 * If @we points at a lower-level page table, that table is shadowed
 * (recursively) and a shadow link entry is written at @index; otherwise
 * the leaf entry is shadowed directly via ppgtt_populate_shadow_entry().
 *
 * Returns 0 on success, negative error code on failure.
 */
static int ppgtt_handle_guest_entry_add(struct intel_vgpu_ppgtt_spt *spt,
		struct intel_gvt_gtt_entry *we, unsigned long index)
{
	struct intel_vgpu *vgpu = spt->vgpu;
	struct intel_gvt_gtt_entry m;
	struct intel_vgpu_ppgtt_spt *s;
	int ret;

	trace_spt_guest_change(spt->vgpu->id, "add", spt, spt->shadow_page.type,
			       we->val64, index);

	gvt_vdbg_mm("add shadow entry: type %d, index %lu, value %llx\n",
		    we->type, index, we->val64);

	if (gtt_type_is_pt(get_next_pt_type(we->type))) {
		/* Non-leaf: shadow the referenced page table first. */
		s = ppgtt_populate_spt_by_guest_entry(vgpu, we);
		if (IS_ERR(s)) {
			ret = PTR_ERR(s);
			goto fail;
		}
		ppgtt_get_shadow_entry(spt, &m, index);
		ppgtt_generate_shadow_entry(&m, s, we);
		ppgtt_set_shadow_entry(spt, &m, index);
	} else {
		/* Leaf: map and shadow the guest page directly. */
		ret = ppgtt_populate_shadow_entry(vgpu, spt, index, we);
		if (ret)
			goto fail;
	}
	return 0;
fail:
	gvt_vgpu_err("fail: spt %p guest entry 0x%llx type %d\n",
		spt, we->val64, we->type);
	return ret;
}
13992707e444SZhi Wang 
/*
 * Re-synchronize an out-of-sync guest page table with its shadow.
 *
 * Compares every entry of the cached snapshot (oos_page->mem) against the
 * current guest page table.  Entries that changed — or that were flagged
 * in the post-shadow bitmap — are re-shadowed and the snapshot entry is
 * refreshed.  Finally the page's write counter is reset and it is removed
 * from the post-shadow list.
 *
 * Returns 0 on success, negative error code on failure.
 */
static int sync_oos_page(struct intel_vgpu *vgpu,
		struct intel_vgpu_oos_page *oos_page)
{
	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
	struct intel_gvt *gvt = vgpu->gvt;
	const struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
	struct intel_vgpu_ppgtt_spt *spt = oos_page->spt;
	struct intel_gvt_gtt_entry old, new;
	int index;
	int ret;

	trace_oos_change(vgpu->id, "sync", oos_page->id,
			 spt, spt->guest_page.type);

	old.type = new.type = get_entry_type(spt->guest_page.type);
	old.val64 = new.val64 = 0;

	for (index = 0; index < (I915_GTT_PAGE_SIZE >>
				info->gtt_entry_size_shift); index++) {
		/* old = cached snapshot; new = current guest entry. */
		ops->get_entry(oos_page->mem, &old, index, false, 0, vgpu);
		ops->get_entry(NULL, &new, index, true,
			       spt->guest_page.gfn << PAGE_SHIFT, vgpu);

		if (old.val64 == new.val64
			&& !test_and_clear_bit(index, spt->post_shadow_bitmap))
			continue;

		trace_oos_sync(vgpu->id, oos_page->id,
				spt, spt->guest_page.type,
				new.val64, index);

		ret = ppgtt_populate_shadow_entry(vgpu, spt, index, &new);
		if (ret)
			return ret;

		/* Refresh the snapshot so the next sync diffs against it. */
		ops->set_entry(oos_page->mem, &new, index, false, 0, vgpu);
	}

	spt->guest_page.write_cnt = 0;
	list_del_init(&spt->post_shadow_list);
	return 0;
}
14422707e444SZhi Wang 
detach_oos_page(struct intel_vgpu * vgpu,struct intel_vgpu_oos_page * oos_page)14432707e444SZhi Wang static int detach_oos_page(struct intel_vgpu *vgpu,
14442707e444SZhi Wang 		struct intel_vgpu_oos_page *oos_page)
14452707e444SZhi Wang {
14462707e444SZhi Wang 	struct intel_gvt *gvt = vgpu->gvt;
144744b46733SChangbin Du 	struct intel_vgpu_ppgtt_spt *spt = oos_page->spt;
14482707e444SZhi Wang 
14492707e444SZhi Wang 	trace_oos_change(vgpu->id, "detach", oos_page->id,
145044b46733SChangbin Du 			 spt, spt->guest_page.type);
14512707e444SZhi Wang 
145244b46733SChangbin Du 	spt->guest_page.write_cnt = 0;
145344b46733SChangbin Du 	spt->guest_page.oos_page = NULL;
145444b46733SChangbin Du 	oos_page->spt = NULL;
14552707e444SZhi Wang 
14562707e444SZhi Wang 	list_del_init(&oos_page->vm_list);
14572707e444SZhi Wang 	list_move_tail(&oos_page->list, &gvt->gtt.oos_page_free_list_head);
14582707e444SZhi Wang 
14592707e444SZhi Wang 	return 0;
14602707e444SZhi Wang }
14612707e444SZhi Wang 
/*
 * Associate a free oos page with a shadow page table: snapshot the guest
 * page table into oos_page->mem, link the two structures, and move the
 * oos page onto the global in-use list.
 *
 * Returns 0 on success, negative error code if reading the guest page
 * fails (in which case nothing is attached).
 */
static int attach_oos_page(struct intel_vgpu_oos_page *oos_page,
		struct intel_vgpu_ppgtt_spt *spt)
{
	struct intel_gvt *gvt = spt->vgpu->gvt;
	int ret;

	/* Snapshot the current guest page table content. */
	ret = intel_gvt_read_gpa(spt->vgpu,
			spt->guest_page.gfn << I915_GTT_PAGE_SHIFT,
			oos_page->mem, I915_GTT_PAGE_SIZE);
	if (ret)
		return ret;

	oos_page->spt = spt;
	spt->guest_page.oos_page = oos_page;

	list_move_tail(&oos_page->list, &gvt->gtt.oos_page_use_list_head);

	trace_oos_change(spt->vgpu->id, "attach", oos_page->id,
			 spt, spt->guest_page.type);
	return 0;
}
14832707e444SZhi Wang 
/*
 * Bring an out-of-sync guest page table back in sync: re-enable write
 * tracking on the guest page, remove it from the per-vGPU OOS list, and
 * replay any writes that happened while it was untracked.
 *
 * Returns 0 on success, negative error code on failure.
 */
static int ppgtt_set_guest_page_sync(struct intel_vgpu_ppgtt_spt *spt)
{
	struct intel_vgpu_oos_page *oos_page = spt->guest_page.oos_page;
	int ret;

	/* Re-arm write protection before diffing, so no write is lost. */
	ret = intel_vgpu_enable_page_track(spt->vgpu, spt->guest_page.gfn);
	if (ret)
		return ret;

	trace_oos_change(spt->vgpu->id, "set page sync", oos_page->id,
			 spt, spt->guest_page.type);

	list_del_init(&oos_page->vm_list);
	return sync_oos_page(spt->vgpu, oos_page);
}
14992707e444SZhi Wang 
/*
 * Find an oos page for @spt and attach it.
 *
 * Takes a page from the global free list when one is available; otherwise
 * evicts the oldest in-use oos page (syncing and detaching it first) and
 * reuses it.
 *
 * Returns 0 on success, negative error code on failure.
 */
static int ppgtt_allocate_oos_page(struct intel_vgpu_ppgtt_spt *spt)
{
	struct intel_gvt *gvt = spt->vgpu->gvt;
	struct intel_gvt_gtt *gtt = &gvt->gtt;
	struct intel_vgpu_oos_page *oos_page = spt->guest_page.oos_page;
	int ret;

	WARN(oos_page, "shadow PPGTT page has already has a oos page\n");

	if (list_empty(&gtt->oos_page_free_list_head)) {
		/* Free list empty: recycle the oldest in-use oos page. */
		oos_page = container_of(gtt->oos_page_use_list_head.next,
			struct intel_vgpu_oos_page, list);
		ret = ppgtt_set_guest_page_sync(oos_page->spt);
		if (ret)
			return ret;
		ret = detach_oos_page(spt->vgpu, oos_page);
		if (ret)
			return ret;
	} else
		oos_page = container_of(gtt->oos_page_free_list_head.next,
			struct intel_vgpu_oos_page, list);
	return attach_oos_page(oos_page, spt);
}
15232707e444SZhi Wang 
/*
 * Mark a guest page table as out-of-sync: queue its oos page on the
 * per-vGPU OOS list and drop write tracking on the guest page, so guest
 * writes land untracked until the next sync (at workload submission).
 *
 * Returns 0 on success, -EINVAL if no oos page is attached, or a negative
 * error code from disabling the page track.
 */
static int ppgtt_set_guest_page_oos(struct intel_vgpu_ppgtt_spt *spt)
{
	struct intel_vgpu_oos_page *oos_page = spt->guest_page.oos_page;

	if (WARN(!oos_page, "shadow PPGTT page should have a oos page\n"))
		return -EINVAL;

	trace_oos_change(spt->vgpu->id, "set page out of sync", oos_page->id,
			 spt, spt->guest_page.type);

	list_add_tail(&oos_page->vm_list, &spt->vgpu->gtt.oos_page_list_head);
	return intel_vgpu_disable_page_track(spt->vgpu, spt->guest_page.gfn);
}
15372707e444SZhi Wang 
15382707e444SZhi Wang /**
15392707e444SZhi Wang  * intel_vgpu_sync_oos_pages - sync all the out-of-synced shadow for vGPU
15402707e444SZhi Wang  * @vgpu: a vGPU
15412707e444SZhi Wang  *
15422707e444SZhi Wang  * This function is called before submitting a guest workload to host,
15432707e444SZhi Wang  * to sync all the out-of-synced shadow for vGPU
15442707e444SZhi Wang  *
15452707e444SZhi Wang  * Returns:
15462707e444SZhi Wang  * Zero on success, negative error code if failed.
15472707e444SZhi Wang  */
intel_vgpu_sync_oos_pages(struct intel_vgpu * vgpu)15482707e444SZhi Wang int intel_vgpu_sync_oos_pages(struct intel_vgpu *vgpu)
15492707e444SZhi Wang {
15502707e444SZhi Wang 	struct list_head *pos, *n;
15512707e444SZhi Wang 	struct intel_vgpu_oos_page *oos_page;
15522707e444SZhi Wang 	int ret;
15532707e444SZhi Wang 
15542707e444SZhi Wang 	if (!enable_out_of_sync)
15552707e444SZhi Wang 		return 0;
15562707e444SZhi Wang 
15572707e444SZhi Wang 	list_for_each_safe(pos, n, &vgpu->gtt.oos_page_list_head) {
15582707e444SZhi Wang 		oos_page = container_of(pos,
15592707e444SZhi Wang 				struct intel_vgpu_oos_page, vm_list);
156044b46733SChangbin Du 		ret = ppgtt_set_guest_page_sync(oos_page->spt);
15612707e444SZhi Wang 		if (ret)
15622707e444SZhi Wang 			return ret;
15632707e444SZhi Wang 	}
15642707e444SZhi Wang 	return 0;
15652707e444SZhi Wang }
15662707e444SZhi Wang 
15672707e444SZhi Wang /*
15682707e444SZhi Wang  * The heart of PPGTT shadow page table.
15692707e444SZhi Wang  */
/*
 * The heart of PPGTT shadow page table.
 *
 * Handle a full-entry guest write at @index: shadow the new entry (if
 * present), then tear down whatever the old shadow entry referenced.
 * Adding before removing keeps the shadow table valid for the whole
 * transition window.  When the new entry is non-present, the shadow
 * slot(s) are redirected to the per-type scratch page — including all 16
 * sub-entries of a split 64K entry, and the PSE bit of 2M/1G entries.
 *
 * Returns 0 on success, negative error code on failure.
 */
static int ppgtt_handle_guest_write_page_table(
		struct intel_vgpu_ppgtt_spt *spt,
		struct intel_gvt_gtt_entry *we, unsigned long index)
{
	struct intel_vgpu *vgpu = spt->vgpu;
	int type = spt->shadow_page.type;
	const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
	struct intel_gvt_gtt_entry old_se;
	int new_present;
	int i, ret;

	new_present = ops->test_present(we);

	/*
	 * Adding the new entry first and then removing the old one, that can
	 * guarantee the ppgtt table is validated during the window between
	 * adding and removal.
	 */
	ppgtt_get_shadow_entry(spt, &old_se, index);

	if (new_present) {
		ret = ppgtt_handle_guest_entry_add(spt, we, index);
		if (ret)
			goto fail;
	}

	ret = ppgtt_handle_guest_entry_removal(spt, &old_se, index);
	if (ret)
		goto fail;

	if (!new_present) {
		/* For 64KB split entries, we need to clear them all. */
		if (ops->test_64k_splited(&old_se) &&
		    !(index % GTT_64K_PTE_STRIDE)) {
			gvt_vdbg_mm("remove splited 64K shadow entries\n");
			for (i = 0; i < GTT_64K_PTE_STRIDE; i++) {
				ops->clear_64k_splited(&old_se);
				ops->set_pfn(&old_se,
					vgpu->gtt.scratch_pt[type].page_mfn);
				ppgtt_set_shadow_entry(spt, &old_se, index + i);
			}
		} else if (old_se.type == GTT_TYPE_PPGTT_PTE_2M_ENTRY ||
			   old_se.type == GTT_TYPE_PPGTT_PTE_1G_ENTRY) {
			/* Huge entry: drop the PSE bit before scratching. */
			ops->clear_pse(&old_se);
			ops->set_pfn(&old_se,
				     vgpu->gtt.scratch_pt[type].page_mfn);
			ppgtt_set_shadow_entry(spt, &old_se, index);
		} else {
			ops->set_pfn(&old_se,
				     vgpu->gtt.scratch_pt[type].page_mfn);
			ppgtt_set_shadow_entry(spt, &old_se, index);
		}
	}

	return 0;
fail:
	gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d.\n",
			spt, we->val64, we->type);
	return ret;
}
16302707e444SZhi Wang 
163172f03d7eSChangbin Du 
163272f03d7eSChangbin Du 
can_do_out_of_sync(struct intel_vgpu_ppgtt_spt * spt)163344b46733SChangbin Du static inline bool can_do_out_of_sync(struct intel_vgpu_ppgtt_spt *spt)
16342707e444SZhi Wang {
16352707e444SZhi Wang 	return enable_out_of_sync
163644b46733SChangbin Du 		&& gtt_type_is_pte_pt(spt->guest_page.type)
163744b46733SChangbin Du 		&& spt->guest_page.write_cnt >= 2;
16382707e444SZhi Wang }
16392707e444SZhi Wang 
ppgtt_set_post_shadow(struct intel_vgpu_ppgtt_spt * spt,unsigned long index)16402707e444SZhi Wang static void ppgtt_set_post_shadow(struct intel_vgpu_ppgtt_spt *spt,
16412707e444SZhi Wang 		unsigned long index)
16422707e444SZhi Wang {
16432707e444SZhi Wang 	set_bit(index, spt->post_shadow_bitmap);
16442707e444SZhi Wang 	if (!list_empty(&spt->post_shadow_list))
16452707e444SZhi Wang 		return;
16462707e444SZhi Wang 
16472707e444SZhi Wang 	list_add_tail(&spt->post_shadow_list,
16482707e444SZhi Wang 			&spt->vgpu->gtt.post_shadow_list_head);
16492707e444SZhi Wang }
16502707e444SZhi Wang 
/**
 * intel_vgpu_flush_post_shadow - flush the post shadow transactions
 * @vgpu: a vGPU
 *
 * This function is called before submitting a guest workload to host,
 * to flush all the post shadows for a vGPU.
 *
 * Returns:
 * Zero on success, negative error code if failed.
 */
int intel_vgpu_flush_post_shadow(struct intel_vgpu *vgpu)
{
	struct list_head *pos, *n;
	struct intel_vgpu_ppgtt_spt *spt;
	struct intel_gvt_gtt_entry ge;
	unsigned long index;
	int ret;

	/* Safe walk: each spt removes itself from the list when done. */
	list_for_each_safe(pos, n, &vgpu->gtt.post_shadow_list_head) {
		spt = container_of(pos, struct intel_vgpu_ppgtt_spt,
				post_shadow_list);

		/* Re-shadow every entry flagged by ppgtt_set_post_shadow(). */
		for_each_set_bit(index, spt->post_shadow_bitmap,
				GTT_ENTRY_NUM_IN_ONE_PAGE) {
			ppgtt_get_guest_entry(spt, &ge, index);

			ret = ppgtt_handle_guest_write_page_table(spt,
							&ge, index);
			if (ret)
				return ret;
			clear_bit(index, spt->post_shadow_bitmap);
		}
		list_del_init(&spt->post_shadow_list);
	}
	return 0;
}
16872707e444SZhi Wang 
/*
 * Handle a tracked guest write of @bytes at guest physical address @pa
 * inside a shadowed PPGTT page table page.
 *
 * A write covering a whole GTT entry is shadowed immediately.  A partial
 * write only tears down the old shadow entry (redirecting it to the
 * scratch page) and defers re-shadowing to the post-shadow flush, so a
 * torn, half-written PTE is never shadowed.  With out-of-sync enabled,
 * frequently-written PTE tables are untracked and re-synced only at
 * workload submission.
 *
 * Returns 0 on success, negative error code on failure.
 */
static int ppgtt_handle_guest_write_page_table_bytes(
		struct intel_vgpu_ppgtt_spt *spt,
		u64 pa, void *p_data, int bytes)
{
	struct intel_vgpu *vgpu = spt->vgpu;
	const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
	struct intel_gvt_gtt_entry we, se;
	unsigned long index;
	int ret;

	index = (pa & (PAGE_SIZE - 1)) >> info->gtt_entry_size_shift;

	ppgtt_get_guest_entry(spt, &we, index);

	/*
	 * For page table which has 64K gtt entry, only PTE#0, PTE#16,
	 * PTE#32, ... PTE#496 are used. Unused PTEs update should be
	 * ignored.
	 */
	if (we.type == GTT_TYPE_PPGTT_PTE_64K_ENTRY &&
	    (index % GTT_64K_PTE_STRIDE)) {
		gvt_vdbg_mm("Ignore write to unused PTE entry, index %lu\n",
			    index);
		return 0;
	}

	if (bytes == info->gtt_entry_size) {
		/* Whole entry written atomically: shadow it right away. */
		ret = ppgtt_handle_guest_write_page_table(spt, &we, index);
		if (ret)
			return ret;
	} else {
		/*
		 * Partial write: invalidate the old shadow entry once and
		 * defer re-shadowing until the guest entry is complete.
		 */
		if (!test_bit(index, spt->post_shadow_bitmap)) {
			int type = spt->shadow_page.type;

			ppgtt_get_shadow_entry(spt, &se, index);
			ret = ppgtt_handle_guest_entry_removal(spt, &se, index);
			if (ret)
				return ret;
			ops->set_pfn(&se, vgpu->gtt.scratch_pt[type].page_mfn);
			ppgtt_set_shadow_entry(spt, &se, index);
		}
		ppgtt_set_post_shadow(spt, index);
	}

	if (!enable_out_of_sync)
		return 0;

	spt->guest_page.write_cnt++;

	/* Keep the oos snapshot coherent with the guest's latest write. */
	if (spt->guest_page.oos_page)
		ops->set_entry(spt->guest_page.oos_page->mem, &we, index,
				false, 0, vgpu);

	if (can_do_out_of_sync(spt)) {
		if (!spt->guest_page.oos_page) {
			/*
			 * Propagate allocation/attach failures instead of
			 * discarding them; previously a failure here fell
			 * through to ppgtt_set_guest_page_oos(), which
			 * WARNed and returned -EINVAL, masking the real
			 * error code.
			 */
			ret = ppgtt_allocate_oos_page(spt);
			if (ret)
				return ret;
		}

		ret = ppgtt_set_guest_page_oos(spt);
		if (ret < 0)
			return ret;
	}
	return 0;
}
17522707e444SZhi Wang 
/*
 * Tear down the shadow page table hierarchy of a PPGTT mm.
 *
 * For each present shadow root (PDP) entry, the subtree beneath it is
 * invalidated and the root entry is zeroed.  No-op if the mm is not
 * currently shadowed; clears ppgtt_mm.shadowed when done.
 */
static void invalidate_ppgtt_mm(struct intel_vgpu_mm *mm)
{
	struct intel_vgpu *vgpu = mm->vgpu;
	struct intel_gvt *gvt = vgpu->gvt;
	struct intel_gvt_gtt *gtt = &gvt->gtt;
	const struct intel_gvt_gtt_pte_ops *ops = gtt->pte_ops;
	struct intel_gvt_gtt_entry se;
	int index;

	if (!mm->ppgtt_mm.shadowed)
		return;

	for (index = 0; index < ARRAY_SIZE(mm->ppgtt_mm.shadow_pdps); index++) {
		ppgtt_get_shadow_root_entry(mm, &se, index);

		if (!ops->test_present(&se))
			continue;

		/* Drop the whole shadow subtree behind this root entry. */
		ppgtt_invalidate_spt_by_shadow_entry(vgpu, &se);
		se.val64 = 0;
		ppgtt_set_shadow_root_entry(mm, &se, index);

		trace_spt_guest_change(vgpu->id, "destroy root pointer",
				       NULL, se.type, se.val64, index);
	}

	mm->ppgtt_mm.shadowed = false;
}
17812707e444SZhi Wang 
1782ede9d0cfSChangbin Du 
/*
 * Build the shadow page table hierarchy for a PPGTT mm.
 *
 * For each present guest root (PDP) entry, the guest subtree is shadowed
 * and a corresponding shadow root entry is written.  Requires the vGPU to
 * be attached.  On any failure the partially-built shadow is torn down
 * via invalidate_ppgtt_mm().
 *
 * Returns 0 on success (or if already shadowed), negative error code on
 * failure.
 */
static int shadow_ppgtt_mm(struct intel_vgpu_mm *mm)
{
	struct intel_vgpu *vgpu = mm->vgpu;
	struct intel_gvt *gvt = vgpu->gvt;
	struct intel_gvt_gtt *gtt = &gvt->gtt;
	const struct intel_gvt_gtt_pte_ops *ops = gtt->pte_ops;
	struct intel_vgpu_ppgtt_spt *spt;
	struct intel_gvt_gtt_entry ge, se;
	int index, ret;

	if (mm->ppgtt_mm.shadowed)
		return 0;

	if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status))
		return -EINVAL;

	/* Set before populating so the fail path can invalidate. */
	mm->ppgtt_mm.shadowed = true;

	for (index = 0; index < ARRAY_SIZE(mm->ppgtt_mm.guest_pdps); index++) {
		ppgtt_get_guest_root_entry(mm, &ge, index);

		if (!ops->test_present(&ge))
			continue;

		trace_spt_guest_change(vgpu->id, __func__, NULL,
				       ge.type, ge.val64, index);

		spt = ppgtt_populate_spt_by_guest_entry(vgpu, &ge);
		if (IS_ERR(spt)) {
			gvt_vgpu_err("fail to populate guest root pointer\n");
			ret = PTR_ERR(spt);
			goto fail;
		}
		ppgtt_generate_shadow_entry(&se, spt, &ge);
		ppgtt_set_shadow_root_entry(mm, &se, index);

		trace_spt_guest_change(vgpu->id, "populate root pointer",
				       NULL, se.type, se.val64, index);
	}

	return 0;
fail:
	invalidate_ppgtt_mm(mm);
	return ret;
}
18282707e444SZhi Wang 
vgpu_alloc_mm(struct intel_vgpu * vgpu)1829ede9d0cfSChangbin Du static struct intel_vgpu_mm *vgpu_alloc_mm(struct intel_vgpu *vgpu)
1830ede9d0cfSChangbin Du {
1831ede9d0cfSChangbin Du 	struct intel_vgpu_mm *mm;
1832ede9d0cfSChangbin Du 
1833ede9d0cfSChangbin Du 	mm = kzalloc(sizeof(*mm), GFP_KERNEL);
1834ede9d0cfSChangbin Du 	if (!mm)
1835ede9d0cfSChangbin Du 		return NULL;
1836ede9d0cfSChangbin Du 
1837ede9d0cfSChangbin Du 	mm->vgpu = vgpu;
1838ede9d0cfSChangbin Du 	kref_init(&mm->ref);
1839ede9d0cfSChangbin Du 	atomic_set(&mm->pincount, 0);
1840ede9d0cfSChangbin Du 
1841ede9d0cfSChangbin Du 	return mm;
1842ede9d0cfSChangbin Du }
1843ede9d0cfSChangbin Du 
/* Release the storage of an mm object created by vgpu_alloc_mm(). */
static void vgpu_free_mm(struct intel_vgpu_mm *mm)
{
	kfree(mm);
}
1848ede9d0cfSChangbin Du 
18492707e444SZhi Wang /**
1850ede9d0cfSChangbin Du  * intel_vgpu_create_ppgtt_mm - create a ppgtt mm object for a vGPU
18512707e444SZhi Wang  * @vgpu: a vGPU
1852ede9d0cfSChangbin Du  * @root_entry_type: ppgtt root entry type
1853ede9d0cfSChangbin Du  * @pdps: guest pdps.
18542707e444SZhi Wang  *
1855ede9d0cfSChangbin Du  * This function is used to create a ppgtt mm object for a vGPU.
18562707e444SZhi Wang  *
18572707e444SZhi Wang  * Returns:
18582707e444SZhi Wang  * Zero on success, negative error code in pointer if failed.
18592707e444SZhi Wang  */
struct intel_vgpu_mm *intel_vgpu_create_ppgtt_mm(struct intel_vgpu *vgpu,
		enum intel_gvt_gtt_type root_entry_type, u64 pdps[])
{
	struct intel_gvt *gvt = vgpu->gvt;
	struct intel_vgpu_mm *mm;
	int ret;

	mm = vgpu_alloc_mm(vgpu);
	if (!mm)
		return ERR_PTR(-ENOMEM);

	mm->type = INTEL_GVT_MM_PPGTT;

	/* Only 3-level and 4-level PPGTT root entry types are supported. */
	GEM_BUG_ON(root_entry_type != GTT_TYPE_PPGTT_ROOT_L3_ENTRY &&
		   root_entry_type != GTT_TYPE_PPGTT_ROOT_L4_ENTRY);
	mm->ppgtt_mm.root_entry_type = root_entry_type;

	INIT_LIST_HEAD(&mm->ppgtt_mm.list);
	INIT_LIST_HEAD(&mm->ppgtt_mm.lru_list);
	INIT_LIST_HEAD(&mm->ppgtt_mm.link);

	/* An L4 table has a single root pointer; L3 uses the whole PDP set. */
	if (root_entry_type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY)
		mm->ppgtt_mm.guest_pdps[0] = pdps[0];
	else
		memcpy(mm->ppgtt_mm.guest_pdps, pdps,
		       sizeof(mm->ppgtt_mm.guest_pdps));

	ret = shadow_ppgtt_mm(mm);
	if (ret) {
		gvt_vgpu_err("failed to shadow ppgtt mm\n");
		vgpu_free_mm(mm);
		return ERR_PTR(ret);
	}

	/* Per-vGPU list; the global LRU below needs the device-wide lock. */
	list_add_tail(&mm->ppgtt_mm.list, &vgpu->gtt.ppgtt_mm_list_head);

	mutex_lock(&gvt->gtt.ppgtt_mm_lock);
	list_add_tail(&mm->ppgtt_mm.lru_list, &gvt->gtt.ppgtt_mm_lru_list_head);
	mutex_unlock(&gvt->gtt.ppgtt_mm_lock);

	return mm;
}
1902ede9d0cfSChangbin Du 
/*
 * Create the GGTT mm object for a vGPU: allocates the virtual GGTT
 * entry array plus the host_ggtt_aperture/host_ggtt_hidden arrays
 * (one u64 slot per page of the aperture/hidden ranges; NOTE(review):
 * presumably saved host GGTT entries for later restore — confirm
 * against the users of these fields).
 * Returns the new mm or an ERR_PTR on allocation failure.
 */
static struct intel_vgpu_mm *intel_vgpu_create_ggtt_mm(struct intel_vgpu *vgpu)
{
	struct intel_vgpu_mm *mm;
	unsigned long nr_entries;

	mm = vgpu_alloc_mm(vgpu);
	if (!mm)
		return ERR_PTR(-ENOMEM);

	mm->type = INTEL_GVT_MM_GGTT;

	/* One virtual PTE per GTT page of the whole guest graphics range. */
	nr_entries = gvt_ggtt_gm_sz(vgpu->gvt) >> I915_GTT_PAGE_SHIFT;
	mm->ggtt_mm.virtual_ggtt =
		vzalloc(array_size(nr_entries,
				   vgpu->gvt->device_info.gtt_entry_size));
	if (!mm->ggtt_mm.virtual_ggtt) {
		vgpu_free_mm(mm);
		return ERR_PTR(-ENOMEM);
	}

	mm->ggtt_mm.host_ggtt_aperture = vzalloc((vgpu_aperture_sz(vgpu) >> PAGE_SHIFT) * sizeof(u64));
	if (!mm->ggtt_mm.host_ggtt_aperture) {
		vfree(mm->ggtt_mm.virtual_ggtt);
		vgpu_free_mm(mm);
		return ERR_PTR(-ENOMEM);
	}

	mm->ggtt_mm.host_ggtt_hidden = vzalloc((vgpu_hidden_sz(vgpu) >> PAGE_SHIFT) * sizeof(u64));
	if (!mm->ggtt_mm.host_ggtt_hidden) {
		/* Unwind both earlier allocations before bailing out. */
		vfree(mm->ggtt_mm.host_ggtt_aperture);
		vfree(mm->ggtt_mm.virtual_ggtt);
		vgpu_free_mm(mm);
		return ERR_PTR(-ENOMEM);
	}

	return mm;
}
1940ede9d0cfSChangbin Du 
1941ede9d0cfSChangbin Du /**
19421bc25851SChangbin Du  * _intel_vgpu_mm_release - destroy a mm object
1943ede9d0cfSChangbin Du  * @mm_ref: a kref object
1944ede9d0cfSChangbin Du  *
1945ede9d0cfSChangbin Du  * This function is used to destroy a mm object for vGPU
1946ede9d0cfSChangbin Du  *
1947ede9d0cfSChangbin Du  */
void _intel_vgpu_mm_release(struct kref *mm_ref)
{
	struct intel_vgpu_mm *mm = container_of(mm_ref, typeof(*mm), ref);

	/* A still-pinned mm reaching refcount zero is a refcounting bug. */
	if (GEM_WARN_ON(atomic_read(&mm->pincount)))
		gvt_err("vgpu mm pin count bug detected\n");

	if (mm->type == INTEL_GVT_MM_PPGTT) {
		list_del(&mm->ppgtt_mm.list);

		/* The global LRU list is protected by ppgtt_mm_lock. */
		mutex_lock(&mm->vgpu->gvt->gtt.ppgtt_mm_lock);
		list_del(&mm->ppgtt_mm.lru_list);
		mutex_unlock(&mm->vgpu->gvt->gtt.ppgtt_mm_lock);

		/* Drop the shadow page tables before freeing the mm. */
		invalidate_ppgtt_mm(mm);
	} else {
		/* GGTT mm: free the three vzalloc'ed entry arrays. */
		vfree(mm->ggtt_mm.virtual_ggtt);
		vfree(mm->ggtt_mm.host_ggtt_aperture);
		vfree(mm->ggtt_mm.host_ggtt_hidden);
	}

	vgpu_free_mm(mm);
}
1971ede9d0cfSChangbin Du 
19722707e444SZhi Wang /**
19732707e444SZhi Wang  * intel_vgpu_unpin_mm - decrease the pin count of a vGPU mm object
19742707e444SZhi Wang  * @mm: a vGPU mm object
19752707e444SZhi Wang  *
19762707e444SZhi Wang  * This function is called when user doesn't want to use a vGPU mm object
19772707e444SZhi Wang  */
void intel_vgpu_unpin_mm(struct intel_vgpu_mm *mm)
{
	/* Tolerate unbalanced unpins: never let the count go negative. */
	atomic_dec_if_positive(&mm->pincount);
}
19822707e444SZhi Wang 
19832707e444SZhi Wang /**
19842707e444SZhi Wang  * intel_vgpu_pin_mm - increase the pin count of a vGPU mm object
1985a752b070SZhenyu Wang  * @mm: target vgpu mm
19862707e444SZhi Wang  *
19872707e444SZhi Wang  * This function is called when user wants to use a vGPU mm object. If this
19882707e444SZhi Wang  * mm object hasn't been shadowed yet, the shadow will be populated at this
19892707e444SZhi Wang  * time.
19902707e444SZhi Wang  *
19912707e444SZhi Wang  * Returns:
19922707e444SZhi Wang  * Zero on success, negative error code if failed.
19932707e444SZhi Wang  */
int intel_vgpu_pin_mm(struct intel_vgpu_mm *mm)
{
	int ret;

	atomic_inc(&mm->pincount);

	if (mm->type == INTEL_GVT_MM_PPGTT) {
		/* Ensure the shadow page tables exist before first use. */
		ret = shadow_ppgtt_mm(mm);
		if (ret)
			return ret;

		/* Just used: move to the MRU end of the global LRU list. */
		mutex_lock(&mm->vgpu->gvt->gtt.ppgtt_mm_lock);
		list_move_tail(&mm->ppgtt_mm.lru_list,
			       &mm->vgpu->gvt->gtt.ppgtt_mm_lru_list_head);
		mutex_unlock(&mm->vgpu->gvt->gtt.ppgtt_mm_lock);
	}

	return 0;
}
20132707e444SZhi Wang 
/*
 * Reclaim the shadow page tables of one unpinned PPGTT mm, scanning
 * the global LRU list from the least-recently-used end.
 * Returns 1 when a victim was invalidated, 0 when every mm is pinned.
 */
static int reclaim_one_ppgtt_mm(struct intel_gvt *gvt)
{
	struct intel_vgpu_mm *mm;
	struct list_head *pos, *n;

	mutex_lock(&gvt->gtt.ppgtt_mm_lock);

	list_for_each_safe(pos, n, &gvt->gtt.ppgtt_mm_lru_list_head) {
		mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.lru_list);

		/* Pinned mms are in active use and must not be reclaimed. */
		if (atomic_read(&mm->pincount))
			continue;

		list_del_init(&mm->ppgtt_mm.lru_list);
		/* Drop the lock before the potentially long teardown. */
		mutex_unlock(&gvt->gtt.ppgtt_mm_lock);
		invalidate_ppgtt_mm(mm);
		return 1;
	}
	mutex_unlock(&gvt->gtt.ppgtt_mm_lock);
	return 0;
}
20352707e444SZhi Wang 
20362707e444SZhi Wang /*
20372707e444SZhi Wang  * GMA translation APIs.
20382707e444SZhi Wang  */
/*
 * Descend one page-table level: locate the shadow page table whose mfn
 * is stored in *@e and replace *@e with the entry at @index inside it.
 * When @guest is true the guest view of that table is read, otherwise
 * the shadow view.  Returns 0 on success, -ENXIO when no shadow page
 * is tracked for the mfn.
 */
static inline int ppgtt_get_next_level_entry(struct intel_vgpu_mm *mm,
		struct intel_gvt_gtt_entry *e, unsigned long index, bool guest)
{
	struct intel_vgpu *vgpu = mm->vgpu;
	const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
	struct intel_vgpu_ppgtt_spt *spt;

	spt = intel_vgpu_find_spt_by_mfn(vgpu, ops->get_pfn(e));
	if (!spt)
		return -ENXIO;

	if (guest)
		ppgtt_get_guest_entry(spt, e, index);
	else
		ppgtt_get_shadow_entry(spt, e, index);

	return 0;
}
20562707e444SZhi Wang 
20572707e444SZhi Wang /**
20582707e444SZhi Wang  * intel_vgpu_gma_to_gpa - translate a gma to GPA
20592707e444SZhi Wang  * @mm: mm object. could be a PPGTT or GGTT mm object
20602707e444SZhi Wang  * @gma: graphics memory address in this mm object
20612707e444SZhi Wang  *
20622707e444SZhi Wang  * This function is used to translate a graphics memory address in specific
20632707e444SZhi Wang  * graphics memory space to guest physical address.
20642707e444SZhi Wang  *
20652707e444SZhi Wang  * Returns:
20662707e444SZhi Wang  * Guest physical address on success, INTEL_GVT_INVALID_ADDR if failed.
20672707e444SZhi Wang  */
unsigned long intel_vgpu_gma_to_gpa(struct intel_vgpu_mm *mm, unsigned long gma)
{
	struct intel_vgpu *vgpu = mm->vgpu;
	struct intel_gvt *gvt = vgpu->gvt;
	const struct intel_gvt_gtt_pte_ops *pte_ops = gvt->gtt.pte_ops;
	const struct intel_gvt_gtt_gma_ops *gma_ops = gvt->gtt.gma_ops;
	unsigned long gpa = INTEL_GVT_INVALID_ADDR;
	unsigned long gma_index[4];
	struct intel_gvt_gtt_entry e;
	int i, levels = 0;
	int ret;

	GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT &&
		   mm->type != INTEL_GVT_MM_PPGTT);

	if (mm->type == INTEL_GVT_MM_GGTT) {
		if (!vgpu_gmadr_is_valid(vgpu, gma))
			goto err;

		/* GGTT: single-level lookup straight in the guest array. */
		ggtt_get_guest_entry(mm, &e,
			gma_ops->gma_to_ggtt_pte_index(gma));

		/* GPA = page frame from the PTE + offset within the page. */
		gpa = (pte_ops->get_pfn(&e) << I915_GTT_PAGE_SHIFT)
			+ (gma & ~I915_GTT_PAGE_MASK);

		trace_gma_translate(vgpu->id, "ggtt", 0, 0, gma, gpa);
	} else {
		/* PPGTT: collect the per-level indexes from the address. */
		switch (mm->ppgtt_mm.root_entry_type) {
		case GTT_TYPE_PPGTT_ROOT_L4_ENTRY:
			ppgtt_get_shadow_root_entry(mm, &e, 0);

			gma_index[0] = gma_ops->gma_to_pml4_index(gma);
			gma_index[1] = gma_ops->gma_to_l4_pdp_index(gma);
			gma_index[2] = gma_ops->gma_to_pde_index(gma);
			gma_index[3] = gma_ops->gma_to_pte_index(gma);
			levels = 4;
			break;
		case GTT_TYPE_PPGTT_ROOT_L3_ENTRY:
			/* 3-level table: the PDP index picks the root entry. */
			ppgtt_get_shadow_root_entry(mm, &e,
					gma_ops->gma_to_l3_pdp_index(gma));

			gma_index[0] = gma_ops->gma_to_pde_index(gma);
			gma_index[1] = gma_ops->gma_to_pte_index(gma);
			levels = 2;
			break;
		default:
			GEM_BUG_ON(1);
		}

		/* walk the shadow page table and get gpa from guest entry */
		for (i = 0; i < levels; i++) {
			/* The leaf level is read from the guest view. */
			ret = ppgtt_get_next_level_entry(mm, &e, gma_index[i],
				(i == levels - 1));
			if (ret)
				goto err;

			if (!pte_ops->test_present(&e)) {
				gvt_dbg_core("GMA 0x%lx is not present\n", gma);
				goto err;
			}
		}

		gpa = (pte_ops->get_pfn(&e) << I915_GTT_PAGE_SHIFT) +
					(gma & ~I915_GTT_PAGE_MASK);
		trace_gma_translate(vgpu->id, "ppgtt", 0,
				    mm->ppgtt_mm.root_entry_type, gma, gpa);
	}

	return gpa;
err:
	gvt_vgpu_err("invalid mm type: %d gma %lx\n", mm->type, gma);
	return INTEL_GVT_INVALID_ADDR;
}
21412707e444SZhi Wang 
/*
 * Read a (possibly partial) guest GGTT entry from the vGPU's virtual
 * GGTT.  Reads outside the vGPU's GGTT range return zeroed data rather
 * than an error, so a probing guest does not see an MMIO fault.
 */
static int emulate_ggtt_mmio_read(struct intel_vgpu *vgpu,
	unsigned int off, void *p_data, unsigned int bytes)
{
	struct intel_vgpu_mm *ggtt_mm = vgpu->gtt.ggtt_mm;
	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
	unsigned long index = off >> info->gtt_entry_size_shift;
	unsigned long gma;
	struct intel_gvt_gtt_entry e;

	/* GGTT entries are only accessed with 4- or 8-byte transactions. */
	if (bytes != 4 && bytes != 8)
		return -EINVAL;

	gma = index << I915_GTT_PAGE_SHIFT;
	if (!intel_gvt_ggtt_validate_range(vgpu,
					   gma, 1 << I915_GTT_PAGE_SHIFT)) {
		gvt_dbg_mm("read invalid ggtt at 0x%lx\n", gma);
		memset(p_data, 0, bytes);
		return 0;
	}

	ggtt_get_guest_entry(ggtt_mm, &e, index);
	/* Copy only the bytes this access covers out of the 64-bit entry. */
	memcpy(p_data, (void *)&e.val64 + (off & (info->gtt_entry_size - 1)),
			bytes);
	return 0;
}
21672707e444SZhi Wang 
21682707e444SZhi Wang /**
216991c64a4fSRandy Dunlap  * intel_vgpu_emulate_ggtt_mmio_read - emulate GTT MMIO register read
21702707e444SZhi Wang  * @vgpu: a vGPU
21712707e444SZhi Wang  * @off: register offset
21722707e444SZhi Wang  * @p_data: data will be returned to guest
21732707e444SZhi Wang  * @bytes: data length
21742707e444SZhi Wang  *
21752707e444SZhi Wang  * This function is used to emulate the GTT MMIO register read
21762707e444SZhi Wang  *
21772707e444SZhi Wang  * Returns:
21782707e444SZhi Wang  * Zero on success, error code if failed.
21792707e444SZhi Wang  */
intel_vgpu_emulate_ggtt_mmio_read(struct intel_vgpu * vgpu,unsigned int off,void * p_data,unsigned int bytes)2180a143cef7SChangbin Du int intel_vgpu_emulate_ggtt_mmio_read(struct intel_vgpu *vgpu, unsigned int off,
21812707e444SZhi Wang 	void *p_data, unsigned int bytes)
21822707e444SZhi Wang {
21832707e444SZhi Wang 	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
21842707e444SZhi Wang 	int ret;
21852707e444SZhi Wang 
21862707e444SZhi Wang 	if (bytes != 4 && bytes != 8)
21872707e444SZhi Wang 		return -EINVAL;
21882707e444SZhi Wang 
21892707e444SZhi Wang 	off -= info->gtt_start_offset;
2190a143cef7SChangbin Du 	ret = emulate_ggtt_mmio_read(vgpu, off, p_data, bytes);
21912707e444SZhi Wang 	return ret;
21922707e444SZhi Wang }
21932707e444SZhi Wang 
ggtt_invalidate_pte(struct intel_vgpu * vgpu,struct intel_gvt_gtt_entry * entry)21947598e870SChangbin Du static void ggtt_invalidate_pte(struct intel_vgpu *vgpu,
21957598e870SChangbin Du 		struct intel_gvt_gtt_entry *entry)
21967598e870SChangbin Du {
21975512445cSRikard Falkeborn 	const struct intel_gvt_gtt_pte_ops *pte_ops = vgpu->gvt->gtt.pte_ops;
21987598e870SChangbin Du 	unsigned long pfn;
21997598e870SChangbin Du 
22007598e870SChangbin Du 	pfn = pte_ops->get_pfn(entry);
22017598e870SChangbin Du 	if (pfn != vgpu->gvt->gtt.scratch_mfn)
22028398eee8SChristoph Hellwig 		intel_gvt_dma_unmap_guest_page(vgpu, pfn << PAGE_SHIFT);
22037598e870SChangbin Du }
22047598e870SChangbin Du 
/*
 * Apply a guest write to a GGTT entry: merge the written bytes into
 * the virtual PTE, handle 4-byte partial writes of 8-byte entries via
 * the partial_pte_list, then map the guest page and install the shadow
 * (host) PTE, invalidating whatever mapping the old shadow entry held.
 */
static int emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off,
	void *p_data, unsigned int bytes)
{
	struct intel_gvt *gvt = vgpu->gvt;
	const struct intel_gvt_device_info *info = &gvt->device_info;
	struct intel_vgpu_mm *ggtt_mm = vgpu->gtt.ggtt_mm;
	const struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
	unsigned long g_gtt_index = off >> info->gtt_entry_size_shift;
	unsigned long gma, gfn;
	struct intel_gvt_gtt_entry e = {.val64 = 0, .type = GTT_TYPE_GGTT_PTE};
	struct intel_gvt_gtt_entry m = {.val64 = 0, .type = GTT_TYPE_GGTT_PTE};
	dma_addr_t dma_addr;
	int ret;
	struct intel_gvt_partial_pte *partial_pte, *pos, *n;
	bool partial_update = false;

	if (bytes != 4 && bytes != 8)
		return -EINVAL;

	gma = g_gtt_index << I915_GTT_PAGE_SHIFT;

	/* the VM may configure the whole GM space when ballooning is used */
	if (!vgpu_gmadr_is_valid(vgpu, gma))
		return 0;

	e.type = GTT_TYPE_GGTT_PTE;
	/* Merge the written bytes into the 64-bit guest entry value. */
	memcpy((void *)&e.val64 + (off & (info->gtt_entry_size - 1)), p_data,
			bytes);

	/* If ggtt entry size is 8 bytes, and it's split into two 4 bytes
	 * write, save the first 4 bytes in a list and update virtual
	 * PTE. Only update shadow PTE when the second 4 bytes comes.
	 */
	if (bytes < info->gtt_entry_size) {
		bool found = false;

		list_for_each_entry_safe(pos, n,
				&ggtt_mm->ggtt_mm.partial_pte_list, list) {
			if (g_gtt_index == pos->offset >>
					info->gtt_entry_size_shift) {
				if (off != pos->offset) {
					/* the second partial part*/
					int last_off = pos->offset &
						(info->gtt_entry_size - 1);

					/* Merge the saved half into e. */
					memcpy((void *)&e.val64 + last_off,
						(void *)&pos->data + last_off,
						bytes);

					list_del(&pos->list);
					kfree(pos);
					found = true;
					break;
				}

				/* update of the first partial part */
				pos->data = e.val64;
				ggtt_set_guest_entry(ggtt_mm, &e, g_gtt_index);
				return 0;
			}
		}

		if (!found) {
			/* the first partial part */
			partial_pte = kzalloc(sizeof(*partial_pte), GFP_KERNEL);
			if (!partial_pte)
				return -ENOMEM;
			partial_pte->offset = off;
			partial_pte->data = e.val64;
			list_add_tail(&partial_pte->list,
				&ggtt_mm->ggtt_mm.partial_pte_list);
			partial_update = true;
		}
	}

	if (!partial_update && (ops->test_present(&e))) {
		gfn = ops->get_pfn(&e);
		m.val64 = e.val64;
		m.type = e.type;

		ret = intel_gvt_dma_map_guest_page(vgpu, gfn, PAGE_SIZE,
						   &dma_addr);
		if (ret) {
			gvt_vgpu_err("fail to populate guest ggtt entry\n");
			/* guest driver may read/write the entry when partial
			 * update the entry in this situation p2m will fail
			 * setting the shadow entry to point to a scratch page
			 */
			ops->set_pfn(&m, gvt->gtt.scratch_mfn);
		} else
			ops->set_pfn(&m, dma_addr >> PAGE_SHIFT);
	} else {
		/* Not present (or still partial): shadow points at scratch. */
		ops->set_pfn(&m, gvt->gtt.scratch_mfn);
		ops->clear_present(&m);
	}

	ggtt_set_guest_entry(ggtt_mm, &e, g_gtt_index);

	/* Drop the DMA mapping held by the previous shadow entry. */
	ggtt_get_host_entry(ggtt_mm, &e, g_gtt_index);
	ggtt_invalidate_pte(vgpu, &e);

	ggtt_set_host_entry(ggtt_mm, &m, g_gtt_index);
	ggtt_invalidate(gvt->gt);
	return 0;
}
23102707e444SZhi Wang 
23112707e444SZhi Wang /*
2312a143cef7SChangbin Du  * intel_vgpu_emulate_ggtt_mmio_write - emulate GTT MMIO register write
23132707e444SZhi Wang  * @vgpu: a vGPU
23142707e444SZhi Wang  * @off: register offset
23152707e444SZhi Wang  * @p_data: data from guest write
23162707e444SZhi Wang  * @bytes: data length
23172707e444SZhi Wang  *
23182707e444SZhi Wang  * This function is used to emulate the GTT MMIO register write
23192707e444SZhi Wang  *
23202707e444SZhi Wang  * Returns:
23212707e444SZhi Wang  * Zero on success, error code if failed.
23222707e444SZhi Wang  */
int intel_vgpu_emulate_ggtt_mmio_write(struct intel_vgpu *vgpu,
		unsigned int off, void *p_data, unsigned int bytes)
{
	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
	int ret;
	struct intel_vgpu_submission *s = &vgpu->submission;
	struct intel_engine_cs *engine;
	int i;

	/* Only 4- and 8-byte GTT accesses are legal. */
	if (bytes != 4 && bytes != 8)
		return -EINVAL;

	/* Rebase the MMIO offset to the start of the GTT range. */
	off -= info->gtt_start_offset;
	ret = emulate_ggtt_mmio_write(vgpu, off, p_data, bytes);

	/* if ggtt of last submitted context is written,
	 * that context is probably got unpinned.
	 * Set last shadowed ctx to invalid.
	 */
	for_each_engine(engine, vgpu->gvt->gt, i) {
		if (!s->last_ctx[i].valid)
			continue;

		if (s->last_ctx[i].lrca == (off >> info->gtt_entry_size_shift))
			s->last_ctx[i].valid = false;
	}
	return ret;
}
23512707e444SZhi Wang 
/*
 * Allocate and DMA-map the scratch page for one page-table @type of a
 * vGPU.  Above the PTE level the page is filled with entries pointing
 * at the next lower level's scratch page, forming a scratch tree.
 * Returns 0 on success, negative error code on failure.
 */
static int alloc_scratch_pages(struct intel_vgpu *vgpu,
		enum intel_gvt_gtt_type type)
{
	struct drm_i915_private *i915 = vgpu->gvt->gt->i915;
	struct intel_vgpu_gtt *gtt = &vgpu->gtt;
	const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
	/* Number of entries that fit in one GTT page for this device. */
	int page_entry_num = I915_GTT_PAGE_SIZE >>
				vgpu->gvt->device_info.gtt_entry_size_shift;
	void *scratch_pt;
	int i;
	struct device *dev = vgpu->gvt->gt->i915->drm.dev;
	dma_addr_t daddr;

	if (drm_WARN_ON(&i915->drm,
			type < GTT_TYPE_PPGTT_PTE_PT || type >= GTT_TYPE_MAX))
		return -EINVAL;

	scratch_pt = (void *)get_zeroed_page(GFP_KERNEL);
	if (!scratch_pt) {
		gvt_vgpu_err("fail to allocate scratch page\n");
		return -ENOMEM;
	}

	daddr = dma_map_page(dev, virt_to_page(scratch_pt), 0, 4096, DMA_BIDIRECTIONAL);
	if (dma_mapping_error(dev, daddr)) {
		gvt_vgpu_err("fail to dmamap scratch_pt\n");
		__free_page(virt_to_page(scratch_pt));
		return -ENOMEM;
	}
	gtt->scratch_pt[type].page_mfn =
		(unsigned long)(daddr >> I915_GTT_PAGE_SHIFT);
	gtt->scratch_pt[type].page = virt_to_page(scratch_pt);
	gvt_dbg_mm("vgpu%d create scratch_pt: type %d mfn=0x%lx\n",
			vgpu->id, type, gtt->scratch_pt[type].page_mfn);

	/* Build the tree by full filled the scratch pt with the entries which
	 * point to the next level scratch pt or scratch page. The
	 * scratch_pt[type] indicate the scratch pt/scratch page used by the
	 * 'type' pt.
	 * e.g. scratch_pt[GTT_TYPE_PPGTT_PDE_PT] is used by
	 * GTT_TYPE_PPGTT_PDE_PT level pt, that means this scratch_pt it self
	 * is GTT_TYPE_PPGTT_PTE_PT, and full filled by scratch page mfn.
	 */
	if (type > GTT_TYPE_PPGTT_PTE_PT) {
		struct intel_gvt_gtt_entry se;

		memset(&se, 0, sizeof(struct intel_gvt_gtt_entry));
		se.type = get_entry_type(type - 1);
		ops->set_pfn(&se, gtt->scratch_pt[type - 1].page_mfn);

		/* The entry parameters like present/writeable/cache type
		 * set to the same as i915's scratch page tree.
		 */
		se.val64 |= GEN8_PAGE_PRESENT | GEN8_PAGE_RW;
		if (type == GTT_TYPE_PPGTT_PDE_PT)
			se.val64 |= PPAT_CACHED;

		for (i = 0; i < page_entry_num; i++)
			ops->set_entry(scratch_pt, &se, i, false, 0, vgpu);
	}

	return 0;
}
24152707e444SZhi Wang 
release_scratch_page_tree(struct intel_vgpu * vgpu)24163b6411c2SPing Gao static int release_scratch_page_tree(struct intel_vgpu *vgpu)
24172707e444SZhi Wang {
24183b6411c2SPing Gao 	int i;
24199ff06c38SThomas Zimmermann 	struct device *dev = vgpu->gvt->gt->i915->drm.dev;
24205de6bd4cSChuanxiao Dong 	dma_addr_t daddr;
24213b6411c2SPing Gao 
24223b6411c2SPing Gao 	for (i = GTT_TYPE_PPGTT_PTE_PT; i < GTT_TYPE_MAX; i++) {
24233b6411c2SPing Gao 		if (vgpu->gtt.scratch_pt[i].page != NULL) {
24245de6bd4cSChuanxiao Dong 			daddr = (dma_addr_t)(vgpu->gtt.scratch_pt[i].page_mfn <<
24255c35258dSZhenyu Wang 					I915_GTT_PAGE_SHIFT);
2426c4f61203SCai Huoqing 			dma_unmap_page(dev, daddr, 4096, DMA_BIDIRECTIONAL);
24273b6411c2SPing Gao 			__free_page(vgpu->gtt.scratch_pt[i].page);
24283b6411c2SPing Gao 			vgpu->gtt.scratch_pt[i].page = NULL;
24293b6411c2SPing Gao 			vgpu->gtt.scratch_pt[i].page_mfn = 0;
24302707e444SZhi Wang 		}
24312707e444SZhi Wang 	}
24322707e444SZhi Wang 
24333b6411c2SPing Gao 	return 0;
24343b6411c2SPing Gao }
24353b6411c2SPing Gao 
create_scratch_page_tree(struct intel_vgpu * vgpu)24363b6411c2SPing Gao static int create_scratch_page_tree(struct intel_vgpu *vgpu)
24373b6411c2SPing Gao {
24383b6411c2SPing Gao 	int i, ret;
24393b6411c2SPing Gao 
24403b6411c2SPing Gao 	for (i = GTT_TYPE_PPGTT_PTE_PT; i < GTT_TYPE_MAX; i++) {
24413b6411c2SPing Gao 		ret = alloc_scratch_pages(vgpu, i);
24423b6411c2SPing Gao 		if (ret)
24433b6411c2SPing Gao 			goto err;
24443b6411c2SPing Gao 	}
24453b6411c2SPing Gao 
24463b6411c2SPing Gao 	return 0;
24473b6411c2SPing Gao 
24483b6411c2SPing Gao err:
24493b6411c2SPing Gao 	release_scratch_page_tree(vgpu);
24503b6411c2SPing Gao 	return ret;
24513b6411c2SPing Gao }
24523b6411c2SPing Gao 
24532707e444SZhi Wang /**
24542707e444SZhi Wang  * intel_vgpu_init_gtt - initialize per-vGPU graphics memory virulization
24552707e444SZhi Wang  * @vgpu: a vGPU
24562707e444SZhi Wang  *
24572707e444SZhi Wang  * This function is used to initialize per-vGPU graphics memory virtualization
24582707e444SZhi Wang  * components.
24592707e444SZhi Wang  *
24602707e444SZhi Wang  * Returns:
24612707e444SZhi Wang  * Zero on success, error code if failed.
24622707e444SZhi Wang  */
intel_vgpu_init_gtt(struct intel_vgpu * vgpu)24632707e444SZhi Wang int intel_vgpu_init_gtt(struct intel_vgpu *vgpu)
24642707e444SZhi Wang {
24652707e444SZhi Wang 	struct intel_vgpu_gtt *gtt = &vgpu->gtt;
24662707e444SZhi Wang 
2467b6c126a3SChangbin Du 	INIT_RADIX_TREE(&gtt->spt_tree, GFP_KERNEL);
24682707e444SZhi Wang 
2469ede9d0cfSChangbin Du 	INIT_LIST_HEAD(&gtt->ppgtt_mm_list_head);
24702707e444SZhi Wang 	INIT_LIST_HEAD(&gtt->oos_page_list_head);
24712707e444SZhi Wang 	INIT_LIST_HEAD(&gtt->post_shadow_list_head);
24722707e444SZhi Wang 
2473ede9d0cfSChangbin Du 	gtt->ggtt_mm = intel_vgpu_create_ggtt_mm(vgpu);
2474ede9d0cfSChangbin Du 	if (IS_ERR(gtt->ggtt_mm)) {
2475695fbc08STina Zhang 		gvt_vgpu_err("fail to create mm for ggtt.\n");
2476ede9d0cfSChangbin Du 		return PTR_ERR(gtt->ggtt_mm);
24772707e444SZhi Wang 	}
24782707e444SZhi Wang 
2479f4c43db3SChangbin Du 	intel_vgpu_reset_ggtt(vgpu, false);
24802707e444SZhi Wang 
2481bc0686ffSHang Yuan 	INIT_LIST_HEAD(&gtt->ggtt_mm->ggtt_mm.partial_pte_list);
2482bc0686ffSHang Yuan 
24833b6411c2SPing Gao 	return create_scratch_page_tree(vgpu);
24842707e444SZhi Wang }
24852707e444SZhi Wang 
intel_vgpu_destroy_all_ppgtt_mm(struct intel_vgpu * vgpu)2486ba25d977SColin Xu void intel_vgpu_destroy_all_ppgtt_mm(struct intel_vgpu *vgpu)
2487da9cc8deSPing Gao {
2488da9cc8deSPing Gao 	struct list_head *pos, *n;
2489da9cc8deSPing Gao 	struct intel_vgpu_mm *mm;
2490da9cc8deSPing Gao 
2491ede9d0cfSChangbin Du 	list_for_each_safe(pos, n, &vgpu->gtt.ppgtt_mm_list_head) {
2492ede9d0cfSChangbin Du 		mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.list);
24931bc25851SChangbin Du 		intel_vgpu_destroy_mm(mm);
2494ede9d0cfSChangbin Du 	}
2495ede9d0cfSChangbin Du 
2496ede9d0cfSChangbin Du 	if (GEM_WARN_ON(!list_empty(&vgpu->gtt.ppgtt_mm_list_head)))
249784f69ba0SColin Ian King 		gvt_err("vgpu ppgtt mm is not fully destroyed\n");
2498ede9d0cfSChangbin Du 
2499b6c126a3SChangbin Du 	if (GEM_WARN_ON(!radix_tree_empty(&vgpu->gtt.spt_tree))) {
2500ede9d0cfSChangbin Du 		gvt_err("Why we still has spt not freed?\n");
2501d87f5ff3SChangbin Du 		ppgtt_free_all_spt(vgpu);
2502da9cc8deSPing Gao 	}
2503da9cc8deSPing Gao }
2504ede9d0cfSChangbin Du 
intel_vgpu_destroy_ggtt_mm(struct intel_vgpu * vgpu)2505ede9d0cfSChangbin Du static void intel_vgpu_destroy_ggtt_mm(struct intel_vgpu *vgpu)
2506ede9d0cfSChangbin Du {
25077513edbcSChris Wilson 	struct intel_gvt_partial_pte *pos, *next;
2508bc0686ffSHang Yuan 
25097513edbcSChris Wilson 	list_for_each_entry_safe(pos, next,
25107513edbcSChris Wilson 				 &vgpu->gtt.ggtt_mm->ggtt_mm.partial_pte_list,
25117513edbcSChris Wilson 				 list) {
2512bc0686ffSHang Yuan 		gvt_dbg_mm("partial PTE update on hold 0x%lx : 0x%llx\n",
2513bc0686ffSHang Yuan 			pos->offset, pos->data);
2514bc0686ffSHang Yuan 		kfree(pos);
2515bc0686ffSHang Yuan 	}
25161bc25851SChangbin Du 	intel_vgpu_destroy_mm(vgpu->gtt.ggtt_mm);
2517ede9d0cfSChangbin Du 	vgpu->gtt.ggtt_mm = NULL;
2518da9cc8deSPing Gao }
2519da9cc8deSPing Gao 
/**
 * intel_vgpu_clean_gtt - clean up per-vGPU graphics memory virtualization
 * @vgpu: a vGPU
 *
 * This function is used to clean up per-vGPU graphics memory virtualization
 * components.
 */
void intel_vgpu_clean_gtt(struct intel_vgpu *vgpu)
{
	/* PPGTT mms may reference the GGTT mm, so destroy them first. */
	intel_vgpu_destroy_all_ppgtt_mm(vgpu);
	intel_vgpu_destroy_ggtt_mm(vgpu);
	release_scratch_page_tree(vgpu);
}
25362707e444SZhi Wang 
clean_spt_oos(struct intel_gvt * gvt)25372707e444SZhi Wang static void clean_spt_oos(struct intel_gvt *gvt)
25382707e444SZhi Wang {
25392707e444SZhi Wang 	struct intel_gvt_gtt *gtt = &gvt->gtt;
25402707e444SZhi Wang 	struct list_head *pos, *n;
25412707e444SZhi Wang 	struct intel_vgpu_oos_page *oos_page;
25422707e444SZhi Wang 
25432707e444SZhi Wang 	WARN(!list_empty(&gtt->oos_page_use_list_head),
25442707e444SZhi Wang 		"someone is still using oos page\n");
25452707e444SZhi Wang 
25462707e444SZhi Wang 	list_for_each_safe(pos, n, &gtt->oos_page_free_list_head) {
25472707e444SZhi Wang 		oos_page = container_of(pos, struct intel_vgpu_oos_page, list);
25482707e444SZhi Wang 		list_del(&oos_page->list);
2549ed47c5cbSZhao Yakui 		free_page((unsigned long)oos_page->mem);
25502707e444SZhi Wang 		kfree(oos_page);
25512707e444SZhi Wang 	}
25522707e444SZhi Wang }
25532707e444SZhi Wang 
setup_spt_oos(struct intel_gvt * gvt)25542707e444SZhi Wang static int setup_spt_oos(struct intel_gvt *gvt)
25552707e444SZhi Wang {
25562707e444SZhi Wang 	struct intel_gvt_gtt *gtt = &gvt->gtt;
25572707e444SZhi Wang 	struct intel_vgpu_oos_page *oos_page;
25582707e444SZhi Wang 	int i;
25592707e444SZhi Wang 	int ret;
25602707e444SZhi Wang 
25612707e444SZhi Wang 	INIT_LIST_HEAD(&gtt->oos_page_free_list_head);
25622707e444SZhi Wang 	INIT_LIST_HEAD(&gtt->oos_page_use_list_head);
25632707e444SZhi Wang 
25642707e444SZhi Wang 	for (i = 0; i < preallocated_oos_pages; i++) {
25652707e444SZhi Wang 		oos_page = kzalloc(sizeof(*oos_page), GFP_KERNEL);
25662707e444SZhi Wang 		if (!oos_page) {
25672707e444SZhi Wang 			ret = -ENOMEM;
25682707e444SZhi Wang 			goto fail;
25692707e444SZhi Wang 		}
2570ed47c5cbSZhao Yakui 		oos_page->mem = (void *)__get_free_pages(GFP_KERNEL, 0);
2571ed47c5cbSZhao Yakui 		if (!oos_page->mem) {
2572ed47c5cbSZhao Yakui 			ret = -ENOMEM;
2573ed47c5cbSZhao Yakui 			kfree(oos_page);
2574ed47c5cbSZhao Yakui 			goto fail;
2575ed47c5cbSZhao Yakui 		}
25762707e444SZhi Wang 
25772707e444SZhi Wang 		INIT_LIST_HEAD(&oos_page->list);
25782707e444SZhi Wang 		INIT_LIST_HEAD(&oos_page->vm_list);
25792707e444SZhi Wang 		oos_page->id = i;
25802707e444SZhi Wang 		list_add_tail(&oos_page->list, &gtt->oos_page_free_list_head);
25812707e444SZhi Wang 	}
25822707e444SZhi Wang 
25832707e444SZhi Wang 	gvt_dbg_mm("%d oos pages preallocated\n", i);
25842707e444SZhi Wang 
25852707e444SZhi Wang 	return 0;
25862707e444SZhi Wang fail:
25872707e444SZhi Wang 	clean_spt_oos(gvt);
25882707e444SZhi Wang 	return ret;
25892707e444SZhi Wang }
25902707e444SZhi Wang 
25912707e444SZhi Wang /**
25922707e444SZhi Wang  * intel_vgpu_find_ppgtt_mm - find a PPGTT mm object
25932707e444SZhi Wang  * @vgpu: a vGPU
2594a752b070SZhenyu Wang  * @pdps: pdp root array
25952707e444SZhi Wang  *
25962707e444SZhi Wang  * This function is used to find a PPGTT mm object from mm object pool
25972707e444SZhi Wang  *
25982707e444SZhi Wang  * Returns:
25992707e444SZhi Wang  * pointer to mm object on success, NULL if failed.
26002707e444SZhi Wang  */
intel_vgpu_find_ppgtt_mm(struct intel_vgpu * vgpu,u64 pdps[])26012707e444SZhi Wang struct intel_vgpu_mm *intel_vgpu_find_ppgtt_mm(struct intel_vgpu *vgpu,
2602ede9d0cfSChangbin Du 		u64 pdps[])
26032707e444SZhi Wang {
26042707e444SZhi Wang 	struct intel_vgpu_mm *mm;
2605ede9d0cfSChangbin Du 	struct list_head *pos;
26062707e444SZhi Wang 
2607ede9d0cfSChangbin Du 	list_for_each(pos, &vgpu->gtt.ppgtt_mm_list_head) {
2608ede9d0cfSChangbin Du 		mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.list);
26092707e444SZhi Wang 
2610ede9d0cfSChangbin Du 		switch (mm->ppgtt_mm.root_entry_type) {
2611ede9d0cfSChangbin Du 		case GTT_TYPE_PPGTT_ROOT_L4_ENTRY:
2612ede9d0cfSChangbin Du 			if (pdps[0] == mm->ppgtt_mm.guest_pdps[0])
26132707e444SZhi Wang 				return mm;
2614ede9d0cfSChangbin Du 			break;
2615ede9d0cfSChangbin Du 		case GTT_TYPE_PPGTT_ROOT_L3_ENTRY:
2616ede9d0cfSChangbin Du 			if (!memcmp(pdps, mm->ppgtt_mm.guest_pdps,
2617ede9d0cfSChangbin Du 				    sizeof(mm->ppgtt_mm.guest_pdps)))
26182707e444SZhi Wang 				return mm;
2619ede9d0cfSChangbin Du 			break;
2620ede9d0cfSChangbin Du 		default:
2621ede9d0cfSChangbin Du 			GEM_BUG_ON(1);
26222707e444SZhi Wang 		}
26232707e444SZhi Wang 	}
26242707e444SZhi Wang 	return NULL;
26252707e444SZhi Wang }
26262707e444SZhi Wang 
26272707e444SZhi Wang /**
2628e6e9c46fSChangbin Du  * intel_vgpu_get_ppgtt_mm - get or create a PPGTT mm object.
26292707e444SZhi Wang  * @vgpu: a vGPU
2630ede9d0cfSChangbin Du  * @root_entry_type: ppgtt root entry type
2631ede9d0cfSChangbin Du  * @pdps: guest pdps
26322707e444SZhi Wang  *
2633e6e9c46fSChangbin Du  * This function is used to find or create a PPGTT mm object from a guest.
26342707e444SZhi Wang  *
26352707e444SZhi Wang  * Returns:
26362707e444SZhi Wang  * Zero on success, negative error code if failed.
26372707e444SZhi Wang  */
intel_vgpu_get_ppgtt_mm(struct intel_vgpu * vgpu,enum intel_gvt_gtt_type root_entry_type,u64 pdps[])2638e6e9c46fSChangbin Du struct intel_vgpu_mm *intel_vgpu_get_ppgtt_mm(struct intel_vgpu *vgpu,
26390cf8f58dSAleksei Gimbitskii 		enum intel_gvt_gtt_type root_entry_type, u64 pdps[])
26402707e444SZhi Wang {
26412707e444SZhi Wang 	struct intel_vgpu_mm *mm;
26422707e444SZhi Wang 
2643ede9d0cfSChangbin Du 	mm = intel_vgpu_find_ppgtt_mm(vgpu, pdps);
26442707e444SZhi Wang 	if (mm) {
26451bc25851SChangbin Du 		intel_vgpu_mm_get(mm);
26462707e444SZhi Wang 	} else {
2647ede9d0cfSChangbin Du 		mm = intel_vgpu_create_ppgtt_mm(vgpu, root_entry_type, pdps);
2648e6e9c46fSChangbin Du 		if (IS_ERR(mm))
2649695fbc08STina Zhang 			gvt_vgpu_err("fail to create mm\n");
26502707e444SZhi Wang 	}
2651e6e9c46fSChangbin Du 	return mm;
26522707e444SZhi Wang }
26532707e444SZhi Wang 
26542707e444SZhi Wang /**
2655e6e9c46fSChangbin Du  * intel_vgpu_put_ppgtt_mm - find and put a PPGTT mm object.
26562707e444SZhi Wang  * @vgpu: a vGPU
2657ede9d0cfSChangbin Du  * @pdps: guest pdps
26582707e444SZhi Wang  *
2659e6e9c46fSChangbin Du  * This function is used to find a PPGTT mm object from a guest and destroy it.
26602707e444SZhi Wang  *
26612707e444SZhi Wang  * Returns:
26622707e444SZhi Wang  * Zero on success, negative error code if failed.
26632707e444SZhi Wang  */
intel_vgpu_put_ppgtt_mm(struct intel_vgpu * vgpu,u64 pdps[])2664e6e9c46fSChangbin Du int intel_vgpu_put_ppgtt_mm(struct intel_vgpu *vgpu, u64 pdps[])
26652707e444SZhi Wang {
26662707e444SZhi Wang 	struct intel_vgpu_mm *mm;
26672707e444SZhi Wang 
2668ede9d0cfSChangbin Du 	mm = intel_vgpu_find_ppgtt_mm(vgpu, pdps);
26692707e444SZhi Wang 	if (!mm) {
2670695fbc08STina Zhang 		gvt_vgpu_err("fail to find ppgtt instance.\n");
26712707e444SZhi Wang 		return -EINVAL;
26722707e444SZhi Wang 	}
26731bc25851SChangbin Du 	intel_vgpu_mm_put(mm);
26742707e444SZhi Wang 	return 0;
26752707e444SZhi Wang }
26762707e444SZhi Wang 
/**
 * intel_gvt_init_gtt - initialize mm components of a GVT device
 * @gvt: GVT device
 *
 * This function is called at the initialization stage, to initialize
 * the mm components of a GVT device.
 *
 * Returns:
 * zero on success, negative error code if failed.
 */
int intel_gvt_init_gtt(struct intel_gvt *gvt)
{
	int ret;
	void *page;
	struct device *dev = gvt->gt->i915->drm.dev;
	dma_addr_t daddr;

	gvt_dbg_core("init gtt\n");

	gvt->gtt.pte_ops = &gen8_gtt_pte_ops;
	gvt->gtt.gma_ops = &gen8_gtt_gma_ops;

	/* One zeroed page serves as the device-wide scratch target for
	 * GGTT entries that do not map real guest memory.
	 */
	page = (void *)get_zeroed_page(GFP_KERNEL);
	if (!page) {
		gvt_err("fail to allocate scratch ggtt page\n");
		return -ENOMEM;
	}

	daddr = dma_map_page(dev, virt_to_page(page), 0,
			4096, DMA_BIDIRECTIONAL);
	if (dma_mapping_error(dev, daddr)) {
		gvt_err("fail to dmamap scratch ggtt page\n");
		__free_page(virt_to_page(page));
		return -ENOMEM;
	}

	/* Record both the page and its mfn (DMA address >> page shift). */
	gvt->gtt.scratch_page = virt_to_page(page);
	gvt->gtt.scratch_mfn = (unsigned long)(daddr >> I915_GTT_PAGE_SHIFT);

	if (enable_out_of_sync) {
		ret = setup_spt_oos(gvt);
		if (ret) {
			gvt_err("fail to initialize SPT oos\n");
			/* Unwind the scratch page mapping allocated above. */
			dma_unmap_page(dev, daddr, 4096, DMA_BIDIRECTIONAL);
			__free_page(gvt->gtt.scratch_page);
			return ret;
		}
	}
	INIT_LIST_HEAD(&gvt->gtt.ppgtt_mm_lru_list_head);
	mutex_init(&gvt->gtt.ppgtt_mm_lock);
	return 0;
}
27292707e444SZhi Wang 
27302707e444SZhi Wang /**
27312707e444SZhi Wang  * intel_gvt_clean_gtt - clean up mm components of a GVT device
27322707e444SZhi Wang  * @gvt: GVT device
27332707e444SZhi Wang  *
2734a105fa52Swangjianli  * This function is called at the driver unloading stage, to clean up
27352707e444SZhi Wang  * the mm components of a GVT device.
27362707e444SZhi Wang  *
27372707e444SZhi Wang  */
intel_gvt_clean_gtt(struct intel_gvt * gvt)27382707e444SZhi Wang void intel_gvt_clean_gtt(struct intel_gvt *gvt)
27392707e444SZhi Wang {
27409ff06c38SThomas Zimmermann 	struct device *dev = gvt->gt->i915->drm.dev;
274122115cefSZhi Wang 	dma_addr_t daddr = (dma_addr_t)(gvt->gtt.scratch_mfn <<
27429556e118SZhi Wang 					I915_GTT_PAGE_SHIFT);
27435de6bd4cSChuanxiao Dong 
2744c4f61203SCai Huoqing 	dma_unmap_page(dev, daddr, 4096, DMA_BIDIRECTIONAL);
27455de6bd4cSChuanxiao Dong 
274622115cefSZhi Wang 	__free_page(gvt->gtt.scratch_page);
2747d650ac06SPing Gao 
27482707e444SZhi Wang 	if (enable_out_of_sync)
27492707e444SZhi Wang 		clean_spt_oos(gvt);
27502707e444SZhi Wang }
2751d650ac06SPing Gao 
2752d650ac06SPing Gao /**
2753730c8eadSZhi Wang  * intel_vgpu_invalidate_ppgtt - invalidate PPGTT instances
2754730c8eadSZhi Wang  * @vgpu: a vGPU
2755730c8eadSZhi Wang  *
2756730c8eadSZhi Wang  * This function is called when invalidate all PPGTT instances of a vGPU.
2757730c8eadSZhi Wang  *
2758730c8eadSZhi Wang  */
intel_vgpu_invalidate_ppgtt(struct intel_vgpu * vgpu)2759730c8eadSZhi Wang void intel_vgpu_invalidate_ppgtt(struct intel_vgpu *vgpu)
2760730c8eadSZhi Wang {
2761730c8eadSZhi Wang 	struct list_head *pos, *n;
2762730c8eadSZhi Wang 	struct intel_vgpu_mm *mm;
2763730c8eadSZhi Wang 
2764730c8eadSZhi Wang 	list_for_each_safe(pos, n, &vgpu->gtt.ppgtt_mm_list_head) {
2765730c8eadSZhi Wang 		mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.list);
2766730c8eadSZhi Wang 		if (mm->type == INTEL_GVT_MM_PPGTT) {
276772aabfb8SZhenyu Wang 			mutex_lock(&vgpu->gvt->gtt.ppgtt_mm_lock);
2768730c8eadSZhi Wang 			list_del_init(&mm->ppgtt_mm.lru_list);
276972aabfb8SZhenyu Wang 			mutex_unlock(&vgpu->gvt->gtt.ppgtt_mm_lock);
2770730c8eadSZhi Wang 			if (mm->ppgtt_mm.shadowed)
2771730c8eadSZhi Wang 				invalidate_ppgtt_mm(mm);
2772730c8eadSZhi Wang 		}
2773730c8eadSZhi Wang 	}
2774730c8eadSZhi Wang }
2775730c8eadSZhi Wang 
2776730c8eadSZhi Wang /**
2777d650ac06SPing Gao  * intel_vgpu_reset_ggtt - reset the GGTT entry
2778d650ac06SPing Gao  * @vgpu: a vGPU
2779f4c43db3SChangbin Du  * @invalidate_old: invalidate old entries
2780d650ac06SPing Gao  *
2781d650ac06SPing Gao  * This function is called at the vGPU create stage
2782d650ac06SPing Gao  * to reset all the GGTT entries.
2783d650ac06SPing Gao  *
2784d650ac06SPing Gao  */
intel_vgpu_reset_ggtt(struct intel_vgpu * vgpu,bool invalidate_old)2785f4c43db3SChangbin Du void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu, bool invalidate_old)
2786d650ac06SPing Gao {
2787d650ac06SPing Gao 	struct intel_gvt *gvt = vgpu->gvt;
27885512445cSRikard Falkeborn 	const struct intel_gvt_gtt_pte_ops *pte_ops = vgpu->gvt->gtt.pte_ops;
2789b0c766bfSChangbin Du 	struct intel_gvt_gtt_entry entry = {.type = GTT_TYPE_GGTT_PTE};
2790f4c43db3SChangbin Du 	struct intel_gvt_gtt_entry old_entry;
2791d650ac06SPing Gao 	u32 index;
2792d650ac06SPing Gao 	u32 num_entries;
2793d650ac06SPing Gao 
2794b0c766bfSChangbin Du 	pte_ops->set_pfn(&entry, gvt->gtt.scratch_mfn);
2795b0c766bfSChangbin Du 	pte_ops->set_present(&entry);
2796d650ac06SPing Gao 
2797d650ac06SPing Gao 	index = vgpu_aperture_gmadr_base(vgpu) >> PAGE_SHIFT;
2798d650ac06SPing Gao 	num_entries = vgpu_aperture_sz(vgpu) >> PAGE_SHIFT;
2799f4c43db3SChangbin Du 	while (num_entries--) {
2800f4c43db3SChangbin Du 		if (invalidate_old) {
2801f4c43db3SChangbin Du 			ggtt_get_host_entry(vgpu->gtt.ggtt_mm, &old_entry, index);
2802f4c43db3SChangbin Du 			ggtt_invalidate_pte(vgpu, &old_entry);
2803f4c43db3SChangbin Du 		}
2804b0c766bfSChangbin Du 		ggtt_set_host_entry(vgpu->gtt.ggtt_mm, &entry, index++);
2805f4c43db3SChangbin Du 	}
2806d650ac06SPing Gao 
2807d650ac06SPing Gao 	index = vgpu_hidden_gmadr_base(vgpu) >> PAGE_SHIFT;
2808d650ac06SPing Gao 	num_entries = vgpu_hidden_sz(vgpu) >> PAGE_SHIFT;
2809f4c43db3SChangbin Du 	while (num_entries--) {
2810f4c43db3SChangbin Du 		if (invalidate_old) {
2811f4c43db3SChangbin Du 			ggtt_get_host_entry(vgpu->gtt.ggtt_mm, &old_entry, index);
2812f4c43db3SChangbin Du 			ggtt_invalidate_pte(vgpu, &old_entry);
2813f4c43db3SChangbin Du 		}
2814b0c766bfSChangbin Du 		ggtt_set_host_entry(vgpu->gtt.ggtt_mm, &entry, index++);
2815f4c43db3SChangbin Du 	}
28165ad59bf0SZhenyu Wang 
2817a61ac1e7SChris Wilson 	ggtt_invalidate(gvt->gt);
2818d650ac06SPing Gao }
2819b611581bSChangbin Du 
2820b611581bSChangbin Du /**
28215f60b12eSColin Xu  * intel_gvt_restore_ggtt - restore all vGPU's ggtt entries
28225f60b12eSColin Xu  * @gvt: intel gvt device
28235f60b12eSColin Xu  *
28245f60b12eSColin Xu  * This function is called at driver resume stage to restore
28255f60b12eSColin Xu  * GGTT entries of every vGPU.
28265f60b12eSColin Xu  *
28275f60b12eSColin Xu  */
intel_gvt_restore_ggtt(struct intel_gvt * gvt)28285f60b12eSColin Xu void intel_gvt_restore_ggtt(struct intel_gvt *gvt)
28295f60b12eSColin Xu {
28305f60b12eSColin Xu 	struct intel_vgpu *vgpu;
28315f60b12eSColin Xu 	struct intel_vgpu_mm *mm;
28325f60b12eSColin Xu 	int id;
28335f60b12eSColin Xu 	gen8_pte_t pte;
28345f60b12eSColin Xu 	u32 idx, num_low, num_hi, offset;
28355f60b12eSColin Xu 
28365f60b12eSColin Xu 	/* Restore dirty host ggtt for all vGPUs */
28375f60b12eSColin Xu 	idr_for_each_entry(&(gvt)->vgpu_idr, vgpu, id) {
28385f60b12eSColin Xu 		mm = vgpu->gtt.ggtt_mm;
28395f60b12eSColin Xu 
28405f60b12eSColin Xu 		num_low = vgpu_aperture_sz(vgpu) >> PAGE_SHIFT;
28415f60b12eSColin Xu 		offset = vgpu_aperture_gmadr_base(vgpu) >> PAGE_SHIFT;
28425f60b12eSColin Xu 		for (idx = 0; idx < num_low; idx++) {
28435f60b12eSColin Xu 			pte = mm->ggtt_mm.host_ggtt_aperture[idx];
28445f978167SMichael Cheng 			if (pte & GEN8_PAGE_PRESENT)
28455f60b12eSColin Xu 				write_pte64(vgpu->gvt->gt->ggtt, offset + idx, pte);
28465f60b12eSColin Xu 		}
28475f60b12eSColin Xu 
28485f60b12eSColin Xu 		num_hi = vgpu_hidden_sz(vgpu) >> PAGE_SHIFT;
28495f60b12eSColin Xu 		offset = vgpu_hidden_gmadr_base(vgpu) >> PAGE_SHIFT;
28505f60b12eSColin Xu 		for (idx = 0; idx < num_hi; idx++) {
28515f60b12eSColin Xu 			pte = mm->ggtt_mm.host_ggtt_hidden[idx];
28525f978167SMichael Cheng 			if (pte & GEN8_PAGE_PRESENT)
28535f60b12eSColin Xu 				write_pte64(vgpu->gvt->gt->ggtt, offset + idx, pte);
28545f60b12eSColin Xu 		}
28555f60b12eSColin Xu 	}
28565f60b12eSColin Xu }
2857