// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2020-2022 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include "../habanalabs.h"
#include "../../include/hw_ip/mmu/mmu_general.h"

#include <linux/slab.h>

static struct pgt_info *hl_mmu_v2_hr_get_pgt_info(struct hl_ctx *ctx, u64 phys_hop_addr)
{
	struct pgt_info *pgt_info = NULL;

	hash_for_each_possible(ctx->hr_mmu_phys_hash, pgt_info, node,
				(unsigned long) phys_hop_addr)
		if (phys_hop_addr == pgt_info->phys_addr)
			break;

	return pgt_info;
}

static void hl_mmu_v2_hr_add_pgt_info(struct hl_ctx *ctx, struct pgt_info *pgt_info,
					dma_addr_t phys_addr)
{
	hash_add(ctx->hr_mmu_phys_hash, &pgt_info->node, phys_addr);
}

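/*
 * HOP0 tables are not kept in the per-context hash - every ASID has its own
 * HOP0 table in the device's host-resident MMU private data, so it is looked
 * up directly by ASID.
 */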
static struct pgt_info *hl_mmu_v2_hr_get_hop0_pgt_info(struct hl_ctx *ctx)
{
	return &ctx->hdev->mmu_priv.hr.mmu_asid_hop0[ctx->asid];
}

/**
 * hl_mmu_v2_hr_init() - initialize the MMU module.
 * @hdev: habanalabs device structure.
 *
 * This function does the following:
 * - Create a pool of pages for pgt_infos.
 * - Create a shadow table for pgt
 *
 * Return: 0 for success, non-zero for failure.
 */
static inline int hl_mmu_v2_hr_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;

	return hl_mmu_hr_init(hdev, &hdev->mmu_priv.hr, prop->mmu_hop_table_size,
				prop->mmu_pgt_size);
}

/**
 * hl_mmu_v2_hr_fini() - release the MMU module.
 * @hdev: habanalabs device structure.
 *
 * This function does the following:
 * - Disable MMU in H/W.
 * - Free the pgt_infos pool.
 *
 * All contexts should be freed before calling this function.
 */
static inline void hl_mmu_v2_hr_fini(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;

	hl_mmu_hr_fini(hdev, &hdev->mmu_priv.hr, prop->mmu_hop_table_size);
}

/**
 * hl_mmu_v2_hr_ctx_init() - initialize a context for using the MMU module.
 * @ctx: pointer to the context structure to initialize.
 *
 * Initialize a mutex to protect the concurrent mapping flow and a hash to hold
 * all page table hops related to this context.
 * Return: 0 on success, non-zero otherwise.
 */
static int hl_mmu_v2_hr_ctx_init(struct hl_ctx *ctx)
{
	hash_init(ctx->hr_mmu_phys_hash);
	return 0;
}

/*
 * hl_mmu_v2_hr_ctx_fini - disable a ctx from using the mmu module
 *
 * @ctx: pointer to the context structure
 *
 * This function does the following:
 * - Free any pgts which were not freed yet
 * - Free the mutex
 * - Free DRAM default page mapping hops
 */
static void hl_mmu_v2_hr_ctx_fini(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;
	struct pgt_info *pgt_info;
	struct hlist_node *tmp;
	int i;

	if (!hash_empty(ctx->hr_mmu_phys_hash))
		dev_err(hdev->dev, "ctx %d is freed while it has pgts in use\n",
			ctx->asid);

	hash_for_each_safe(ctx->hr_mmu_phys_hash, i, tmp, pgt_info, node) {
		dev_err_ratelimited(hdev->dev,
			"pgt_info of addr 0x%llx of asid %d was not destroyed, num_ptes: %d\n",
			pgt_info->phys_addr, ctx->asid, pgt_info->num_of_ptes);
		hl_mmu_hr_free_hop_remove_pgt(pgt_info, &ctx->hdev->mmu_priv.hr,
						ctx->hdev->asic_prop.mmu_hop_table_size);
	}
}

static int _hl_mmu_v2_hr_unmap(struct hl_ctx *ctx,
				u64 virt_addr, bool is_dram_addr)
{
	u64 curr_pte, scrambled_virt_addr, hop_pte_phys_addr[MMU_ARCH_6_HOPS] = { 0 };
	struct pgt_info *hops_pgt_info[MMU_ARCH_6_HOPS] = { NULL };
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop;
	struct hl_mmu_properties *mmu_prop;
	bool is_huge = false;
	int i, hop_last;

	prop = &hdev->asic_prop;

	/* shifts and masks are the same in PMMU and HMMU, use one of them */
	mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
	hop_last = mmu_prop->num_hops - 1;

	scrambled_virt_addr = hdev->asic_funcs->scramble_addr(hdev, virt_addr);
	curr_pte = 0;

	for (i = 0 ; i < mmu_prop->num_hops ; i++) {
		/* we get HOP0 differently, it doesn't need curr_pte */
		if (i == 0)
			hops_pgt_info[i] = hl_mmu_v2_hr_get_hop0_pgt_info(ctx);
		else
			hops_pgt_info[i] = hl_mmu_hr_get_next_hop_pgt_info(ctx,
					&ctx->hdev->mmu_func[MMU_HR_PGT].hr_funcs, curr_pte);
		if (!hops_pgt_info[i])
			goto not_mapped;

		hop_pte_phys_addr[i] = hl_mmu_get_hop_pte_phys_addr(ctx, mmu_prop, i,
							hops_pgt_info[i]->phys_addr,
							scrambled_virt_addr);
		if (hop_pte_phys_addr[i] == U64_MAX)
			return -EFAULT;

		curr_pte = *(u64 *) (uintptr_t) hl_mmu_hr_pte_phys_to_virt(ctx, hops_pgt_info[i],
							hop_pte_phys_addr[i],
							ctx->hdev->asic_prop.mmu_hop_table_size);

		if ((i < hop_last) && (curr_pte & mmu_prop->last_mask)) {
			hop_last = i;
			is_huge = true;
			break;
		}
	}

	if (is_dram_addr && !is_huge) {
		dev_err(hdev->dev, "DRAM unmapping should use huge pages only\n");
		return -EFAULT;
	}

	if (!(curr_pte & PAGE_PRESENT_MASK))
		goto not_mapped;

	for (i = hop_last ; i > 0 ; i--) {
		hl_mmu_hr_clear_pte(ctx, hops_pgt_info[i], hop_pte_phys_addr[i],
					ctx->hdev->asic_prop.mmu_hop_table_size);

		if (hl_mmu_hr_put_pte(ctx, hops_pgt_info[i], &ctx->hdev->mmu_priv.hr,
					ctx->hdev->asic_prop.mmu_hop_table_size))
			goto mapped;
	}
	hl_mmu_hr_clear_pte(ctx, hops_pgt_info[0], hop_pte_phys_addr[0],
				ctx->hdev->asic_prop.mmu_hop_table_size);

mapped:
	return 0;

not_mapped:
	dev_err(hdev->dev, "virt addr 0x%llx is not mapped to phys addr\n", virt_addr);

	return -EINVAL;
}

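/*
 * hl_mmu_v2_get_last_hop - find the index of the last hop needed to map a page
 * of the given size: walk from the deepest hop towards HOP0, skip unused hop
 * levels (hop shift of 0) and stop at the first hop whose PTE covers at least
 * @page_size bytes.
 */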
static int hl_mmu_v2_get_last_hop(struct hl_mmu_properties *mmu_prop, u32 page_size)
{
	int hop;

	for (hop = (mmu_prop->num_hops - 1); hop; hop--) {
		if (mmu_prop->hop_shifts[hop] == 0)
			continue;

		if (page_size <= (1 << mmu_prop->hop_shifts[hop]))
			break;
	}

	return hop;
}

static int _hl_mmu_v2_hr_map(struct hl_ctx *ctx,
				u64 virt_addr, u64 phys_addr,
				u32 page_size, bool is_dram_addr)
{
	u64 hop_pte_phys_addr[MMU_ARCH_6_HOPS] = { 0 },
		curr_pte = 0, scrambled_virt_addr, scrambled_phys_addr;
	struct pgt_info *hops_pgt_info[MMU_ARCH_6_HOPS] = { NULL };
	bool hop_new[MMU_ARCH_6_HOPS] = { false };
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct hl_mmu_properties *mmu_prop;
	int i, hop_last, rc = -ENOMEM;

	/*
	 * This mapping function can map a page or a huge page. For a huge page
	 * there are only 4 hops rather than 5. Currently the DRAM allocation
	 * uses huge pages only, but user memory could have been allocated with
	 * either of the two page sizes. Since this is common code for all three
	 * cases, we need this huge page check.
	 */
	if (is_dram_addr)
		mmu_prop = &prop->dmmu;
	else if (page_size == prop->pmmu_huge.page_size)
		mmu_prop = &prop->pmmu_huge;
	else
		mmu_prop = &prop->pmmu;

	hop_last = hl_mmu_v2_get_last_hop(mmu_prop, page_size);
	if (hop_last <= 0) {
		dev_err(ctx->hdev->dev, "Invalid last HOP %d\n", hop_last);
		return -EFAULT;
	}

	scrambled_virt_addr = hdev->asic_funcs->scramble_addr(hdev, virt_addr);
	scrambled_phys_addr = hdev->asic_funcs->scramble_addr(hdev, phys_addr);

	for (i = 0 ; i <= hop_last ; i++) {

		if (i == 0)
			hops_pgt_info[i] = hl_mmu_v2_hr_get_hop0_pgt_info(ctx);
		else
			hops_pgt_info[i] = hl_mmu_hr_get_alloc_next_hop(ctx,
						&ctx->hdev->mmu_priv.hr,
						&ctx->hdev->mmu_func[MMU_HR_PGT].hr_funcs,
						mmu_prop, curr_pte, &hop_new[i]);
		if (!hops_pgt_info[i])
			goto err;

		hop_pte_phys_addr[i] = hl_mmu_get_hop_pte_phys_addr(ctx, mmu_prop, i,
							hops_pgt_info[i]->phys_addr,
							scrambled_virt_addr);
		curr_pte = *(u64 *) (uintptr_t) hl_mmu_hr_pte_phys_to_virt(ctx, hops_pgt_info[i],
							hop_pte_phys_addr[i],
							ctx->hdev->asic_prop.mmu_hop_table_size);
	}

	if (curr_pte & PAGE_PRESENT_MASK) {
		dev_err(hdev->dev, "mapping already exists for virt_addr 0x%llx\n",
			scrambled_virt_addr);

		for (i = 0 ; i <= hop_last ; i++)
			dev_dbg(hdev->dev, "hop%d pte: 0x%llx (0x%llx)\n",
				i,
				*(u64 *) (uintptr_t)
				hl_mmu_hr_pte_phys_to_virt(ctx, hops_pgt_info[i],
						hop_pte_phys_addr[i],
						ctx->hdev->asic_prop.mmu_hop_table_size),
				hop_pte_phys_addr[i]);
		rc = -EINVAL;
		goto err;
	}

	curr_pte = (scrambled_phys_addr & HOP_PHYS_ADDR_MASK) | mmu_prop->last_mask
			| PAGE_PRESENT_MASK;

	/* Write the PTEs */
	hl_mmu_hr_write_pte(ctx, hops_pgt_info[hop_last], hop_pte_phys_addr[hop_last], curr_pte,
				ctx->hdev->asic_prop.mmu_hop_table_size);

	/* for each new hop, add its address to the table of the previous hop */
	for (i = 1 ; i <= hop_last ; i++) {
		if (hop_new[i]) {
			curr_pte = (hops_pgt_info[i]->phys_addr & HOP_PHYS_ADDR_MASK) |
					PAGE_PRESENT_MASK;
			hl_mmu_hr_write_pte(ctx, hops_pgt_info[i - 1], hop_pte_phys_addr[i - 1],
						curr_pte, ctx->hdev->asic_prop.mmu_hop_table_size);
			if (i - 1)
				hl_mmu_hr_get_pte(ctx, &ctx->hdev->mmu_func[MMU_HR_PGT].hr_funcs,
							hops_pgt_info[i - 1]->phys_addr);
		}
	}

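	/* take a reference on the last hop, which now holds the new mapping's PTE */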
	hl_mmu_hr_get_pte(ctx, &ctx->hdev->mmu_func[MMU_HR_PGT].hr_funcs,
				hops_pgt_info[hop_last]->phys_addr);

	return 0;

err:
	for (i = 1 ; i <= hop_last ; i++)
		if (hop_new[i] && hops_pgt_info[i])
			hl_mmu_hr_free_hop_remove_pgt(hops_pgt_info[i], &ctx->hdev->mmu_priv.hr,
							ctx->hdev->asic_prop.mmu_hop_table_size);

	return rc;
}

/*
 * hl_mmu_v2_hr_swap_out - marks all mappings of the given ctx as swapped out
 *
 * @ctx: pointer to the context structure
 *
 */
static void hl_mmu_v2_hr_swap_out(struct hl_ctx *ctx)
{

}

/*
 * hl_mmu_v2_hr_swap_in - marks all mappings of the given ctx as swapped in
 *
 * @ctx: pointer to the context structure
 *
 */
static void hl_mmu_v2_hr_swap_in(struct hl_ctx *ctx)
{

}

static int hl_mmu_v2_hr_get_tlb_mapping_params(struct hl_device *hdev,
						struct hl_mmu_properties **mmu_prop,
						struct hl_mmu_hop_info *hops,
						u64 virt_addr, bool *is_huge)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	bool is_dram_addr, is_pmmu_addr, is_pmmu_h_addr;

	is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
						prop->dmmu.start_addr,
						prop->dmmu.end_addr);
	is_pmmu_addr = hl_mem_area_inside_range(virt_addr, prop->pmmu.page_size,
						prop->pmmu.start_addr,
						prop->pmmu.end_addr);
	is_pmmu_h_addr = hl_mem_area_inside_range(virt_addr,
						prop->pmmu_huge.page_size,
						prop->pmmu_huge.start_addr,
						prop->pmmu_huge.end_addr);
	if (is_dram_addr) {
		*mmu_prop = &prop->dmmu;
		*is_huge = true;
		hops->range_type = HL_VA_RANGE_TYPE_DRAM;
	} else if (is_pmmu_addr) {
		*mmu_prop = &prop->pmmu;
		*is_huge = false;
		hops->range_type = HL_VA_RANGE_TYPE_HOST;
	} else if (is_pmmu_h_addr) {
		*mmu_prop = &prop->pmmu_huge;
		*is_huge = true;
		hops->range_type = HL_VA_RANGE_TYPE_HOST_HUGE;
	} else {
		return -EINVAL;
	}

	return 0;
}

static int hl_mmu_v2_hr_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr,
					struct hl_mmu_hop_info *hops)
{
	return hl_mmu_hr_get_tlb_info(ctx, virt_addr, hops,
					&ctx->hdev->mmu_func[MMU_HR_PGT].hr_funcs);
}

/*
 * hl_mmu_v2_hr_set_funcs - fill the MMU functions structure for MMU v2 with
 *                          host-resident page tables
 *
 * @hdev: pointer to the device structure
 * @mmu: pointer to the MMU functions structure to fill
 */
void hl_mmu_v2_hr_set_funcs(struct hl_device *hdev, struct hl_mmu_funcs *mmu)
{
	mmu->init = hl_mmu_v2_hr_init;
	mmu->fini = hl_mmu_v2_hr_fini;
	mmu->ctx_init = hl_mmu_v2_hr_ctx_init;
	mmu->ctx_fini = hl_mmu_v2_hr_ctx_fini;
	mmu->map = _hl_mmu_v2_hr_map;
	mmu->unmap = _hl_mmu_v2_hr_unmap;
	mmu->flush = hl_mmu_hr_flush;
	mmu->swap_out = hl_mmu_v2_hr_swap_out;
	mmu->swap_in = hl_mmu_v2_hr_swap_in;
	mmu->get_tlb_info = hl_mmu_v2_hr_get_tlb_info;
	mmu->hr_funcs.get_hop0_pgt_info = hl_mmu_v2_hr_get_hop0_pgt_info;
	mmu->hr_funcs.get_pgt_info = hl_mmu_v2_hr_get_pgt_info;
	mmu->hr_funcs.add_pgt_info = hl_mmu_v2_hr_add_pgt_info;
	mmu->hr_funcs.get_tlb_mapping_params = hl_mmu_v2_hr_get_tlb_mapping_params;
}