1 // SPDX-License-Identifier: GPL-2.0
2
3 /*
4 * Copyright 2020-2022 HabanaLabs, Ltd.
5 * All Rights Reserved.
6 */
7
8 #include "../habanalabs.h"
9 #include "../../include/hw_ip/mmu/mmu_general.h"
10
11 #include <linux/slab.h>
12
/*
 * hl_mmu_v2_hr_get_pgt_info - look up a hop descriptor by its physical address
 *
 * @ctx: pointer to the context structure
 * @phys_hop_addr: physical address of the hop to look for
 *
 * Searches the context's hr_mmu_phys_hash for an entry whose phys_addr equals
 * @phys_hop_addr.
 *
 * Return: the matching pgt_info, or NULL if no entry matches.
 */
static struct pgt_info *hl_mmu_v2_hr_get_pgt_info(struct hl_ctx *ctx, u64 phys_hop_addr)
{
	struct pgt_info *entry;

	hash_for_each_possible(ctx->hr_mmu_phys_hash, entry, node,
				(unsigned long) phys_hop_addr) {
		/* the bucket may hold other addresses, so compare explicitly */
		if (entry->phys_addr == phys_hop_addr)
			return entry;
	}

	return NULL;
}
24
/*
 * hl_mmu_v2_hr_add_pgt_info - register a hop descriptor in the context's hash
 *
 * @ctx: pointer to the context structure
 * @pgt_info: hop descriptor to insert
 * @phys_addr: key under which the descriptor is hashed
 *
 * Inserts @pgt_info into ctx->hr_mmu_phys_hash, keyed by @phys_addr.
 */
static void hl_mmu_v2_hr_add_pgt_info(struct hl_ctx *ctx, struct pgt_info *pgt_info,
					dma_addr_t phys_addr)
{
	struct hlist_node *hop_node = &pgt_info->node;

	hash_add(ctx->hr_mmu_phys_hash, hop_node, phys_addr);
}
30
hl_mmu_v2_hr_get_hop0_pgt_info(struct hl_ctx * ctx)31 static struct pgt_info *hl_mmu_v2_hr_get_hop0_pgt_info(struct hl_ctx *ctx)
32 {
33 return &ctx->hdev->mmu_priv.hr.mmu_asid_hop0[ctx->asid];
34 }
35
36 /**
37 * hl_mmu_v2_hr_init() - initialize the MMU module.
38 * @hdev: habanalabs device structure.
39 *
40 * This function does the following:
41 * - Create a pool of pages for pgt_infos.
42 * - Create a shadow table for pgt
43 *
44 * Return: 0 for success, non-zero for failure.
45 */
hl_mmu_v2_hr_init(struct hl_device * hdev)46 static inline int hl_mmu_v2_hr_init(struct hl_device *hdev)
47 {
48 struct asic_fixed_properties *prop = &hdev->asic_prop;
49
50 return hl_mmu_hr_init(hdev, &hdev->mmu_priv.hr, prop->mmu_hop_table_size,
51 prop->mmu_pgt_size);
52 }
53
54 /**
55 * hl_mmu_v2_hr_fini() - release the MMU module.
56 * @hdev: habanalabs device structure.
57 *
58 * This function does the following:
59 * - Disable MMU in H/W.
60 * - Free the pgt_infos pool.
61 *
62 * All contexts should be freed before calling this function.
63 */
hl_mmu_v2_hr_fini(struct hl_device * hdev)64 static inline void hl_mmu_v2_hr_fini(struct hl_device *hdev)
65 {
66 struct asic_fixed_properties *prop = &hdev->asic_prop;
67
68 hl_mmu_hr_fini(hdev, &hdev->mmu_priv.hr, prop->mmu_hop_table_size);
69 }
70
71 /**
72 * hl_mmu_v2_hr_ctx_init() - initialize a context for using the MMU module.
73 * @ctx: pointer to the context structure to initialize.
74 *
75 * Initialize a mutex to protect the concurrent mapping flow, a hash to hold all
76 * page tables hops related to this context.
77 * Return: 0 on success, non-zero otherwise.
78 */
hl_mmu_v2_hr_ctx_init(struct hl_ctx * ctx)79 static int hl_mmu_v2_hr_ctx_init(struct hl_ctx *ctx)
80 {
81 hash_init(ctx->hr_mmu_phys_hash);
82 return 0;
83 }
84
85 /*
86 * hl_mmu_v2_hr_ctx_fini - disable a ctx from using the mmu module
87 *
88 * @ctx: pointer to the context structure
89 *
90 * This function does the following:
91 * - Free any pgts which were not freed yet
92 * - Free the mutex
93 * - Free DRAM default page mapping hops
94 */
hl_mmu_v2_hr_ctx_fini(struct hl_ctx * ctx)95 static void hl_mmu_v2_hr_ctx_fini(struct hl_ctx *ctx)
96 {
97 struct hl_device *hdev = ctx->hdev;
98 struct pgt_info *pgt_info;
99 struct hlist_node *tmp;
100 int i;
101
102 if (!hash_empty(ctx->hr_mmu_phys_hash))
103 dev_err(hdev->dev, "ctx %d is freed while it has pgts in use\n",
104 ctx->asid);
105
106 hash_for_each_safe(ctx->hr_mmu_phys_hash, i, tmp, pgt_info, node) {
107 dev_err_ratelimited(hdev->dev,
108 "pgt_info of addr 0x%llx of asid %d was not destroyed, num_ptes: %d\n",
109 pgt_info->phys_addr, ctx->asid, pgt_info->num_of_ptes);
110 hl_mmu_hr_free_hop_remove_pgt(pgt_info, &ctx->hdev->mmu_priv.hr,
111 ctx->hdev->asic_prop.mmu_hop_table_size);
112 }
113 }
114
/*
 * _hl_mmu_v2_hr_unmap - remove the mapping of a single virtual address
 *
 * @ctx: pointer to the context structure
 * @virt_addr: virtual address to unmap
 * @is_dram_addr: true if @virt_addr belongs to the DRAM range
 *
 * Walks the hops starting at hop0 until the last hop (or until a PTE marked
 * with the "last" bit is met, i.e. a huge page), then clears the PTEs from the
 * last hop backwards.
 *
 * Return: 0 on success, -EINVAL if the address is not mapped, -EFAULT if a PTE
 * address could not be computed or a DRAM address is not mapped by a huge page.
 */
static int _hl_mmu_v2_hr_unmap(struct hl_ctx *ctx,
				u64 virt_addr, bool is_dram_addr)
{
	struct pgt_info *pgts[MMU_ARCH_6_HOPS] = { NULL };
	u64 pte_addrs[MMU_ARCH_6_HOPS] = { 0 };
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct hl_mmu_properties *mmu_prop;
	u64 scrambled_va, pte = 0;
	bool huge = false;
	int hop, last_hop;

	/* shifts and masks are the same in PMMU and HMMU, use one of them */
	if (is_dram_addr)
		mmu_prop = &prop->dmmu;
	else
		mmu_prop = &prop->pmmu;

	last_hop = mmu_prop->num_hops - 1;
	scrambled_va = hdev->asic_funcs->scramble_addr(hdev, virt_addr);

	for (hop = 0 ; hop < mmu_prop->num_hops ; hop++) {
		/* HOP0 is fetched per ASID, the rest are derived from the previous PTE */
		if (hop)
			pgts[hop] = hl_mmu_hr_get_next_hop_pgt_info(ctx,
					&hdev->mmu_func[MMU_HR_PGT].hr_funcs, pte);
		else
			pgts[hop] = hl_mmu_v2_hr_get_hop0_pgt_info(ctx);

		if (!pgts[hop])
			goto not_mapped;

		pte_addrs[hop] = hl_mmu_get_hop_pte_phys_addr(ctx, mmu_prop, hop,
								pgts[hop]->phys_addr,
								scrambled_va);
		if (pte_addrs[hop] == U64_MAX)
			return -EFAULT;

		pte = *(u64 *) (uintptr_t) hl_mmu_hr_pte_phys_to_virt(ctx, pgts[hop],
								pte_addrs[hop],
								prop->mmu_hop_table_size);

		/* a "last" PTE before the final hop means a huge page mapping */
		if ((hop < last_hop) && (pte & mmu_prop->last_mask)) {
			last_hop = hop;
			huge = true;
			break;
		}
	}

	if (is_dram_addr && !huge) {
		dev_err(hdev->dev, "DRAM unmapping should use huge pages only\n");
		return -EFAULT;
	}

	if (!(pte & PAGE_PRESENT_MASK))
		goto not_mapped;

	for (hop = last_hop ; hop > 0 ; hop--) {
		hl_mmu_hr_clear_pte(ctx, pgts[hop], pte_addrs[hop],
					prop->mmu_hop_table_size);

		/*
		 * A non-zero return stops the walk towards hop0 - presumably it
		 * means the hop still holds other PTEs (TODO confirm against
		 * hl_mmu_hr_put_pte()).
		 */
		if (hl_mmu_hr_put_pte(ctx, pgts[hop], &hdev->mmu_priv.hr,
					prop->mmu_hop_table_size))
			return 0;
	}

	/* every hop above hop0 was released, clear the hop0 entry as well */
	hl_mmu_hr_clear_pte(ctx, pgts[0], pte_addrs[0],
				prop->mmu_hop_table_size);

	return 0;

not_mapped:
	dev_err(hdev->dev, "virt addr 0x%llx is not mapped to phys addr\n", virt_addr);

	return -EINVAL;
}
189
/*
 * hl_mmu_v2_get_last_hop - find the hop that holds the final PTE of a page
 *
 * @mmu_prop: MMU properties (number of hops and per-hop shifts)
 * @page_size: size of the page to be mapped
 *
 * Scans the hops from the highest index downwards, skipping hops whose shift
 * is 0, and stops at the first hop whose span (1 << shift) is big enough to
 * hold @page_size.
 *
 * Return: index of the selected hop, or a value <= 0 if no hop above hop0
 * fits (callers must treat that as an error).
 */
static int hl_mmu_v2_get_last_hop(struct hl_mmu_properties *mmu_prop, u32 page_size)
{
	int hop;

	/* "hop > 0" also terminates cleanly if num_hops is 0 (hop starts at -1) */
	for (hop = (mmu_prop->num_hops - 1); hop > 0; hop--) {
		if (mmu_prop->hop_shifts[hop] == 0)
			continue;

		/*
		 * Shift a 64-bit one: shifting a plain int by 31 bits or more
		 * is undefined behavior, and upper hops can have such shifts.
		 */
		if (page_size <= (1ULL << mmu_prop->hop_shifts[hop]))
			break;
	}

	return hop;
}
204
/*
 * _hl_mmu_v2_hr_map - map a single virtual page to a physical page
 *
 * @ctx: pointer to the context structure
 * @virt_addr: virtual address to map
 * @phys_addr: physical address to map to
 * @page_size: size of the mapped page
 * @is_dram_addr: true if the mapping belongs to the DRAM range
 *
 * Walks (and allocates where needed) the hops from hop0 up to the hop that
 * holds the final PTE for @page_size, writes the final PTE and then links
 * every newly allocated hop into its parent hop.
 *
 * Return: 0 on success, -EFAULT if no valid last hop exists for @page_size or
 * a PTE address could not be computed, -EINVAL if the address is already
 * mapped, -ENOMEM if a hop could not be obtained.
 */
static int _hl_mmu_v2_hr_map(struct hl_ctx *ctx,
				u64 virt_addr, u64 phys_addr,
				u32 page_size, bool is_dram_addr)
{
	u64 hop_pte_phys_addr[MMU_ARCH_6_HOPS] = { 0 },
		curr_pte = 0, scrambled_virt_addr, scrambled_phys_addr;
	struct pgt_info *hops_pgt_info[MMU_ARCH_6_HOPS] = { NULL };
	bool hop_new[MMU_ARCH_6_HOPS] = { false };
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct hl_mmu_properties *mmu_prop;
	int i, hop_last, rc = -ENOMEM;

	/*
	 * This mapping function can map a page or a huge page. For huge page
	 * there are only 4 hops rather than 5. Currently the DRAM allocation
	 * uses huge pages only but user memory could have been allocated with
	 * one of the two page sizes. Since this is a common code for all the
	 * three cases, we need this huge page check.
	 */
	if (is_dram_addr)
		mmu_prop = &prop->dmmu;
	else if (page_size == prop->pmmu_huge.page_size)
		mmu_prop = &prop->pmmu_huge;
	else
		mmu_prop = &prop->pmmu;

	hop_last = hl_mmu_v2_get_last_hop(mmu_prop, page_size);
	if (hop_last <= 0) {
		dev_err(ctx->hdev->dev, "Invalid last HOP %d\n", hop_last);
		return -EFAULT;
	}

	scrambled_virt_addr = hdev->asic_funcs->scramble_addr(hdev, virt_addr);
	scrambled_phys_addr = hdev->asic_funcs->scramble_addr(hdev, phys_addr);

	for (i = 0 ; i <= hop_last ; i++) {

		/* HOP0 is fetched per ASID, other hops are found/allocated via curr_pte */
		if (i == 0)
			hops_pgt_info[i] = hl_mmu_v2_hr_get_hop0_pgt_info(ctx);
		else
			hops_pgt_info[i] = hl_mmu_hr_get_alloc_next_hop(ctx,
							&ctx->hdev->mmu_priv.hr,
							&ctx->hdev->mmu_func[MMU_HR_PGT].hr_funcs,
							mmu_prop, curr_pte, &hop_new[i]);
		if (!hops_pgt_info[i])
			goto err;

		hop_pte_phys_addr[i] = hl_mmu_get_hop_pte_phys_addr(ctx, mmu_prop, i,
									hops_pgt_info[i]->phys_addr,
									scrambled_virt_addr);
		/*
		 * Same check as in the unmap flow: never translate and
		 * dereference an invalid PTE address.
		 */
		if (hop_pte_phys_addr[i] == U64_MAX) {
			rc = -EFAULT;
			goto err;
		}

		curr_pte = *(u64 *) (uintptr_t) hl_mmu_hr_pte_phys_to_virt(ctx, hops_pgt_info[i],
							hop_pte_phys_addr[i],
							ctx->hdev->asic_prop.mmu_hop_table_size);
	}

	/* curr_pte now holds the PTE of the last hop - it must not be in use */
	if (curr_pte & PAGE_PRESENT_MASK) {
		dev_err(hdev->dev, "mapping already exists for virt_addr 0x%llx\n",
			scrambled_virt_addr);

		for (i = 0 ; i <= hop_last ; i++)
			dev_dbg(hdev->dev, "hop%d pte: 0x%llx (0x%llx)\n",
				i,
				*(u64 *) (uintptr_t)
				hl_mmu_hr_pte_phys_to_virt(ctx, hops_pgt_info[i],
							hop_pte_phys_addr[i],
							ctx->hdev->asic_prop.mmu_hop_table_size),
				hop_pte_phys_addr[i]);
		rc = -EINVAL;
		goto err;
	}

	curr_pte = (scrambled_phys_addr & HOP_PHYS_ADDR_MASK) | mmu_prop->last_mask
			| PAGE_PRESENT_MASK;

	/* Write the PTEs */
	hl_mmu_hr_write_pte(ctx, hops_pgt_info[hop_last], hop_pte_phys_addr[hop_last], curr_pte,
							ctx->hdev->asic_prop.mmu_hop_table_size);

	/* for each new hop, add its address to the table of previous-hop */
	for (i = 1 ; i <= hop_last ; i++) {
		if (hop_new[i]) {
			curr_pte = (hops_pgt_info[i]->phys_addr & HOP_PHYS_ADDR_MASK) |
							PAGE_PRESENT_MASK;
			hl_mmu_hr_write_pte(ctx, hops_pgt_info[i - 1], hop_pte_phys_addr[i - 1],
						curr_pte, ctx->hdev->asic_prop.mmu_hop_table_size);
			/* hop0 (i - 1 == 0) gets no extra reference */
			if (i - 1)
				hl_mmu_hr_get_pte(ctx, &ctx->hdev->mmu_func[MMU_HR_PGT].hr_funcs,
								hops_pgt_info[i - 1]->phys_addr);
		}
	}

	hl_mmu_hr_get_pte(ctx, &ctx->hdev->mmu_func[MMU_HR_PGT].hr_funcs,
						hops_pgt_info[hop_last]->phys_addr);

	return 0;

err:
	/* release only the hops that were allocated by this call */
	for (i = 1 ; i <= hop_last ; i++)
		if (hop_new[i] && hops_pgt_info[i])
			hl_mmu_hr_free_hop_remove_pgt(hops_pgt_info[i], &ctx->hdev->mmu_priv.hr,
							ctx->hdev->asic_prop.mmu_hop_table_size);

	return rc;
}
310
/*
 * hl_mmu_v2_hr_swap_out - swap-out callback of the host-resident MMU v2
 *
 * @ctx: pointer to the context structure
 *
 * Currently a no-op.
 */
static void hl_mmu_v2_hr_swap_out(struct hl_ctx *ctx)
{
	/* intentionally empty */
}
321
/*
 * hl_mmu_v2_hr_swap_in - swap-in callback of the host-resident MMU v2
 *
 * @ctx: pointer to the context structure
 *
 * Currently a no-op.
 */
static void hl_mmu_v2_hr_swap_in(struct hl_ctx *ctx)
{
	/* intentionally empty */
}
332
/*
 * hl_mmu_v2_hr_get_tlb_mapping_params - classify a virtual address by MMU range
 *
 * @hdev: pointer to the device structure
 * @mmu_prop: output, set to the MMU properties of the matching range
 * @hops: hop info whose range_type field is set according to the match
 * @virt_addr: virtual address to classify
 * @is_huge: output, set to true for the DRAM and the host huge ranges
 *
 * The ranges are tested in this priority: DRAM, host, host huge.
 *
 * Return: 0 if the address belongs to one of the ranges, -EINVAL otherwise
 * (in which case the outputs are left untouched).
 */
static int hl_mmu_v2_hr_get_tlb_mapping_params(struct hl_device *hdev,
							struct hl_mmu_properties **mmu_prop,
							struct hl_mmu_hop_info *hops,
							u64 virt_addr, bool *is_huge)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct hl_mmu_properties *dmmu = &prop->dmmu;
	struct hl_mmu_properties *pmmu = &prop->pmmu;
	struct hl_mmu_properties *pmmu_huge = &prop->pmmu_huge;
	bool in_dram, in_host, in_host_huge;

	in_dram = hl_mem_area_inside_range(virt_addr, dmmu->page_size,
						dmmu->start_addr, dmmu->end_addr);
	in_host = hl_mem_area_inside_range(virt_addr, pmmu->page_size,
						pmmu->start_addr, pmmu->end_addr);
	in_host_huge = hl_mem_area_inside_range(virt_addr, pmmu_huge->page_size,
						pmmu_huge->start_addr,
						pmmu_huge->end_addr);

	if (in_dram) {
		*mmu_prop = dmmu;
		*is_huge = true;
		hops->range_type = HL_VA_RANGE_TYPE_DRAM;
		return 0;
	}

	if (in_host) {
		*mmu_prop = pmmu;
		*is_huge = false;
		hops->range_type = HL_VA_RANGE_TYPE_HOST;
		return 0;
	}

	if (in_host_huge) {
		*mmu_prop = pmmu_huge;
		*is_huge = true;
		hops->range_type = HL_VA_RANGE_TYPE_HOST_HUGE;
		return 0;
	}

	return -EINVAL;
}
369
/*
 * hl_mmu_v2_hr_get_tlb_info - get the hops info of a virtual address
 *
 * @ctx: pointer to the context structure
 * @virt_addr: virtual address to query
 * @hops: hop info structure, passed on to the common helper
 *
 * Thin wrapper that forwards the request to hl_mmu_hr_get_tlb_info() together
 * with the host-resident function table of the device.
 *
 * Return: the value returned by hl_mmu_hr_get_tlb_info().
 */
static int hl_mmu_v2_hr_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr,
					struct hl_mmu_hop_info *hops)
{
	struct hl_device *hdev = ctx->hdev;

	return hl_mmu_hr_get_tlb_info(ctx, virt_addr, hops,
					&hdev->mmu_func[MMU_HR_PGT].hr_funcs);
}
376
377 /*
378 * hl_mmu_v2_prepare - prepare mmu_if for working with mmu v2
379 *
380 * @hdev: pointer to the device structure
381 * @mmu_if: pointer to the mmu interface structure
382 */
hl_mmu_v2_hr_set_funcs(struct hl_device * hdev,struct hl_mmu_funcs * mmu)383 void hl_mmu_v2_hr_set_funcs(struct hl_device *hdev, struct hl_mmu_funcs *mmu)
384 {
385 mmu->init = hl_mmu_v2_hr_init;
386 mmu->fini = hl_mmu_v2_hr_fini;
387 mmu->ctx_init = hl_mmu_v2_hr_ctx_init;
388 mmu->ctx_fini = hl_mmu_v2_hr_ctx_fini;
389 mmu->map = _hl_mmu_v2_hr_map;
390 mmu->unmap = _hl_mmu_v2_hr_unmap;
391 mmu->flush = hl_mmu_hr_flush;
392 mmu->swap_out = hl_mmu_v2_hr_swap_out;
393 mmu->swap_in = hl_mmu_v2_hr_swap_in;
394 mmu->get_tlb_info = hl_mmu_v2_hr_get_tlb_info;
395 mmu->hr_funcs.get_hop0_pgt_info = hl_mmu_v2_hr_get_hop0_pgt_info;
396 mmu->hr_funcs.get_pgt_info = hl_mmu_v2_hr_get_pgt_info;
397 mmu->hr_funcs.add_pgt_info = hl_mmu_v2_hr_add_pgt_info;
398 mmu->hr_funcs.get_tlb_mapping_params = hl_mmu_v2_hr_get_tlb_mapping_params;
399 }
400