// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2020 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include "../habanalabs.h"
#include "../../include/hw_ip/mmu/mmu_general.h"
#include "../../include/hw_ip/mmu/mmu_v2_0.h"

#include <linux/slab.h>

/**
 * hl_mmu_v2_ctx_init() - initialize a context for using the MMU module.
 * @ctx: pointer to the context structure to initialize.
 *
 * Initialize a hash to hold all the page table hops related to this context.
 * Return: 0 on success, non-zero otherwise.
 */
static int hl_mmu_v2_ctx_init(struct hl_ctx *ctx)
{
	hash_init(ctx->mmu_shadow_hash);

	return 0;
}

/*
 * hl_mmu_v2_ctx_fini - disable a ctx from using the mmu module
 *
 * @ctx: pointer to the context structure
 *
 * This function does the following:
 * - Free any pgts which were not freed yet
 */
static void hl_mmu_v2_ctx_fini(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;
	struct pgt_info *pgt_info;
	struct hlist_node *tmp;
	int i;

	if (!hash_empty(ctx->mmu_shadow_hash))
		dev_err(hdev->dev, "ctx %d is freed while it has pgts in use\n",
			ctx->asid);

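	/* Free any page-table hops that were not unmapped before the context was destroyed */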
	hash_for_each_safe(ctx->mmu_shadow_hash, i, tmp, pgt_info, node) {
		dev_err_ratelimited(hdev->dev,
			"pgt_info of addr 0x%llx of asid %d was not destroyed, num_ptes: %d\n",
			pgt_info->phys_addr, ctx->asid, pgt_info->num_of_ptes);
		hl_mmu_dr_free_pgt_node(ctx, pgt_info);
	}
}

static int hl_mmu_v2_unmap(struct hl_ctx *ctx, u64 virt_addr, bool is_dram_addr)
{
	u64 hop_addr[MMU_ARCH_6_HOPS] = { 0 }, hop_pte_addr[MMU_ARCH_6_HOPS] = { 0 }, curr_pte,
		scrambled_virt_addr;
	struct asic_fixed_properties *prop = &ctx->hdev->asic_prop;
	struct hl_device *hdev = ctx->hdev;
	struct hl_mmu_properties *mmu_prop;
	bool is_huge = false;
	int i, hop_last;

	/* MMU v2 maps device-resident memory via the HMMU only, so reject non-DRAM addresses */
	if (!is_dram_addr)
		return -EINVAL;

	mmu_prop = &prop->dmmu;

	hop_last = mmu_prop->num_hops - 1;

	scrambled_virt_addr = hdev->asic_funcs->scramble_addr(hdev, virt_addr);

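	/* Hop 0 is preallocated per context, so only compute the PTE address within it */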
	hop_addr[0] = hl_mmu_dr_get_hop0_addr(ctx);
	hop_pte_addr[0] = hl_mmu_get_hop_pte_phys_addr(ctx, mmu_prop, 0,
						hop_addr[0], scrambled_virt_addr);
	if (hop_pte_addr[0] == U64_MAX)
		return -EFAULT;

	curr_pte = *(u64 *) (uintptr_t) hop_pte_addr[0];

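	/* Walk down the remaining hops until a PTE with the last (huge page) bit is found */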
	for (i = 1 ; i < mmu_prop->num_hops ; i++) {
		hop_addr[i] = hl_mmu_get_next_hop_addr(ctx, curr_pte);
		if (hop_addr[i] == ULLONG_MAX)
			goto not_mapped;

		hop_pte_addr[i] = hl_mmu_get_hop_pte_phys_addr(ctx, mmu_prop, i,
						hop_addr[i], scrambled_virt_addr);
		if (hop_pte_addr[i] == U64_MAX)
			return -EFAULT;

		curr_pte = *(u64 *) (uintptr_t) hop_pte_addr[i];

		if ((i <= hop_last) && (curr_pte & mmu_prop->last_mask)) {
			hop_last = i;
			is_huge = true;
			break;
		}
	}

	if (is_dram_addr && !is_huge) {
		dev_err(hdev->dev, "DRAM unmapping should use huge pages only\n");
		return -EFAULT;
	}

	if (!(curr_pte & PAGE_PRESENT_MASK))
		goto not_mapped;

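	/*
	 * Ascend from the last hop: clear each PTE and drop the hop's reference.
	 * If a hop still holds valid PTEs, stop - its parent entry must remain intact.
	 */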
	for (i = hop_last ; i > 0 ; i--) {
		hl_mmu_dr_clear_pte(ctx, hop_pte_addr[i]);
		if (hl_mmu_dr_put_pte(ctx, hop_addr[i]))
			goto mapped;
	}
	hl_mmu_dr_clear_pte(ctx, hop_pte_addr[0]);

mapped:
	return 0;

not_mapped:
	dev_err(hdev->dev, "virt addr 0x%llx is not mapped to phys addr\n",
		virt_addr);

	return -EINVAL;
}

static int hl_mmu_v2_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
			u32 page_size, bool is_dram_addr)
{
	u64 hop_addr[MMU_ARCH_6_HOPS] = { 0 }, hop_pte_addr[MMU_ARCH_6_HOPS] = { 0 },
		curr_pte = 0, scrambled_virt_addr, scrambled_phys_addr;
	struct asic_fixed_properties *prop = &ctx->hdev->asic_prop;
	bool hop_new[MMU_ARCH_6_HOPS] = { false };
	struct hl_device *hdev = ctx->hdev;
	struct hl_mmu_properties *mmu_prop;
	int rc, i, hop_last;

	/* MMU v2 maps device-resident memory via the HMMU only, so reject non-DRAM addresses */
	if (!is_dram_addr)
		return -EINVAL;

	mmu_prop = &prop->dmmu;

	hop_last = mmu_prop->num_hops - 1;

	scrambled_virt_addr = hdev->asic_funcs->scramble_addr(hdev, virt_addr);
	scrambled_phys_addr = hdev->asic_funcs->scramble_addr(hdev, phys_addr);

	/* First hop is preallocated therefore it is treated differently */
	hop_addr[0] = hl_mmu_dr_get_hop0_addr(ctx);
	hop_pte_addr[0] = hl_mmu_get_hop_pte_phys_addr(ctx, mmu_prop, 0,
						hop_addr[0], scrambled_virt_addr);
	curr_pte = *(u64 *) (uintptr_t) hop_pte_addr[0];

	/* Handle hop1 to hop_last */
	for (i = 1 ; i <= hop_last ; i++) {
		hop_addr[i] = hl_mmu_dr_get_alloc_next_hop_addr(ctx, curr_pte, &hop_new[i]);
		if (hop_addr[i] == ULLONG_MAX) {
			rc = -ENOMEM;
			goto err;
		}

		hop_pte_addr[i] = hl_mmu_get_hop_pte_phys_addr(ctx, mmu_prop, i,
						hop_addr[i], scrambled_virt_addr);
		if (hop_pte_addr[i] == U64_MAX) {
			rc = -EINVAL;
			goto err;
		}

		if (!hop_pte_addr[i]) {
			rc = -EINVAL;
			goto err;
		}

		curr_pte = *(u64 *) (uintptr_t) hop_pte_addr[i];
	}

	if (curr_pte & PAGE_PRESENT_MASK) {
		dev_err(hdev->dev,
			"mapping already exists for virt_addr 0x%llx\n",
			virt_addr);

		for (i = 0 ; i <= hop_last ; i++)
			dev_dbg(hdev->dev, "hop%d pte: 0x%llx (0x%llx)\n",
				i, *(u64 *) (uintptr_t) hop_pte_addr[i],
				hop_pte_addr[i]);

		rc = -EINVAL;
		goto err;
	}

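	/* Build the leaf PTE: scrambled physical address, last-hop indication and present bit */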
	curr_pte = (scrambled_phys_addr & HOP_PHYS_ADDR_MASK)
			| mmu_prop->last_mask | PAGE_PRESENT_MASK;

	/* Write the PTEs */
	hl_mmu_dr_write_final_pte(ctx, hop_pte_addr[hop_last], curr_pte);

	/* for each new hop, add its address to the table of previous-hop */
	for (i = 1 ; i <= hop_last ; i++) {
		if (hop_new[i]) {
			curr_pte = (hop_addr[i] & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
			hl_mmu_dr_write_pte(ctx, hop_pte_addr[i - 1], curr_pte);

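			/* Take a reference on the parent hop; hop 0 is preallocated and not refcounted */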
			if (i - 1)
				hl_mmu_dr_get_pte(ctx, hop_addr[i - 1]);
		}
	}
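	/* Account for the new leaf PTE in the last hop's reference count */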
	hl_mmu_dr_get_pte(ctx, hop_addr[hop_last]);

	return 0;

err:
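	/* Roll back any hop tables that were allocated for this mapping attempt */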
	for (i = 1 ; i <= hop_last ; i++)
		if (hop_new[i] && (hop_addr[i] != U64_MAX))
			hl_mmu_dr_free_hop(ctx, hop_addr[i]);

	return rc;
}

/*
 * hl_mmu_v2_swap_out - marks all mappings of the given ctx as swapped out
 *
 * @ctx: pointer to the context structure
 *
 * Currently a no-op.
 */
static void hl_mmu_v2_swap_out(struct hl_ctx *ctx)
{

}

/*
 * hl_mmu_v2_swap_in - marks all mappings of the given ctx as swapped in
 *
 * @ctx: pointer to the context structure
 *
 * Currently a no-op.
 */
static void hl_mmu_v2_swap_in(struct hl_ctx *ctx)
{

}

static int hl_mmu_v2_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr, struct hl_mmu_hop_info *hops)
{
	struct asic_fixed_properties *prop = &ctx->hdev->asic_prop;
	struct hl_device *hdev = ctx->hdev;
	struct hl_mmu_properties *mmu_prop;
	bool is_dram_addr;
	int i;

	is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
						prop->dmmu.start_addr,
						prop->dmmu.end_addr);

	/* MMU v2 maps device-resident memory via the HMMU only, so reject non-DRAM addresses */
	if (!is_dram_addr)
		return -EINVAL;

	mmu_prop = &prop->dmmu;
	hops->range_type = HL_VA_RANGE_TYPE_DRAM;

	hops->scrambled_vaddr = hdev->asic_funcs->scramble_addr(hdev, virt_addr);

	hops->hop_info[0].hop_addr = hl_mmu_dr_get_phys_hop0_addr(ctx);
	hops->hop_info[0].hop_pte_addr = hl_mmu_get_hop_pte_phys_addr(ctx, mmu_prop, 0,
						hops->hop_info[0].hop_addr,
						hops->scrambled_vaddr);
	if (hops->hop_info[0].hop_pte_addr == U64_MAX)
		return -EFAULT;

	hops->hop_info[0].hop_pte_val = hdev->asic_funcs->read_pte(hdev,
						hops->hop_info[0].hop_pte_addr);
	if (hops->hop_info[0].hop_pte_val == U64_MAX)
		return -EFAULT;

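	/* Walk the remaining hops, reading each PTE, until the last (leaf) PTE is reached */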
	for (i = 1 ; i < mmu_prop->num_hops ; i++) {
		hops->hop_info[i].hop_addr =
			hl_mmu_get_next_hop_addr(ctx, hops->hop_info[i - 1].hop_pte_val);
		if (hops->hop_info[i].hop_addr == ULLONG_MAX)
			return -EFAULT;

		hops->hop_info[i].hop_pte_addr =
			hl_mmu_get_hop_pte_phys_addr(ctx, mmu_prop, i,
						hops->hop_info[i].hop_addr,
						hops->scrambled_vaddr);
		if (hops->hop_info[i].hop_pte_addr == U64_MAX)
			return -EFAULT;

		hops->hop_info[i].hop_pte_val =
			hdev->asic_funcs->read_pte(hdev,
						hops->hop_info[i].hop_pte_addr);

		if (!(hops->hop_info[i].hop_pte_val & PAGE_PRESENT_MASK))
			return -EFAULT;

		if (hops->hop_info[i].hop_pte_val & mmu_prop->last_mask)
			break;
	}

	/* if passed over all hops then no last hop was found */
	if (i == mmu_prop->num_hops)
		return -EFAULT;

	if (!(hops->hop_info[i].hop_pte_val & PAGE_PRESENT_MASK))
		return -EFAULT;

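	/* Report the physical address: descramble the leaf PTE value if the vaddr was scrambled */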
	if (hops->scrambled_vaddr != virt_addr)
		hops->unscrambled_paddr = hdev->asic_funcs->descramble_addr
				(hdev, hops->hop_info[i].hop_pte_val);
	else
		hops->unscrambled_paddr = hops->hop_info[i].hop_pte_val;

	hops->used_hops = i + 1;

	return 0;
}

/*
 * hl_mmu_v2_set_funcs - fill the device's mmu function pointers with the mmu v2 implementation
 *
 * @hdev: pointer to the device structure
 * @mmu: pointer to the mmu functions structure to fill
 */
void hl_mmu_v2_set_funcs(struct hl_device *hdev, struct hl_mmu_funcs *mmu)
{
	mmu->init = hl_mmu_dr_init;
	mmu->fini = hl_mmu_dr_fini;
	mmu->ctx_init = hl_mmu_v2_ctx_init;
	mmu->ctx_fini = hl_mmu_v2_ctx_fini;
	mmu->map = hl_mmu_v2_map;
	mmu->unmap = hl_mmu_v2_unmap;
	mmu->flush = hl_mmu_dr_flush;
	mmu->swap_out = hl_mmu_v2_swap_out;
	mmu->swap_in = hl_mmu_v2_swap_in;
	mmu->get_tlb_info = hl_mmu_v2_get_tlb_info;
}