1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * Copyright © 2015 Intel Corporation.
4 *
5 * Authors: David Woodhouse <dwmw2@infradead.org>
6 */
7
8 #include <linux/mmu_notifier.h>
9 #include <linux/sched.h>
10 #include <linux/sched/mm.h>
11 #include <linux/slab.h>
12 #include <linux/rculist.h>
13 #include <linux/pci.h>
14 #include <linux/pci-ats.h>
15 #include <linux/dmar.h>
16 #include <linux/interrupt.h>
17 #include <linux/mm_types.h>
18 #include <linux/xarray.h>
19 #include <asm/page.h>
20 #include <asm/fpu/api.h>
21
22 #include "iommu.h"
23 #include "pasid.h"
24 #include "perf.h"
25 #include "../iommu-pages.h"
26 #include "trace.h"
27
/*
 * Threaded interrupt handler for the page request queue. Declared here
 * because it is registered with request_threaded_irq() before its
 * definition further down in this file.
 */
static irqreturn_t prq_event_thread(int irq, void *d);
29
intel_svm_enable_prq(struct intel_iommu * iommu)30 int intel_svm_enable_prq(struct intel_iommu *iommu)
31 {
32 struct iopf_queue *iopfq;
33 int irq, ret;
34
35 iommu->prq = iommu_alloc_pages_node(iommu->node, GFP_KERNEL, PRQ_ORDER);
36 if (!iommu->prq) {
37 pr_warn("IOMMU: %s: Failed to allocate page request queue\n",
38 iommu->name);
39 return -ENOMEM;
40 }
41
42 irq = dmar_alloc_hwirq(IOMMU_IRQ_ID_OFFSET_PRQ + iommu->seq_id, iommu->node, iommu);
43 if (irq <= 0) {
44 pr_err("IOMMU: %s: Failed to create IRQ vector for page request queue\n",
45 iommu->name);
46 ret = -EINVAL;
47 goto free_prq;
48 }
49 iommu->pr_irq = irq;
50
51 snprintf(iommu->iopfq_name, sizeof(iommu->iopfq_name),
52 "dmar%d-iopfq", iommu->seq_id);
53 iopfq = iopf_queue_alloc(iommu->iopfq_name);
54 if (!iopfq) {
55 pr_err("IOMMU: %s: Failed to allocate iopf queue\n", iommu->name);
56 ret = -ENOMEM;
57 goto free_hwirq;
58 }
59 iommu->iopf_queue = iopfq;
60
61 snprintf(iommu->prq_name, sizeof(iommu->prq_name), "dmar%d-prq", iommu->seq_id);
62
63 ret = request_threaded_irq(irq, NULL, prq_event_thread, IRQF_ONESHOT,
64 iommu->prq_name, iommu);
65 if (ret) {
66 pr_err("IOMMU: %s: Failed to request IRQ for page request queue\n",
67 iommu->name);
68 goto free_iopfq;
69 }
70 dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL);
71 dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL);
72 dmar_writeq(iommu->reg + DMAR_PQA_REG, virt_to_phys(iommu->prq) | PRQ_ORDER);
73
74 init_completion(&iommu->prq_complete);
75
76 return 0;
77
78 free_iopfq:
79 iopf_queue_free(iommu->iopf_queue);
80 iommu->iopf_queue = NULL;
81 free_hwirq:
82 dmar_free_hwirq(irq);
83 iommu->pr_irq = 0;
84 free_prq:
85 iommu_free_pages(iommu->prq, PRQ_ORDER);
86 iommu->prq = NULL;
87
88 return ret;
89 }
90
intel_svm_finish_prq(struct intel_iommu * iommu)91 int intel_svm_finish_prq(struct intel_iommu *iommu)
92 {
93 dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL);
94 dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL);
95 dmar_writeq(iommu->reg + DMAR_PQA_REG, 0ULL);
96
97 if (iommu->pr_irq) {
98 free_irq(iommu->pr_irq, iommu);
99 dmar_free_hwirq(iommu->pr_irq);
100 iommu->pr_irq = 0;
101 }
102
103 if (iommu->iopf_queue) {
104 iopf_queue_free(iommu->iopf_queue);
105 iommu->iopf_queue = NULL;
106 }
107
108 iommu_free_pages(iommu->prq, PRQ_ORDER);
109 iommu->prq = NULL;
110
111 return 0;
112 }
113
intel_svm_check(struct intel_iommu * iommu)114 void intel_svm_check(struct intel_iommu *iommu)
115 {
116 if (!pasid_supported(iommu))
117 return;
118
119 if (cpu_feature_enabled(X86_FEATURE_GBPAGES) &&
120 !cap_fl1gp_support(iommu->cap)) {
121 pr_err("%s SVM disabled, incompatible 1GB page capability\n",
122 iommu->name);
123 return;
124 }
125
126 if (cpu_feature_enabled(X86_FEATURE_LA57) &&
127 !cap_fl5lp_support(iommu->cap)) {
128 pr_err("%s SVM disabled, incompatible paging mode\n",
129 iommu->name);
130 return;
131 }
132
133 iommu->flags |= VTD_FLAG_SVM_CAPABLE;
134 }
135
136 /* Pages have been freed at this point */
intel_arch_invalidate_secondary_tlbs(struct mmu_notifier * mn,struct mm_struct * mm,unsigned long start,unsigned long end)137 static void intel_arch_invalidate_secondary_tlbs(struct mmu_notifier *mn,
138 struct mm_struct *mm,
139 unsigned long start, unsigned long end)
140 {
141 struct dmar_domain *domain = container_of(mn, struct dmar_domain, notifier);
142
143 if (start == 0 && end == ULONG_MAX) {
144 cache_tag_flush_all(domain);
145 return;
146 }
147
148 /*
149 * The mm_types defines vm_end as the first byte after the end address,
150 * different from IOMMU subsystem using the last address of an address
151 * range.
152 */
153 cache_tag_flush_range(domain, start, end - 1, 0);
154 }
155
intel_mm_release(struct mmu_notifier * mn,struct mm_struct * mm)156 static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
157 {
158 struct dmar_domain *domain = container_of(mn, struct dmar_domain, notifier);
159 struct dev_pasid_info *dev_pasid;
160 struct device_domain_info *info;
161 unsigned long flags;
162
163 /* This might end up being called from exit_mmap(), *before* the page
164 * tables are cleared. And __mmu_notifier_release() will delete us from
165 * the list of notifiers so that our invalidate_range() callback doesn't
166 * get called when the page tables are cleared. So we need to protect
167 * against hardware accessing those page tables.
168 *
169 * We do it by clearing the entry in the PASID table and then flushing
170 * the IOTLB and the PASID table caches. This might upset hardware;
171 * perhaps we'll want to point the PASID to a dummy PGD (like the zero
172 * page) so that we end up taking a fault that the hardware really
173 * *has* to handle gracefully without affecting other processes.
174 */
175 spin_lock_irqsave(&domain->lock, flags);
176 list_for_each_entry(dev_pasid, &domain->dev_pasids, link_domain) {
177 info = dev_iommu_priv_get(dev_pasid->dev);
178 intel_pasid_tear_down_entry(info->iommu, dev_pasid->dev,
179 dev_pasid->pasid, true);
180 }
181 spin_unlock_irqrestore(&domain->lock, flags);
182
183 }
184
intel_mm_free_notifier(struct mmu_notifier * mn)185 static void intel_mm_free_notifier(struct mmu_notifier *mn)
186 {
187 struct dmar_domain *domain = container_of(mn, struct dmar_domain, notifier);
188
189 kfree(domain->qi_batch);
190 kfree(domain);
191 }
192
193 static const struct mmu_notifier_ops intel_mmuops = {
194 .release = intel_mm_release,
195 .arch_invalidate_secondary_tlbs = intel_arch_invalidate_secondary_tlbs,
196 .free_notifier = intel_mm_free_notifier,
197 };
198
/*
 * Attach @pasid of @dev to the SVM @domain.
 *
 * Assigns a cache tag for the (device, PASID) pair, programs a first-level
 * PASID table entry pointing at the PGD of the domain's mm, and records the
 * pair on the domain's dev_pasids list.
 *
 * Returns 0 on success, -ENOMEM if the tracking structure cannot be
 * allocated, or the error reported by cache_tag_assign_domain() or
 * intel_pasid_setup_first_level(). Nothing stays allocated or assigned on
 * failure.
 */
static int intel_svm_set_dev_pasid(struct iommu_domain *domain,
				   struct device *dev, ioasid_t pasid)
{
	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
	struct device_domain_info *info = dev_iommu_priv_get(dev);
	struct intel_iommu *iommu = info->iommu;
	struct mm_struct *mm = domain->mm;
	struct dev_pasid_info *entry;
	unsigned long pasid_flags;
	unsigned long irqflags;
	int err;

	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
	if (!entry)
		return -ENOMEM;

	entry->dev = dev;
	entry->pasid = pasid;

	err = cache_tag_assign_domain(dmar_domain, dev, pasid);
	if (err) {
		kfree(entry);
		return err;
	}

	/* Set up the PASID table; request 5-level paging when the CPU uses it. */
	if (cpu_feature_enabled(X86_FEATURE_LA57))
		pasid_flags = PASID_FLAG_FL5LP;
	else
		pasid_flags = 0;

	err = intel_pasid_setup_first_level(iommu, dev, mm->pgd, pasid,
					    FLPT_DEFAULT_DID, pasid_flags);
	if (err) {
		cache_tag_unassign_domain(dmar_domain, dev, pasid);
		kfree(entry);
		return err;
	}

	spin_lock_irqsave(&dmar_domain->lock, irqflags);
	list_add(&entry->link_domain, &dmar_domain->dev_pasids);
	spin_unlock_irqrestore(&dmar_domain->lock, irqflags);

	return 0;
}
242
/*
 * Page request queue descriptor.
 *
 * Four 64-bit words. The first two are unions that expose the same storage
 * either as a raw u64 (qw_0, qw_1) or as bit-fields; the bit-field widths
 * in each union add up to 64.
 */
struct page_req_dsc {
	union {
		struct {
			u64 type:8;
			u64 pasid_present:1;	/* set when the pasid field is valid */
			u64 rsvd:7;
			u64 rid:16;		/* used to look up the requesting device */
			u64 pasid:20;
			u64 exe_req:1;		/* reported as IOMMU_FAULT_PERM_EXEC */
			u64 pm_req:1;		/* reported as IOMMU_FAULT_PERM_PRIV */
			u64 rsvd2:10;
		};
		u64 qw_0;
	};
	union {
		struct {
			u64 rd_req:1;		/* reported as IOMMU_FAULT_PERM_READ */
			u64 wr_req:1;		/* reported as IOMMU_FAULT_PERM_WRITE */
			u64 lpig:1;		/* last page in group */
			u64 prg_index:9;	/* page request group index */
			u64 addr:52;		/* page number; shifted by VTD_PAGE_SHIFT for the address */
		};
		u64 qw_1;
	};
	u64 qw_2;
	u64 qw_3;
};
271
is_canonical_address(u64 addr)272 static bool is_canonical_address(u64 addr)
273 {
274 int shift = 64 - (__VIRTUAL_MASK_SHIFT + 1);
275 long saddr = (long) addr;
276
277 return (((saddr << shift) >> shift) == saddr);
278 }
279
280 /**
281 * intel_drain_pasid_prq - Drain page requests and responses for a pasid
282 * @dev: target device
283 * @pasid: pasid for draining
284 *
285 * Drain all pending page requests and responses related to @pasid in both
286 * software and hardware. This is supposed to be called after the device
287 * driver has stopped DMA, the pasid entry has been cleared, and both IOTLB
288 * and DevTLB have been invalidated.
289 *
290 * It waits until all pending page requests for @pasid in the page fault
291 * queue are completed by the prq handling thread. Then follow the steps
292 * described in VT-d spec CH7.10 to drain all page requests and page
293 * responses pending in the hardware.
294 */
intel_drain_pasid_prq(struct device * dev,u32 pasid)295 void intel_drain_pasid_prq(struct device *dev, u32 pasid)
296 {
297 struct device_domain_info *info;
298 struct dmar_domain *domain;
299 struct intel_iommu *iommu;
300 struct qi_desc desc[3];
301 struct pci_dev *pdev;
302 int head, tail;
303 u16 sid, did;
304 int qdep;
305
306 info = dev_iommu_priv_get(dev);
307 if (WARN_ON(!info || !dev_is_pci(dev)))
308 return;
309
310 if (!info->pri_enabled)
311 return;
312
313 iommu = info->iommu;
314 domain = info->domain;
315 pdev = to_pci_dev(dev);
316 sid = PCI_DEVID(info->bus, info->devfn);
317 did = domain ? domain_id_iommu(domain, iommu) : FLPT_DEFAULT_DID;
318 qdep = pci_ats_queue_depth(pdev);
319
320 /*
321 * Check and wait until all pending page requests in the queue are
322 * handled by the prq handling thread.
323 */
324 prq_retry:
325 reinit_completion(&iommu->prq_complete);
326 tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
327 head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
328 while (head != tail) {
329 struct page_req_dsc *req;
330
331 req = &iommu->prq[head / sizeof(*req)];
332 if (!req->pasid_present || req->pasid != pasid) {
333 head = (head + sizeof(*req)) & PRQ_RING_MASK;
334 continue;
335 }
336
337 wait_for_completion(&iommu->prq_complete);
338 goto prq_retry;
339 }
340
341 iopf_queue_flush_dev(dev);
342
343 /*
344 * Perform steps described in VT-d spec CH7.10 to drain page
345 * requests and responses in hardware.
346 */
347 memset(desc, 0, sizeof(desc));
348 desc[0].qw0 = QI_IWD_STATUS_DATA(QI_DONE) |
349 QI_IWD_FENCE |
350 QI_IWD_TYPE;
351 desc[1].qw0 = QI_EIOTLB_PASID(pasid) |
352 QI_EIOTLB_DID(did) |
353 QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) |
354 QI_EIOTLB_TYPE;
355 desc[2].qw0 = QI_DEV_EIOTLB_PASID(pasid) |
356 QI_DEV_EIOTLB_SID(sid) |
357 QI_DEV_EIOTLB_QDEP(qdep) |
358 QI_DEIOTLB_TYPE |
359 QI_DEV_IOTLB_PFSID(info->pfsid);
360 qi_retry:
361 reinit_completion(&iommu->prq_complete);
362 qi_submit_sync(iommu, desc, 3, QI_OPT_WAIT_DRAIN);
363 if (readl(iommu->reg + DMAR_PRS_REG) & DMA_PRS_PRO) {
364 wait_for_completion(&iommu->prq_complete);
365 goto qi_retry;
366 }
367 }
368
prq_to_iommu_prot(struct page_req_dsc * req)369 static int prq_to_iommu_prot(struct page_req_dsc *req)
370 {
371 int prot = 0;
372
373 if (req->rd_req)
374 prot |= IOMMU_FAULT_PERM_READ;
375 if (req->wr_req)
376 prot |= IOMMU_FAULT_PERM_WRITE;
377 if (req->exe_req)
378 prot |= IOMMU_FAULT_PERM_EXEC;
379 if (req->pm_req)
380 prot |= IOMMU_FAULT_PERM_PRIV;
381
382 return prot;
383 }
384
intel_svm_prq_report(struct intel_iommu * iommu,struct device * dev,struct page_req_dsc * desc)385 static void intel_svm_prq_report(struct intel_iommu *iommu, struct device *dev,
386 struct page_req_dsc *desc)
387 {
388 struct iopf_fault event = { };
389
390 /* Fill in event data for device specific processing */
391 event.fault.type = IOMMU_FAULT_PAGE_REQ;
392 event.fault.prm.addr = (u64)desc->addr << VTD_PAGE_SHIFT;
393 event.fault.prm.pasid = desc->pasid;
394 event.fault.prm.grpid = desc->prg_index;
395 event.fault.prm.perm = prq_to_iommu_prot(desc);
396
397 if (desc->lpig)
398 event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
399 if (desc->pasid_present) {
400 event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
401 event.fault.prm.flags |= IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID;
402 }
403
404 iommu_report_device_fault(dev, &event);
405 }
406
handle_bad_prq_event(struct intel_iommu * iommu,struct page_req_dsc * req,int result)407 static void handle_bad_prq_event(struct intel_iommu *iommu,
408 struct page_req_dsc *req, int result)
409 {
410 struct qi_desc desc = { };
411
412 pr_err("%s: Invalid page request: %08llx %08llx\n",
413 iommu->name, ((unsigned long long *)req)[0],
414 ((unsigned long long *)req)[1]);
415
416 if (!req->lpig)
417 return;
418
419 desc.qw0 = QI_PGRP_PASID(req->pasid) |
420 QI_PGRP_DID(req->rid) |
421 QI_PGRP_PASID_P(req->pasid_present) |
422 QI_PGRP_RESP_CODE(result) |
423 QI_PGRP_RESP_TYPE;
424 desc.qw1 = QI_PGRP_IDX(req->prg_index) |
425 QI_PGRP_LPIG(req->lpig);
426
427 qi_submit_sync(iommu, &desc, 1, 0);
428 }
429
prq_event_thread(int irq,void * d)430 static irqreturn_t prq_event_thread(int irq, void *d)
431 {
432 struct intel_iommu *iommu = d;
433 struct page_req_dsc *req;
434 int head, tail, handled;
435 struct device *dev;
436 u64 address;
437
438 /*
439 * Clear PPR bit before reading head/tail registers, to ensure that
440 * we get a new interrupt if needed.
441 */
442 writel(DMA_PRS_PPR, iommu->reg + DMAR_PRS_REG);
443
444 tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
445 head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
446 handled = (head != tail);
447 while (head != tail) {
448 req = &iommu->prq[head / sizeof(*req)];
449 address = (u64)req->addr << VTD_PAGE_SHIFT;
450
451 if (unlikely(!req->pasid_present)) {
452 pr_err("IOMMU: %s: Page request without PASID\n",
453 iommu->name);
454 bad_req:
455 handle_bad_prq_event(iommu, req, QI_RESP_INVALID);
456 goto prq_advance;
457 }
458
459 if (unlikely(!is_canonical_address(address))) {
460 pr_err("IOMMU: %s: Address is not canonical\n",
461 iommu->name);
462 goto bad_req;
463 }
464
465 if (unlikely(req->pm_req && (req->rd_req | req->wr_req))) {
466 pr_err("IOMMU: %s: Page request in Privilege Mode\n",
467 iommu->name);
468 goto bad_req;
469 }
470
471 if (unlikely(req->exe_req && req->rd_req)) {
472 pr_err("IOMMU: %s: Execution request not supported\n",
473 iommu->name);
474 goto bad_req;
475 }
476
477 /* Drop Stop Marker message. No need for a response. */
478 if (unlikely(req->lpig && !req->rd_req && !req->wr_req))
479 goto prq_advance;
480
481 /*
482 * If prq is to be handled outside iommu driver via receiver of
483 * the fault notifiers, we skip the page response here.
484 */
485 mutex_lock(&iommu->iopf_lock);
486 dev = device_rbtree_find(iommu, req->rid);
487 if (!dev) {
488 mutex_unlock(&iommu->iopf_lock);
489 goto bad_req;
490 }
491
492 intel_svm_prq_report(iommu, dev, req);
493 trace_prq_report(iommu, dev, req->qw_0, req->qw_1,
494 req->qw_2, req->qw_3,
495 iommu->prq_seq_number++);
496 mutex_unlock(&iommu->iopf_lock);
497 prq_advance:
498 head = (head + sizeof(*req)) & PRQ_RING_MASK;
499 }
500
501 dmar_writeq(iommu->reg + DMAR_PQH_REG, tail);
502
503 /*
504 * Clear the page request overflow bit and wake up all threads that
505 * are waiting for the completion of this handling.
506 */
507 if (readl(iommu->reg + DMAR_PRS_REG) & DMA_PRS_PRO) {
508 pr_info_ratelimited("IOMMU: %s: PRQ overflow detected\n",
509 iommu->name);
510 head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
511 tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
512 if (head == tail) {
513 iopf_queue_discard_partial(iommu->iopf_queue);
514 writel(DMA_PRS_PRO, iommu->reg + DMAR_PRS_REG);
515 pr_info_ratelimited("IOMMU: %s: PRQ overflow cleared",
516 iommu->name);
517 }
518 }
519
520 if (!completion_done(&iommu->prq_complete))
521 complete(&iommu->prq_complete);
522
523 return IRQ_RETVAL(handled);
524 }
525
intel_svm_page_response(struct device * dev,struct iopf_fault * evt,struct iommu_page_response * msg)526 void intel_svm_page_response(struct device *dev, struct iopf_fault *evt,
527 struct iommu_page_response *msg)
528 {
529 struct device_domain_info *info = dev_iommu_priv_get(dev);
530 struct intel_iommu *iommu = info->iommu;
531 u8 bus = info->bus, devfn = info->devfn;
532 struct iommu_fault_page_request *prm;
533 struct qi_desc desc;
534 bool pasid_present;
535 bool last_page;
536 u16 sid;
537
538 prm = &evt->fault.prm;
539 sid = PCI_DEVID(bus, devfn);
540 pasid_present = prm->flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
541 last_page = prm->flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
542
543 desc.qw0 = QI_PGRP_PASID(prm->pasid) | QI_PGRP_DID(sid) |
544 QI_PGRP_PASID_P(pasid_present) |
545 QI_PGRP_RESP_CODE(msg->code) |
546 QI_PGRP_RESP_TYPE;
547 desc.qw1 = QI_PGRP_IDX(prm->grpid) | QI_PGRP_LPIG(last_page);
548 desc.qw2 = 0;
549 desc.qw3 = 0;
550
551 qi_submit_sync(iommu, &desc, 1, 0);
552 }
553
intel_svm_domain_free(struct iommu_domain * domain)554 static void intel_svm_domain_free(struct iommu_domain *domain)
555 {
556 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
557
558 /* dmar_domain free is deferred to the mmu free_notifier callback. */
559 mmu_notifier_put(&dmar_domain->notifier);
560 }
561
562 static const struct iommu_domain_ops intel_svm_domain_ops = {
563 .set_dev_pasid = intel_svm_set_dev_pasid,
564 .free = intel_svm_domain_free
565 };
566
intel_svm_domain_alloc(struct device * dev,struct mm_struct * mm)567 struct iommu_domain *intel_svm_domain_alloc(struct device *dev,
568 struct mm_struct *mm)
569 {
570 struct dmar_domain *domain;
571 int ret;
572
573 domain = kzalloc(sizeof(*domain), GFP_KERNEL);
574 if (!domain)
575 return ERR_PTR(-ENOMEM);
576
577 domain->domain.ops = &intel_svm_domain_ops;
578 domain->use_first_level = true;
579 INIT_LIST_HEAD(&domain->dev_pasids);
580 INIT_LIST_HEAD(&domain->cache_tags);
581 spin_lock_init(&domain->cache_lock);
582 spin_lock_init(&domain->lock);
583
584 domain->notifier.ops = &intel_mmuops;
585 ret = mmu_notifier_register(&domain->notifier, mm);
586 if (ret) {
587 kfree(domain);
588 return ERR_PTR(ret);
589 }
590
591 return &domain->domain;
592 }
593