// SPDX-License-Identifier: GPL-2.0
/*
 * Handle device page faults
 *
 * Copyright (C) 2020 ARM Ltd.
 */

#include <linux/iommu.h>
#include <linux/list.h>
#include <linux/sched/mm.h>
#include <linux/slab.h>
#include <linux/workqueue.h>

#include "iommu-priv.h"

/*
 * Return the fault parameter of a device if it exists. Otherwise, return NULL.
 * On a successful return, the caller takes a reference of this parameter and
 * should put it after use by calling iopf_put_dev_fault_param().
 */
static struct iommu_fault_param *iopf_get_dev_fault_param(struct device *dev)
{
	struct dev_iommu *param = dev->iommu;
	struct iommu_fault_param *fault_param;

	rcu_read_lock();
	fault_param = rcu_dereference(param->fault_param);
	if (fault_param && !refcount_inc_not_zero(&fault_param->users))
		fault_param = NULL;
	rcu_read_unlock();

	return fault_param;
}

/* Caller must hold a reference of the fault parameter. */
static void iopf_put_dev_fault_param(struct iommu_fault_param *fault_param)
{
	if (refcount_dec_and_test(&fault_param->users))
		kfree_rcu(fault_param, rcu);
}
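
/*
 * Illustrative sketch only, mirroring the reporting path further down in this
 * file: look the fault parameter up, use it, then drop the reference:
 *
 *	iopf_param = iopf_get_dev_fault_param(dev);
 *	if (!iopf_param)
 *		return -ENODEV;
 *	... use iopf_param ...
 *	iopf_put_dev_fault_param(iopf_param);
 */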

static void __iopf_free_group(struct iopf_group *group)
{
	struct iopf_fault *iopf, *next;

	list_for_each_entry_safe(iopf, next, &group->faults, list) {
		if (!(iopf->fault.prm.flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE))
			kfree(iopf);
	}

	/* Pair with iommu_report_device_fault(). */
	iopf_put_dev_fault_param(group->fault_param);
}

void iopf_free_group(struct iopf_group *group)
{
	__iopf_free_group(group);
	kfree(group);
}
EXPORT_SYMBOL_GPL(iopf_free_group);
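
/*
 * Illustrative sketch only: a domain's ->iopf_handler() takes ownership of
 * the group and, usually from deferred work, must answer it with
 * iopf_group_response() and release it with iopf_free_group().
 * "my_domain_iopf_handler" and "my_resolve_faults" are hypothetical names,
 * not part of this file:
 *
 *	static int my_domain_iopf_handler(struct iopf_group *group)
 *	{
 *		enum iommu_page_response_code code;
 *
 *		code = my_resolve_faults(group) ? IOMMU_PAGE_RESP_SUCCESS :
 *						  IOMMU_PAGE_RESP_INVALID;
 *		iopf_group_response(group, code);
 *		iopf_free_group(group);
 *
 *		return 0;
 *	}
 */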

/* Non-last request of a group. Postpone until the last one. */
static int report_partial_fault(struct iommu_fault_param *fault_param,
				struct iommu_fault *fault)
{
	struct iopf_fault *iopf;

	iopf = kzalloc(sizeof(*iopf), GFP_KERNEL);
	if (!iopf)
		return -ENOMEM;

	iopf->fault = *fault;

	mutex_lock(&fault_param->lock);
	list_add(&iopf->list, &fault_param->partial);
	mutex_unlock(&fault_param->lock);

	return 0;
}

static struct iopf_group *iopf_group_alloc(struct iommu_fault_param *iopf_param,
					   struct iopf_fault *evt,
					   struct iopf_group *abort_group)
{
	struct iopf_fault *iopf, *next;
	struct iopf_group *group;

	group = kzalloc(sizeof(*group), GFP_KERNEL);
	if (!group) {
		/*
		 * We always need to construct the group as we need it to abort
		 * the request at the driver if it can't be handled.
		 */
		group = abort_group;
	}

	group->fault_param = iopf_param;
	group->last_fault.fault = evt->fault;
	INIT_LIST_HEAD(&group->faults);
	INIT_LIST_HEAD(&group->pending_node);
	list_add(&group->last_fault.list, &group->faults);

	/* See if we have partial faults for this group */
	mutex_lock(&iopf_param->lock);
	list_for_each_entry_safe(iopf, next, &iopf_param->partial, list) {
		if (iopf->fault.prm.grpid == evt->fault.prm.grpid)
			/* Insert *before* the last fault */
			list_move(&iopf->list, &group->faults);
	}
	list_add(&group->pending_node, &iopf_param->faults);
	mutex_unlock(&iopf_param->lock);

	group->fault_count = list_count_nodes(&group->faults);

	return group;
}

static struct iommu_attach_handle *find_fault_handler(struct device *dev,
						      struct iopf_fault *evt)
{
	struct iommu_fault *fault = &evt->fault;
	struct iommu_attach_handle *attach_handle;

	if (fault->prm.flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID) {
		attach_handle = iommu_attach_handle_get(dev->iommu_group,
				fault->prm.pasid, 0);
		if (IS_ERR(attach_handle)) {
			const struct iommu_ops *ops = dev_iommu_ops(dev);

			if (!ops->user_pasid_table)
				return NULL;
			/*
			 * The iommu driver for this device supports user-
			 * managed PASID table. Therefore page faults for
			 * any PASID should go through the NESTING domain
			 * attached to the device RID.
			 */
			attach_handle = iommu_attach_handle_get(
					dev->iommu_group, IOMMU_NO_PASID,
					IOMMU_DOMAIN_NESTED);
			if (IS_ERR(attach_handle))
				return NULL;
		}
	} else {
		attach_handle = iommu_attach_handle_get(dev->iommu_group,
				IOMMU_NO_PASID, 0);

		if (IS_ERR(attach_handle))
			return NULL;
	}

	if (!attach_handle->domain->iopf_handler)
		return NULL;

	return attach_handle;
}

static void iopf_error_response(struct device *dev, struct iopf_fault *evt)
{
	const struct iommu_ops *ops = dev_iommu_ops(dev);
	struct iommu_fault *fault = &evt->fault;
	struct iommu_page_response resp = {
		.pasid = fault->prm.pasid,
		.grpid = fault->prm.grpid,
		.code = IOMMU_PAGE_RESP_INVALID
	};

	ops->page_response(dev, evt, &resp);
}

/**
 * iommu_report_device_fault() - Report fault event to device driver
 * @dev: the device
 * @evt: fault event data
 *
 * Called by IOMMU drivers when a fault is detected, typically in a threaded IRQ
 * handler. If this function fails then ops->page_response() was called to
 * complete evt if required.
 *
 * This module doesn't handle PCI PASID Stop Marker; IOMMU drivers must discard
 * them before reporting faults. A PASID Stop Marker (LRW = 0b100) doesn't
 * expect a response. It may be generated when disabling a PASID (issuing a
 * PASID stop request) by some PCI devices.
 *
 * The PASID stop request is issued by the device driver before unbind(). Once
 * it completes, no page request is generated for this PASID anymore and
 * outstanding ones have been pushed to the IOMMU (as per PCIe 4.0r1.0 - 6.20.1
 * and 10.4.1.2 - Managing PASID TLP Prefix Usage). Some PCI devices will wait
 * for all outstanding page requests to come back with a response before
 * completing the PASID stop request. Others do not wait for page responses, and
 * instead issue this Stop Marker that tells us when the PASID can be
 * reallocated.
 *
 * It is safe to discard the Stop Marker because it is an optimization.
 * a. Page requests, which are posted requests, have been flushed to the IOMMU
 *    when the stop request completes.
 * b. The IOMMU driver flushes all fault queues on unbind() before freeing the
 *    PASID.
 *
 * So even though the Stop Marker might be issued by the device *after* the stop
 * request completes, outstanding faults will have been dealt with by the time
 * the PASID is freed.
 *
 * Any valid page fault will eventually be routed to an iommu domain and the
 * page fault handler installed there will get called. The users of this
 * handling framework should guarantee that the iommu domain can only be
 * freed after the device has stopped generating page faults (or the iommu
 * hardware has been set to block the page faults) and the pending page faults
 * have been flushed. If no page fault handler is attached or no iopf params
 * are set up, ops->page_response() is called to complete the evt.
 *
 * Return: 0 on success, or an error in case of a bad/failed iopf setup.
 */
int iommu_report_device_fault(struct device *dev, struct iopf_fault *evt)
{
	struct iommu_attach_handle *attach_handle;
	struct iommu_fault *fault = &evt->fault;
	struct iommu_fault_param *iopf_param;
	struct iopf_group abort_group = {};
	struct iopf_group *group;

	attach_handle = find_fault_handler(dev, evt);
	if (!attach_handle)
		goto err_bad_iopf;

	/*
	 * Something has gone wrong if a fault-capable domain is attached but
	 * no iopf_param is set up.
	 */
	iopf_param = iopf_get_dev_fault_param(dev);
	if (WARN_ON(!iopf_param))
		goto err_bad_iopf;

	if (!(fault->prm.flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE)) {
		int ret;

		ret = report_partial_fault(iopf_param, fault);
		iopf_put_dev_fault_param(iopf_param);
		/* A request that is not the last does not need to be ack'd */

		return ret;
	}

	/*
	 * This is the last page fault of a group. Allocate an iopf group and
	 * pass it to the domain's page fault handler. The group holds a
	 * reference count on the fault parameter, which is released after the
	 * response or in the error path of this function. If an error is
	 * returned, the caller will send a response to the hardware. We need
	 * to clean up before leaving, otherwise partial faults will be stuck.
	 */
	group = iopf_group_alloc(iopf_param, evt, &abort_group);
	if (group == &abort_group)
		goto err_abort;

	group->attach_handle = attach_handle;

	/*
	 * On success iopf_handler must call iopf_group_response() and
	 * iopf_free_group()
	 */
	if (group->attach_handle->domain->iopf_handler(group))
		goto err_abort;

	return 0;

err_abort:
	dev_warn_ratelimited(dev, "iopf with pasid %d aborted\n",
			     fault->prm.pasid);
	iopf_group_response(group, IOMMU_PAGE_RESP_FAILURE);
	if (group == &abort_group)
		__iopf_free_group(group);
	else
		iopf_free_group(group);

	return 0;

err_bad_iopf:
	if (fault->type == IOMMU_FAULT_PAGE_REQ)
		iopf_error_response(dev, evt);

	return -EINVAL;
}
EXPORT_SYMBOL_GPL(iommu_report_device_fault);
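
/*
 * Illustrative sketch only: an IOMMU driver typically pops page requests off
 * its hardware queue in a threaded IRQ handler, fills in a struct iopf_fault
 * for each and reports it. "my_prq_irq_thread", "struct my_iommu" and
 * "my_pop_prq" are hypothetical driver-side names, not part of this file.
 * If the report fails, the fault has already been completed with
 * ops->page_response() where required:
 *
 *	static irqreturn_t my_prq_irq_thread(int irq, void *cookie)
 *	{
 *		struct my_iommu *iommu = cookie;
 *		struct iopf_fault evt;
 *		struct device *dev;
 *
 *		while (my_pop_prq(iommu, &dev, &evt))
 *			if (iommu_report_device_fault(dev, &evt))
 *				dev_dbg(dev, "page request group aborted\n");
 *
 *		return IRQ_HANDLED;
 *	}
 */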

/**
 * iopf_queue_flush_dev - Ensure that all queued faults have been processed
 * @dev: the endpoint whose faults need to be flushed.
 *
 * The IOMMU driver calls this before releasing a PASID, to ensure that all
 * pending faults for this PASID have been handled, and won't hit the address
 * space of the next process that uses this PASID. The driver must make sure
 * that no new fault is added to the queue. In particular it must flush its
 * low-level queue before calling this function.
 *
 * Return: 0 on success and <0 on error.
 */
int iopf_queue_flush_dev(struct device *dev)
{
	struct iommu_fault_param *iopf_param;

	/*
	 * It's a driver bug to be here after iopf_queue_remove_device().
	 * Therefore, it's safe to dereference the fault parameter without
	 * holding the lock.
	 */
	iopf_param = rcu_dereference_check(dev->iommu->fault_param, true);
	if (WARN_ON(!iopf_param))
		return -ENODEV;

	flush_workqueue(iopf_param->queue->wq);

	return 0;
}
EXPORT_SYMBOL_GPL(iopf_queue_flush_dev);
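
/*
 * Illustrative sketch only, following the ordering described above before a
 * PASID is recycled. "my_stop_pasid" and "my_drain_hw_prq" are hypothetical
 * driver-side helpers, not part of this file:
 *
 *	my_stop_pasid(dev, pasid);	stop new faults at the hardware
 *	my_drain_hw_prq(iommu);		push pending PRQs into the IOPF queue
 *	iopf_queue_flush_dev(dev);	wait for queued fault work to finish
 *	... the PASID can now be reallocated ...
 */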

/**
 * iopf_group_response - Respond to a group of page faults
 * @group: the group of faults with the same group id
 * @status: the response code
 */
void iopf_group_response(struct iopf_group *group,
			 enum iommu_page_response_code status)
{
	struct iommu_fault_param *fault_param = group->fault_param;
	struct iopf_fault *iopf = &group->last_fault;
	struct device *dev = group->fault_param->dev;
	const struct iommu_ops *ops = dev_iommu_ops(dev);
	struct iommu_page_response resp = {
		.pasid = iopf->fault.prm.pasid,
		.grpid = iopf->fault.prm.grpid,
		.code = status,
	};

	/* Only send a response if there is a fault report pending */
	mutex_lock(&fault_param->lock);
	if (!list_empty(&group->pending_node)) {
		ops->page_response(dev, &group->last_fault, &resp);
		list_del_init(&group->pending_node);
	}
	mutex_unlock(&fault_param->lock);
}
EXPORT_SYMBOL_GPL(iopf_group_response);

/**
 * iopf_queue_discard_partial - Remove all pending partial faults
 * @queue: the queue whose partial faults need to be discarded
 *
 * When the hardware queue overflows, the last page faults in a group may have
 * been lost and the IOMMU driver calls this to discard all partial faults. The
 * driver shouldn't be adding new faults to this queue concurrently.
 *
 * Return: 0 on success and <0 on error.
 */
int iopf_queue_discard_partial(struct iopf_queue *queue)
{
	struct iopf_fault *iopf, *next;
	struct iommu_fault_param *iopf_param;

	if (!queue)
		return -EINVAL;

	mutex_lock(&queue->lock);
	list_for_each_entry(iopf_param, &queue->devices, queue_list) {
		mutex_lock(&iopf_param->lock);
		list_for_each_entry_safe(iopf, next, &iopf_param->partial,
					 list) {
			list_del(&iopf->list);
			kfree(iopf);
		}
		mutex_unlock(&iopf_param->lock);
	}
	mutex_unlock(&queue->lock);
	return 0;
}
EXPORT_SYMBOL_GPL(iopf_queue_discard_partial);
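
/*
 * Illustrative sketch only: a driver that detects a PRI queue overflow would
 * discard partial faults before restarting the queue. "my_prq_overflowed",
 * "my_restart_prq" and the "iopf_queue" field are hypothetical driver-side
 * names, not part of this file:
 *
 *	if (my_prq_overflowed(iommu)) {
 *		iopf_queue_discard_partial(iommu->iopf_queue);
 *		my_restart_prq(iommu);
 *	}
 */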

/**
 * iopf_queue_add_device - Add producer to the fault queue
 * @queue: IOPF queue
 * @dev: device to add
 *
 * Return: 0 on success and <0 on error.
 */
int iopf_queue_add_device(struct iopf_queue *queue, struct device *dev)
{
	int ret = 0;
	struct dev_iommu *param = dev->iommu;
	struct iommu_fault_param *fault_param;
	const struct iommu_ops *ops = dev_iommu_ops(dev);

	if (!ops->page_response)
		return -ENODEV;

	mutex_lock(&queue->lock);
	mutex_lock(&param->lock);
	if (rcu_dereference_check(param->fault_param,
				  lockdep_is_held(&param->lock))) {
		ret = -EBUSY;
		goto done_unlock;
	}

	fault_param = kzalloc(sizeof(*fault_param), GFP_KERNEL);
	if (!fault_param) {
		ret = -ENOMEM;
		goto done_unlock;
	}

	mutex_init(&fault_param->lock);
	INIT_LIST_HEAD(&fault_param->faults);
	INIT_LIST_HEAD(&fault_param->partial);
	fault_param->dev = dev;
	refcount_set(&fault_param->users, 1);
	list_add(&fault_param->queue_list, &queue->devices);
	fault_param->queue = queue;

	rcu_assign_pointer(param->fault_param, fault_param);

done_unlock:
	mutex_unlock(&param->lock);
	mutex_unlock(&queue->lock);

	return ret;
}
EXPORT_SYMBOL_GPL(iopf_queue_add_device);
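
/*
 * Illustrative sketch only: a driver enables IOPF for a device by adding it
 * to a fault queue before enabling PRI in the hardware. "my_enable_pri" and
 * the "iopf_queue" field are hypothetical driver-side names, not part of
 * this file:
 *
 *	ret = iopf_queue_add_device(iommu->iopf_queue, dev);
 *	if (ret)
 *		return ret;
 *	my_enable_pri(dev);
 */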

/**
 * iopf_queue_remove_device - Remove producer from fault queue
 * @queue: IOPF queue
 * @dev: device to remove
 *
 * Remove a device from an iopf_queue. It's recommended to follow these
 * steps when removing a device:
 *
 * - Disable new PRI reception: Turn off PRI generation in the IOMMU hardware
 *   and flush any hardware page request queues. This should be done before
 *   calling into this helper.
 * - Acknowledge all outstanding PRQs to the device: Respond to all outstanding
 *   page requests with IOMMU_PAGE_RESP_INVALID, indicating the device should
 *   not retry. This helper function handles this.
 * - Disable PRI on the device: After calling this helper, the caller could
 *   then disable PRI on the device.
 *
 * Calling iopf_queue_remove_device() essentially disassociates the device.
 * The fault_param might still exist, but iopf_group_response() will do
 * nothing. The device fault parameter reference count has been properly
 * passed from iommu_report_device_fault() to the fault handling work, and
 * will eventually be released by iopf_free_group().
 */
void iopf_queue_remove_device(struct iopf_queue *queue, struct device *dev)
{
	struct iopf_fault *partial_iopf;
	struct iopf_fault *next;
	struct iopf_group *group, *temp;
	struct dev_iommu *param = dev->iommu;
	struct iommu_fault_param *fault_param;
	const struct iommu_ops *ops = dev_iommu_ops(dev);

	mutex_lock(&queue->lock);
	mutex_lock(&param->lock);
	fault_param = rcu_dereference_check(param->fault_param,
					    lockdep_is_held(&param->lock));

	if (WARN_ON(!fault_param || fault_param->queue != queue))
		goto unlock;

	mutex_lock(&fault_param->lock);
	list_for_each_entry_safe(partial_iopf, next, &fault_param->partial, list)
		kfree(partial_iopf);

	list_for_each_entry_safe(group, temp, &fault_param->faults, pending_node) {
		struct iopf_fault *iopf = &group->last_fault;
		struct iommu_page_response resp = {
			.pasid = iopf->fault.prm.pasid,
			.grpid = iopf->fault.prm.grpid,
			.code = IOMMU_PAGE_RESP_INVALID
		};

		ops->page_response(dev, iopf, &resp);
		list_del_init(&group->pending_node);
	}
	mutex_unlock(&fault_param->lock);

	list_del(&fault_param->queue_list);

	/* Drop the reference owned by iopf_queue_add_device() */
	rcu_assign_pointer(param->fault_param, NULL);
	iopf_put_dev_fault_param(fault_param);
unlock:
	mutex_unlock(&param->lock);
	mutex_unlock(&queue->lock);
}
EXPORT_SYMBOL_GPL(iopf_queue_remove_device);
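
/*
 * Illustrative sketch only, following the steps documented above.
 * "my_disable_pri_generation", "my_drain_hw_prq" and "my_disable_pri" are
 * hypothetical driver-side helpers, not part of this file:
 *
 *	my_disable_pri_generation(iommu, dev);	step 1: stop new PRI in hw
 *	my_drain_hw_prq(iommu);			... and flush the hw queue
 *	iopf_queue_remove_device(queue, dev);	step 2: ack outstanding PRQs
 *	my_disable_pri(dev);			step 3: disable PRI on device
 */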

/**
 * iopf_queue_alloc - Allocate and initialize a fault queue
 * @name: a unique string identifying the queue (for workqueue)
 *
 * Return: the queue on success and NULL on error.
 */
struct iopf_queue *iopf_queue_alloc(const char *name)
{
	struct iopf_queue *queue;

	queue = kzalloc(sizeof(*queue), GFP_KERNEL);
	if (!queue)
		return NULL;

	/*
	 * The WQ is unordered because the low-level handler enqueues faults by
	 * group. PRI requests within a group have to be ordered, but once
	 * that's dealt with, the high-level function can handle groups out of
	 * order.
	 */
	queue->wq = alloc_workqueue("iopf_queue/%s", WQ_UNBOUND, 0, name);
	if (!queue->wq) {
		kfree(queue);
		return NULL;
	}

	INIT_LIST_HEAD(&queue->devices);
	mutex_init(&queue->lock);

	return queue;
}
EXPORT_SYMBOL_GPL(iopf_queue_alloc);
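
/*
 * Illustrative sketch only: an IOMMU driver usually allocates one queue per
 * IOMMU instance at probe time and frees it on removal. The "iopf_queue" and
 * "dev" fields of "iommu" are hypothetical driver-side names here:
 *
 *	iommu->iopf_queue = iopf_queue_alloc(dev_name(iommu->dev));
 *	if (!iommu->iopf_queue)
 *		return -ENOMEM;
 *	...
 *	iopf_queue_free(iommu->iopf_queue);
 */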

/**
 * iopf_queue_free - Free IOPF queue
 * @queue: queue to free
 *
 * Counterpart to iopf_queue_alloc(). The driver must not be queuing faults or
 * adding/removing devices on this queue anymore.
 */
void iopf_queue_free(struct iopf_queue *queue)
{
	struct iommu_fault_param *iopf_param, *next;

	if (!queue)
		return;

	list_for_each_entry_safe(iopf_param, next, &queue->devices, queue_list)
		iopf_queue_remove_device(queue, iopf_param->dev);

	destroy_workqueue(queue->wq);
	kfree(queue);
}
EXPORT_SYMBOL_GPL(iopf_queue_free);