// SPDX-License-Identifier: GPL-2.0-only
/*
 * VFIO core
 *
 * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
 *     Author: Alex Williamson <alex.williamson@redhat.com>
 *
 * Derived from original vfio:
 * Copyright 2010 Cisco Systems, Inc.  All rights reserved.
 * Author: Tom Lyon, pugs@cisco.com
 */

#include <linux/vfio.h>
#include <linux/iommufd.h>
#include <linux/anon_inodes.h>
#include "vfio.h"

static struct vfio {
	struct class			*class;
	struct list_head		group_list;
	struct mutex			group_lock; /* locks group_list */
	struct ida			group_ida;
	dev_t				group_devt;
} vfio;

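/*
 * Find a device in the group that matches @buf (via the driver's @match op,
 * or by device name) and take a registration reference on it.  The caller
 * must drop the reference with vfio_device_put_registration() when done.
 */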
static struct vfio_device *vfio_device_get_from_name(struct vfio_group *group,
						     char *buf)
{
	struct vfio_device *it, *device = ERR_PTR(-ENODEV);

	mutex_lock(&group->device_lock);
	list_for_each_entry(it, &group->device_list, group_next) {
		int ret;

		if (it->ops->match) {
			ret = it->ops->match(it, buf);
			if (ret < 0) {
				device = ERR_PTR(ret);
				break;
			}
		} else {
			ret = !strcmp(dev_name(it->dev), buf);
		}

		if (ret && vfio_device_try_get_registration(it)) {
			device = it;
			break;
		}
	}
	mutex_unlock(&group->device_lock);

	return device;
}

/*
 * VFIO Group fd, /dev/vfio/$GROUP
 */
static bool vfio_group_has_iommu(struct vfio_group *group)
{
	lockdep_assert_held(&group->group_lock);
	/*
	 * There can only be users if there is a container, and if there is a
	 * container there must be users.
	 */
	WARN_ON(!group->container != !group->container_users);

	return group->container || group->iommufd;
}

/*
 * VFIO_GROUP_UNSET_CONTAINER should fail if there are other users or
 * if there was no container to unset.  Since the ioctl is called on
 * the group, we know that still exists, therefore the only valid
 * transition here is 1->0.
 */
static int vfio_group_ioctl_unset_container(struct vfio_group *group)
{
	int ret = 0;

	mutex_lock(&group->group_lock);
	if (!vfio_group_has_iommu(group)) {
		ret = -EINVAL;
		goto out_unlock;
	}
	if (group->container) {
		if (group->container_users != 1) {
			ret = -EBUSY;
			goto out_unlock;
		}
		vfio_group_detach_container(group);
	}
	if (group->iommufd) {
		iommufd_ctx_put(group->iommufd);
		group->iommufd = NULL;
	}

out_unlock:
	mutex_unlock(&group->group_lock);
	return ret;
}

static int vfio_group_ioctl_set_container(struct vfio_group *group,
					  int __user *arg)
{
	struct vfio_container *container;
	struct iommufd_ctx *iommufd;
	struct fd f;
	int ret;
	int fd;

	if (get_user(fd, arg))
		return -EFAULT;

	f = fdget(fd);
	if (!fd_file(f))
		return -EBADF;

	mutex_lock(&group->group_lock);
	if (vfio_group_has_iommu(group)) {
		ret = -EINVAL;
		goto out_unlock;
	}
	if (!group->iommu_group) {
		ret = -ENODEV;
		goto out_unlock;
	}

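	/*
	 * The fd may reference either a legacy VFIO container or an
	 * iommufd; try the container interpretation first and fall back
	 * to iommufd.
	 */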
	container = vfio_container_from_file(fd_file(f));
	if (container) {
		ret = vfio_container_attach_group(container, group);
		goto out_unlock;
	}

	iommufd = iommufd_ctx_from_file(fd_file(f));
	if (!IS_ERR(iommufd)) {
		if (IS_ENABLED(CONFIG_VFIO_NOIOMMU) &&
		    group->type == VFIO_NO_IOMMU)
			ret = iommufd_vfio_compat_set_no_iommu(iommufd);
		else
			ret = iommufd_vfio_compat_ioas_create(iommufd);

		if (ret) {
			iommufd_ctx_put(iommufd);
			goto out_unlock;
		}

		group->iommufd = iommufd;
		goto out_unlock;
	}

	/* The FD passed is not recognized. */
	ret = -EBADFD;

out_unlock:
	mutex_unlock(&group->group_lock);
	fdput(f);
	return ret;
}

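/*
 * Sample group->kvm under kvm_ref_lock so that a concurrent
 * vfio_group_set_kvm() cannot change the pointer while the device takes
 * its reference.
 */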
static void vfio_device_group_get_kvm_safe(struct vfio_device *device)
{
	spin_lock(&device->group->kvm_ref_lock);
	vfio_device_get_kvm_safe(device, device->group->kvm);
	spin_unlock(&device->group->kvm_ref_lock);
}

static int vfio_df_group_open(struct vfio_device_file *df)
{
	struct vfio_device *device = df->device;
	int ret;

	mutex_lock(&device->group->group_lock);
	if (!vfio_group_has_iommu(device->group)) {
		ret = -EINVAL;
		goto out_unlock;
	}

	mutex_lock(&device->dev_set->lock);

	/*
	 * Before the first device open, get the KVM pointer currently
	 * associated with the group (if there is one) and obtain a reference
	 * now that will be held until the open_count reaches 0 again.  Save
	 * the pointer in the device for use by drivers.
	 */
	if (device->open_count == 0)
		vfio_device_group_get_kvm_safe(device);

	df->iommufd = device->group->iommufd;
	if (df->iommufd && vfio_device_is_noiommu(device) && device->open_count == 0) {
		/*
		 * Require no compat ioas to be assigned to proceed.  The basic
		 * statement is that the user cannot have done something that
		 * implies they expected translation to exist.
		 */
		if (!capable(CAP_SYS_RAWIO) ||
		    vfio_iommufd_device_has_compat_ioas(device, df->iommufd))
			ret = -EPERM;
		else
			ret = 0;
		goto out_put_kvm;
	}

	ret = vfio_df_open(df);
	if (ret)
		goto out_put_kvm;

	if (df->iommufd && device->open_count == 1) {
		ret = vfio_iommufd_compat_attach_ioas(device, df->iommufd);
		if (ret)
			goto out_close_device;
	}

	/*
	 * Paired with smp_load_acquire() in vfio_device_fops::ioctl/
	 * read/write/mmap and vfio_file_has_device_access()
	 */
	smp_store_release(&df->access_granted, true);

	mutex_unlock(&device->dev_set->lock);
	mutex_unlock(&device->group->group_lock);
	return 0;

out_close_device:
	vfio_df_close(df);
out_put_kvm:
	df->iommufd = NULL;
	if (device->open_count == 0)
		vfio_device_put_kvm(device);
	mutex_unlock(&device->dev_set->lock);
out_unlock:
	mutex_unlock(&device->group->group_lock);
	return ret;
}

void vfio_df_group_close(struct vfio_device_file *df)
{
	struct vfio_device *device = df->device;

	mutex_lock(&device->group->group_lock);
	mutex_lock(&device->dev_set->lock);

	vfio_df_close(df);
	df->iommufd = NULL;

	if (device->open_count == 0)
		vfio_device_put_kvm(device);

	mutex_unlock(&device->dev_set->lock);
	mutex_unlock(&device->group->group_lock);
}

static struct file *vfio_device_open_file(struct vfio_device *device)
{
	struct vfio_device_file *df;
	struct file *filep;
	int ret;

	df = vfio_allocate_device_file(device);
	if (IS_ERR(df)) {
		ret = PTR_ERR(df);
		goto err_out;
	}

	df->group = device->group;

	ret = vfio_df_group_open(df);
	if (ret)
		goto err_free;

	/*
	 * We can't use anon_inode_getfd() because we need to modify
	 * the f_mode flags directly to allow more than just ioctls
	 */
	filep = anon_inode_getfile("[vfio-device]", &vfio_device_fops,
				   df, O_RDWR);
	if (IS_ERR(filep)) {
		ret = PTR_ERR(filep);
		goto err_close_device;
	}

	/*
	 * TODO: add an anon_inode interface to do this.
	 * Appears to be missing by lack of need rather than
	 * explicitly prevented.  Now there's need.
	 */
	filep->f_mode |= (FMODE_PREAD | FMODE_PWRITE);

	/*
	 * Use the pseudo fs inode on the device to link all mmaps
	 * to the same address space, allowing us to unmap all vmas
	 * associated with this device using unmap_mapping_range().
	 */
	filep->f_mapping = device->inode->i_mapping;

	if (device->group->type == VFIO_NO_IOMMU)
		dev_warn(device->dev, "vfio-noiommu device opened by user "
			 "(%s:%d)\n", current->comm, task_pid_nr(current));
	/*
	 * On success the ref of device is moved to the file and
	 * put in vfio_device_fops_release()
	 */
	return filep;

err_close_device:
	vfio_df_group_close(df);
err_free:
	kfree(df);
err_out:
	return ERR_PTR(ret);
}

static int vfio_group_ioctl_get_device_fd(struct vfio_group *group,
					  char __user *arg)
{
	struct vfio_device *device;
	struct file *filep;
	char *buf;
	int fdno;
	int ret;

	buf = strndup_user(arg, PAGE_SIZE);
	if (IS_ERR(buf))
		return PTR_ERR(buf);

	device = vfio_device_get_from_name(group, buf);
	kfree(buf);
	if (IS_ERR(device))
		return PTR_ERR(device);

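	/*
	 * Reserve the fd number before creating the file so the error
	 * paths can back out without leaking it; fd_install() below
	 * transfers the file reference to the fd table and cannot fail.
	 */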
	fdno = get_unused_fd_flags(O_CLOEXEC);
	if (fdno < 0) {
		ret = fdno;
		goto err_put_device;
	}

	filep = vfio_device_open_file(device);
	if (IS_ERR(filep)) {
		ret = PTR_ERR(filep);
		goto err_put_fdno;
	}

	fd_install(fdno, filep);
	return fdno;

err_put_fdno:
	put_unused_fd(fdno);
err_put_device:
	vfio_device_put_registration(device);
	return ret;
}

static int vfio_group_ioctl_get_status(struct vfio_group *group,
				       struct vfio_group_status __user *arg)
{
	unsigned long minsz = offsetofend(struct vfio_group_status, flags);
	struct vfio_group_status status;

	if (copy_from_user(&status, arg, minsz))
		return -EFAULT;

	if (status.argsz < minsz)
		return -EINVAL;

	status.flags = 0;

	mutex_lock(&group->group_lock);
	if (!group->iommu_group) {
		mutex_unlock(&group->group_lock);
		return -ENODEV;
	}

	/*
	 * With the container FD the iommu_group_claim_dma_owner() is done
	 * during SET_CONTAINER but for IOMMUFD this is done during
	 * VFIO_GROUP_GET_DEVICE_FD. Meaning that with iommufd
	 * VFIO_GROUP_FLAGS_VIABLE could be set but GET_DEVICE_FD will fail due
	 * to viability.
	 */
	if (vfio_group_has_iommu(group))
		status.flags |= VFIO_GROUP_FLAGS_CONTAINER_SET |
				VFIO_GROUP_FLAGS_VIABLE;
	else if (!iommu_group_dma_owner_claimed(group->iommu_group))
		status.flags |= VFIO_GROUP_FLAGS_VIABLE;
	mutex_unlock(&group->group_lock);

	if (copy_to_user(arg, &status, minsz))
		return -EFAULT;
	return 0;
}

static long vfio_group_fops_unl_ioctl(struct file *filep,
				      unsigned int cmd, unsigned long arg)
{
	struct vfio_group *group = filep->private_data;
	void __user *uarg = (void __user *)arg;

	switch (cmd) {
	case VFIO_GROUP_GET_DEVICE_FD:
		return vfio_group_ioctl_get_device_fd(group, uarg);
	case VFIO_GROUP_GET_STATUS:
		return vfio_group_ioctl_get_status(group, uarg);
	case VFIO_GROUP_SET_CONTAINER:
		return vfio_group_ioctl_set_container(group, uarg);
	case VFIO_GROUP_UNSET_CONTAINER:
		return vfio_group_ioctl_unset_container(group);
	default:
		return -ENOTTY;
	}
}

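/*
 * Keep the group FD and per-device cdev FDs mutually exclusive: fail if
 * the group FD is already open, otherwise bump cdev_device_open_cnt so
 * that vfio_group_fops_open() refuses new opens until
 * vfio_device_unblock_group() drops the count again.
 */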
int vfio_device_block_group(struct vfio_device *device)
{
	struct vfio_group *group = device->group;
	int ret = 0;

	mutex_lock(&group->group_lock);
	if (group->opened_file) {
		ret = -EBUSY;
		goto out_unlock;
	}

	group->cdev_device_open_cnt++;

out_unlock:
	mutex_unlock(&group->group_lock);
	return ret;
}

void vfio_device_unblock_group(struct vfio_device *device)
{
	struct vfio_group *group = device->group;

	mutex_lock(&group->group_lock);
	group->cdev_device_open_cnt--;
	mutex_unlock(&group->group_lock);
}

static int vfio_group_fops_open(struct inode *inode, struct file *filep)
{
	struct vfio_group *group =
		container_of(inode->i_cdev, struct vfio_group, cdev);
	int ret;

	mutex_lock(&group->group_lock);

	/*
	 * drivers can be zero if this races with vfio_device_remove_group();
	 * it will be stable at 0 under the group_lock.
	 */
	if (refcount_read(&group->drivers) == 0) {
		ret = -ENODEV;
		goto out_unlock;
	}

	if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO)) {
		ret = -EPERM;
		goto out_unlock;
	}

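	/*
	 * The group FD cannot be opened while any device in the group is
	 * open through the cdev interface; see vfio_device_block_group().
	 */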
	if (group->cdev_device_open_cnt) {
		ret = -EBUSY;
		goto out_unlock;
	}

	/*
	 * Do we need multiple instances of the group open?  Seems not.
	 */
	if (group->opened_file) {
		ret = -EBUSY;
		goto out_unlock;
	}
	group->opened_file = filep;
	filep->private_data = group;
	ret = 0;
out_unlock:
	mutex_unlock(&group->group_lock);
	return ret;
}

static int vfio_group_fops_release(struct inode *inode, struct file *filep)
{
	struct vfio_group *group = filep->private_data;

	filep->private_data = NULL;

	mutex_lock(&group->group_lock);
	/*
	 * Device FDs hold a group file reference, therefore the group release
	 * is only called when there are no open devices.
	 */
	WARN_ON(group->notifier.head);
	if (group->container)
		vfio_group_detach_container(group);
	if (group->iommufd) {
		iommufd_ctx_put(group->iommufd);
		group->iommufd = NULL;
	}
	group->opened_file = NULL;
	mutex_unlock(&group->group_lock);
	return 0;
}

static const struct file_operations vfio_group_fops = {
	.owner		= THIS_MODULE,
	.unlocked_ioctl	= vfio_group_fops_unl_ioctl,
	.compat_ioctl	= compat_ptr_ioctl,
	.open		= vfio_group_fops_open,
	.release	= vfio_group_fops_release,
};

/*
 * Group objects - create, release, get, put, search
 */
static struct vfio_group *
vfio_group_find_from_iommu(struct iommu_group *iommu_group)
{
	struct vfio_group *group;

	lockdep_assert_held(&vfio.group_lock);

	/*
	 * group->iommu_group from the vfio.group_list cannot be NULL
	 * under the vfio.group_lock.
	 */
	list_for_each_entry(group, &vfio.group_list, vfio_next) {
		if (group->iommu_group == iommu_group)
			return group;
	}
	return NULL;
}

static void vfio_group_release(struct device *dev)
{
	struct vfio_group *group = container_of(dev, struct vfio_group, dev);

	mutex_destroy(&group->device_lock);
	mutex_destroy(&group->group_lock);
	WARN_ON(group->iommu_group);
	WARN_ON(group->cdev_device_open_cnt);
	ida_free(&vfio.group_ida, MINOR(group->dev.devt));
	kfree(group);
}

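/*
 * Allocate a vfio_group with its embedded struct device and cdev
 * initialized.  Once device_initialize() has run, all error unwinding
 * must go through put_device(), which frees the group via
 * vfio_group_release().
 */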
static struct vfio_group *vfio_group_alloc(struct iommu_group *iommu_group,
					   enum vfio_group_type type)
{
	struct vfio_group *group;
	int minor;

	group = kzalloc(sizeof(*group), GFP_KERNEL);
	if (!group)
		return ERR_PTR(-ENOMEM);

	minor = ida_alloc_max(&vfio.group_ida, MINORMASK, GFP_KERNEL);
	if (minor < 0) {
		kfree(group);
		return ERR_PTR(minor);
	}

	device_initialize(&group->dev);
	group->dev.devt = MKDEV(MAJOR(vfio.group_devt), minor);
	group->dev.class = vfio.class;
	group->dev.release = vfio_group_release;
	cdev_init(&group->cdev, &vfio_group_fops);
	group->cdev.owner = THIS_MODULE;

	refcount_set(&group->drivers, 1);
	mutex_init(&group->group_lock);
	spin_lock_init(&group->kvm_ref_lock);
	INIT_LIST_HEAD(&group->device_list);
	mutex_init(&group->device_lock);
	group->iommu_group = iommu_group;
	/* put in vfio_device_remove_group() */
	iommu_group_ref_get(iommu_group);
	group->type = type;
	BLOCKING_INIT_NOTIFIER_HEAD(&group->notifier);

	return group;
}

static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group,
		enum vfio_group_type type)
{
	struct vfio_group *group;
	struct vfio_group *ret;
	int err;

	lockdep_assert_held(&vfio.group_lock);

	group = vfio_group_alloc(iommu_group, type);
	if (IS_ERR(group))
		return group;

	err = dev_set_name(&group->dev, "%s%d",
			   group->type == VFIO_NO_IOMMU ? "noiommu-" : "",
			   iommu_group_id(iommu_group));
	if (err) {
		ret = ERR_PTR(err);
		goto err_put;
	}

	err = cdev_device_add(&group->cdev, &group->dev);
	if (err) {
		ret = ERR_PTR(err);
		goto err_put;
	}

	list_add(&group->vfio_next, &vfio.group_list);

	return group;

err_put:
	put_device(&group->dev);
	return ret;
}

static struct vfio_group *vfio_noiommu_group_alloc(struct device *dev,
		enum vfio_group_type type)
{
	struct iommu_group *iommu_group;
	struct vfio_group *group;
	int ret;

	iommu_group = iommu_group_alloc();
	if (IS_ERR(iommu_group))
		return ERR_CAST(iommu_group);

	ret = iommu_group_set_name(iommu_group, "vfio-noiommu");
	if (ret)
		goto out_put_group;
	ret = iommu_group_add_device(iommu_group, dev);
	if (ret)
		goto out_put_group;

	mutex_lock(&vfio.group_lock);
	group = vfio_create_group(iommu_group, type);
	mutex_unlock(&vfio.group_lock);
	if (IS_ERR(group)) {
		ret = PTR_ERR(group);
		goto out_remove_device;
	}
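	/* vfio_group_alloc() took its own reference on the iommu_group */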
	iommu_group_put(iommu_group);
	return group;

out_remove_device:
	iommu_group_remove_device(dev);
out_put_group:
	iommu_group_put(iommu_group);
	return ERR_PTR(ret);
}

static bool vfio_group_has_device(struct vfio_group *group, struct device *dev)
{
	struct vfio_device *device;

	mutex_lock(&group->device_lock);
	list_for_each_entry(device, &group->device_list, group_next) {
		if (device->dev == dev) {
			mutex_unlock(&group->device_lock);
			return true;
		}
	}
	mutex_unlock(&group->device_lock);
	return false;
}

static struct vfio_group *vfio_group_find_or_alloc(struct device *dev)
{
	struct iommu_group *iommu_group;
	struct vfio_group *group;

	iommu_group = iommu_group_get(dev);
	if (!iommu_group && vfio_noiommu) {
		/*
		 * With noiommu enabled, create an IOMMU group for devices that
		 * don't already have one, implying no IOMMU hardware/driver
		 * exists.  Taint the kernel because we're about to give a DMA
		 * capable device to a user without IOMMU protection.
		 */
		group = vfio_noiommu_group_alloc(dev, VFIO_NO_IOMMU);
		if (!IS_ERR(group)) {
			add_taint(TAINT_USER, LOCKDEP_STILL_OK);
			dev_warn(dev, "Adding kernel taint for vfio-noiommu group on device\n");
		}
		return group;
	}

	if (!iommu_group)
		return ERR_PTR(-EINVAL);

	mutex_lock(&vfio.group_lock);
	group = vfio_group_find_from_iommu(iommu_group);
	if (group) {
		if (WARN_ON(vfio_group_has_device(group, dev)))
			group = ERR_PTR(-EINVAL);
		else
			refcount_inc(&group->drivers);
	} else {
		group = vfio_create_group(iommu_group, VFIO_IOMMU);
	}
	mutex_unlock(&vfio.group_lock);

	/* The vfio_group holds a reference to the iommu_group */
	iommu_group_put(iommu_group);
	return group;
}

int vfio_device_set_group(struct vfio_device *device,
			  enum vfio_group_type type)
{
	struct vfio_group *group;

	if (type == VFIO_IOMMU)
		group = vfio_group_find_or_alloc(device->dev);
	else
		group = vfio_noiommu_group_alloc(device->dev, type);

	if (IS_ERR(group))
		return PTR_ERR(group);

	/* Our reference on group is moved to the device */
	device->group = group;
	return 0;
}

void vfio_device_remove_group(struct vfio_device *device)
{
	struct vfio_group *group = device->group;
	struct iommu_group *iommu_group;

	if (group->type == VFIO_NO_IOMMU || group->type == VFIO_EMULATED_IOMMU)
		iommu_group_remove_device(device->dev);

	/* Pairs with vfio_create_group() / vfio_group_find_or_alloc() */
	if (!refcount_dec_and_mutex_lock(&group->drivers, &vfio.group_lock))
		return;
	list_del(&group->vfio_next);

	/*
	 * We could concurrently probe another driver in the group that might
	 * race vfio_device_remove_group() with vfio_group_find_or_alloc(), so
	 * we have to ensure that the sysfs is all cleaned up under lock,
	 * otherwise cdev_device_add() will fail due to the name already
	 * existing.
	 */
	cdev_device_del(&group->cdev, &group->dev);

	mutex_lock(&group->group_lock);
	/*
	 * These data structures all have paired operations that can only be
	 * undone when the caller holds a live reference on the device. Since
	 * all pairs must be undone these WARN_ON's indicate some caller did not
	 * properly hold the group reference.
	 */
	WARN_ON(!list_empty(&group->device_list));
	WARN_ON(group->notifier.head);

	/*
	 * Revoke all users of group->iommu_group. At this point we know there
	 * are no devices active because we are unplugging the last one. Setting
	 * iommu_group to NULL blocks all new users.
	 */
	if (group->container)
		vfio_group_detach_container(group);
	iommu_group = group->iommu_group;
	group->iommu_group = NULL;
	mutex_unlock(&group->group_lock);
	mutex_unlock(&vfio.group_lock);

	iommu_group_put(iommu_group);
	put_device(&group->dev);
}

void vfio_device_group_register(struct vfio_device *device)
{
	mutex_lock(&device->group->device_lock);
	list_add(&device->group_next, &device->group->device_list);
	mutex_unlock(&device->group->device_lock);
}

void vfio_device_group_unregister(struct vfio_device *device)
{
	mutex_lock(&device->group->device_lock);
	list_del(&device->group_next);
	mutex_unlock(&device->group->device_lock);
}

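/*
 * Mark the device as a user of the group's container and register it for
 * container-based DMA mapping.  Undone by vfio_device_group_unuse_iommu().
 */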
int vfio_device_group_use_iommu(struct vfio_device *device)
{
	struct vfio_group *group = device->group;
	int ret = 0;

	lockdep_assert_held(&group->group_lock);

	if (WARN_ON(!group->container))
		return -EINVAL;

	ret = vfio_group_use_container(group);
	if (ret)
		return ret;
	vfio_device_container_register(device);
	return 0;
}

void vfio_device_group_unuse_iommu(struct vfio_device *device)
{
	struct vfio_group *group = device->group;

	lockdep_assert_held(&group->group_lock);

	if (WARN_ON(!group->container))
		return;

	vfio_device_container_unregister(device);
	vfio_group_unuse_container(group);
}

bool vfio_device_has_container(struct vfio_device *device)
{
	return device->group->container;
}

struct vfio_group *vfio_group_from_file(struct file *file)
{
	struct vfio_group *group = file->private_data;

	if (file->f_op != &vfio_group_fops)
		return NULL;
	return group;
}

/**
 * vfio_file_iommu_group - Return the struct iommu_group for the vfio group file
 * @file: VFIO group file
 *
 * The returned iommu_group is valid as long as a ref is held on the file.
 * This takes a reference on the iommu_group which the caller must release
 * with iommu_group_put().  This function is deprecated; only the SPAPR
 * path in kvm should call it.
 */
struct iommu_group *vfio_file_iommu_group(struct file *file)
{
	struct vfio_group *group = vfio_group_from_file(file);
	struct iommu_group *iommu_group = NULL;

	if (!IS_ENABLED(CONFIG_SPAPR_TCE_IOMMU))
		return NULL;

	if (!group)
		return NULL;

	mutex_lock(&group->group_lock);
	if (group->iommu_group) {
		iommu_group = group->iommu_group;
		iommu_group_ref_get(iommu_group);
	}
	mutex_unlock(&group->group_lock);
	return iommu_group;
}
EXPORT_SYMBOL_GPL(vfio_file_iommu_group);

/**
 * vfio_file_is_group - True if the file is a vfio group file
 * @file: VFIO group file
 */
bool vfio_file_is_group(struct file *file)
{
	return vfio_group_from_file(file);
}
EXPORT_SYMBOL_GPL(vfio_file_is_group);

bool vfio_group_enforced_coherent(struct vfio_group *group)
{
	struct vfio_device *device;
	bool ret = true;

	/*
	 * If the device does not have IOMMU_CAP_ENFORCE_CACHE_COHERENCY then
	 * any domain later attached to it will also not support it. If the cap
	 * is set then the iommu_domain eventually attached to the device/group
	 * must use a domain with enforce_cache_coherency().
	 */
	mutex_lock(&group->device_lock);
	list_for_each_entry(device, &group->device_list, group_next) {
		if (!device_iommu_capable(device->dev,
					  IOMMU_CAP_ENFORCE_CACHE_COHERENCY)) {
			ret = false;
			break;
		}
	}
	mutex_unlock(&group->device_lock);
	return ret;
}

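/*
 * Serializes with vfio_device_group_get_kvm_safe() via kvm_ref_lock so a
 * concurrent first device open either sees the previous KVM pointer or the
 * new one and takes its reference consistently.
 */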
void vfio_group_set_kvm(struct vfio_group *group, struct kvm *kvm)
{
	spin_lock(&group->kvm_ref_lock);
	group->kvm = kvm;
	spin_unlock(&group->kvm_ref_lock);
}

/**
 * vfio_file_has_dev - True if the VFIO file is a handle for device
 * @file: VFIO file to check
 * @device: Device that must be part of the file
 *
 * Returns true if given file has permission to manipulate the given device.
 */
bool vfio_file_has_dev(struct file *file, struct vfio_device *device)
{
	struct vfio_group *group = vfio_group_from_file(file);

	if (!group)
		return false;

	return group == device->group;
}
EXPORT_SYMBOL_GPL(vfio_file_has_dev);

static char *vfio_devnode(const struct device *dev, umode_t *mode)
{
	return kasprintf(GFP_KERNEL, "vfio/%s", dev_name(dev));
}

int __init vfio_group_init(void)
{
	int ret;

	ida_init(&vfio.group_ida);
	mutex_init(&vfio.group_lock);
	INIT_LIST_HEAD(&vfio.group_list);

	ret = vfio_container_init();
	if (ret)
		return ret;

	/* /dev/vfio/$GROUP */
	vfio.class = class_create("vfio");
	if (IS_ERR(vfio.class)) {
		ret = PTR_ERR(vfio.class);
		goto err_group_class;
	}

	vfio.class->devnode = vfio_devnode;

	ret = alloc_chrdev_region(&vfio.group_devt, 0, MINORMASK + 1, "vfio");
	if (ret)
		goto err_alloc_chrdev;
	return 0;

err_alloc_chrdev:
	class_destroy(vfio.class);
	vfio.class = NULL;
err_group_class:
	vfio_container_cleanup();
	return ret;
}

void vfio_group_cleanup(void)
{
	WARN_ON(!list_empty(&vfio.group_list));
	ida_destroy(&vfio.group_ida);
	unregister_chrdev_region(vfio.group_devt, MINORMASK + 1);
	class_destroy(vfio.class);
	vfio.class = NULL;
	vfio_container_cleanup();
}