// SPDX-License-Identifier: GPL-2.0-only

#include <linux/export.h>
#include <linux/nsproxy.h>
#include <linux/slab.h>
#include <linux/sched/signal.h>
#include <linux/user_namespace.h>
#include <linux/proc_ns.h>
#include <linux/highuid.h>
#include <linux/cred.h>
#include <linux/securebits.h>
#include <linux/security.h>
#include <linux/keyctl.h>
#include <linux/key-type.h>
#include <keys/user-type.h>
#include <linux/seq_file.h>
#include <linux/fs.h>
#include <linux/uaccess.h>
#include <linux/ctype.h>
#include <linux/projid.h>
#include <linux/fs_struct.h>
#include <linux/bsearch.h>
#include <linux/sort.h>

static struct kmem_cache *user_ns_cachep __ro_after_init;
static DEFINE_MUTEX(userns_state_mutex);

static bool new_idmap_permitted(const struct file *file,
				struct user_namespace *ns, int cap_setid,
				struct uid_gid_map *map);
static void free_user_ns(struct work_struct *work);

static struct ucounts *inc_user_namespaces(struct user_namespace *ns, kuid_t uid)
{
	return inc_ucount(ns, uid, UCOUNT_USER_NAMESPACES);
}

static void dec_user_namespaces(struct ucounts *ucounts)
{
	return dec_ucount(ucounts, UCOUNT_USER_NAMESPACES);
}

static void set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns)
{
	/* Start with the same capabilities as init but useless for doing
	 * anything as the capabilities are bound to the new user namespace.
	 */
	cred->securebits = SECUREBITS_DEFAULT;
	cred->cap_inheritable = CAP_EMPTY_SET;
	cred->cap_permitted = CAP_FULL_SET;
	cred->cap_effective = CAP_FULL_SET;
	cred->cap_ambient = CAP_EMPTY_SET;
	cred->cap_bset = CAP_FULL_SET;
#ifdef CONFIG_KEYS
	key_put(cred->request_key_auth);
	cred->request_key_auth = NULL;
#endif
	/* tgcred will be cleared in our caller because CLONE_THREAD won't be set */
	cred->user_ns = user_ns;
}

static unsigned long enforced_nproc_rlimit(void)
{
	unsigned long limit = RLIM_INFINITY;

	/* Is RLIMIT_NPROC currently enforced? */
	if (!uid_eq(current_uid(), GLOBAL_ROOT_UID) ||
	    (current_user_ns() != &init_user_ns))
		limit = rlimit(RLIMIT_NPROC);

	return limit;
}

/*
 * Create a new user namespace, deriving the creator from the user in the
 * passed credentials, and replacing that user with the new root user for the
 * new namespace.
 *
 * This is called by copy_creds(), which will finish setting the target task's
 * credentials.
 */
int create_user_ns(struct cred *new)
{
	struct user_namespace *ns, *parent_ns = new->user_ns;
	kuid_t owner = new->euid;
	kgid_t group = new->egid;
	struct ucounts *ucounts;
	int ret, i;

	ret = -ENOSPC;
	if (parent_ns->level > 32)
		goto fail;

	ucounts = inc_user_namespaces(parent_ns, owner);
	if (!ucounts)
		goto fail;

	/*
	 * Verify that we cannot violate the file-access policy that the
	 * root directory specifies, by checking that the root directory
	 * is at the root of the mount namespace, which allows all files
	 * to be accessed.
	 */
	ret = -EPERM;
	if (current_chrooted())
		goto fail_dec;

	/* The creator needs a mapping in the parent user namespace
	 * or else we won't be able to reasonably tell userspace who
	 * created a user_namespace.
	 */
	ret = -EPERM;
	if (!kuid_has_mapping(parent_ns, owner) ||
	    !kgid_has_mapping(parent_ns, group))
		goto fail_dec;

	ret = security_create_user_ns(new);
	if (ret < 0)
		goto fail_dec;

	ret = -ENOMEM;
	ns = kmem_cache_zalloc(user_ns_cachep, GFP_KERNEL);
	if (!ns)
		goto fail_dec;

	ns->parent_could_setfcap = cap_raised(new->cap_effective, CAP_SETFCAP);
	ret = ns_alloc_inum(&ns->ns);
	if (ret)
		goto fail_free;
	ns->ns.ops = &userns_operations;

	refcount_set(&ns->ns.count, 1);
	/* Leave the new->user_ns reference with the new user namespace. */
	ns->parent = parent_ns;
	ns->level = parent_ns->level + 1;
	ns->owner = owner;
	ns->group = group;
	INIT_WORK(&ns->work, free_user_ns);
	for (i = 0; i < UCOUNT_COUNTS; i++) {
		ns->ucount_max[i] = INT_MAX;
	}
	set_userns_rlimit_max(ns, UCOUNT_RLIMIT_NPROC, enforced_nproc_rlimit());
	set_userns_rlimit_max(ns, UCOUNT_RLIMIT_MSGQUEUE, rlimit(RLIMIT_MSGQUEUE));
	set_userns_rlimit_max(ns, UCOUNT_RLIMIT_SIGPENDING, rlimit(RLIMIT_SIGPENDING));
	set_userns_rlimit_max(ns, UCOUNT_RLIMIT_MEMLOCK, rlimit(RLIMIT_MEMLOCK));
	ns->ucounts = ucounts;

	/* Inherit USERNS_SETGROUPS_ALLOWED from our parent */
	mutex_lock(&userns_state_mutex);
	ns->flags = parent_ns->flags;
	mutex_unlock(&userns_state_mutex);

#ifdef CONFIG_KEYS
	INIT_LIST_HEAD(&ns->keyring_name_list);
	init_rwsem(&ns->keyring_sem);
#endif
	ret = -ENOMEM;
	if (!setup_userns_sysctls(ns))
		goto fail_keyring;

	set_cred_user_ns(new, ns);
	return 0;
fail_keyring:
#ifdef CONFIG_PERSISTENT_KEYRINGS
	key_put(ns->persistent_keyring_register);
#endif
	ns_free_inum(&ns->ns);
fail_free:
	kmem_cache_free(user_ns_cachep, ns);
fail_dec:
	dec_user_namespaces(ucounts);
fail:
	return ret;
}

int unshare_userns(unsigned long unshare_flags, struct cred **new_cred)
{
	struct cred *cred;
	int err = -ENOMEM;

	if (!(unshare_flags & CLONE_NEWUSER))
		return 0;

	cred = prepare_creds();
	if (cred) {
		err = create_user_ns(cred);
		if (err)
			put_cred(cred);
		else
			*new_cred = cred;
	}

	return err;
}
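
/*
 * Usage sketch (illustrative, not kernel code): from userspace this path
 * is normally reached via unshare(2) or clone(2) with CLONE_NEWUSER.  The
 * task then holds a full capability set in the new namespace, but no ids
 * are usable until the id maps are written.  write_file() below is a
 * hypothetical helper that opens the given proc file, writes the string,
 * and closes it:
 *
 *	if (unshare(CLONE_NEWUSER) != 0)
 *		err(1, "unshare");
 *	write_file("/proc/self/setgroups", "deny");
 *	write_file("/proc/self/uid_map", "0 1000 1");
 *	write_file("/proc/self/gid_map", "0 1000 1");
 */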

static void free_user_ns(struct work_struct *work)
{
	struct user_namespace *parent, *ns =
		container_of(work, struct user_namespace, work);

	do {
		struct ucounts *ucounts = ns->ucounts;
		parent = ns->parent;
		if (ns->gid_map.nr_extents > UID_GID_MAP_MAX_BASE_EXTENTS) {
			kfree(ns->gid_map.forward);
			kfree(ns->gid_map.reverse);
		}
		if (ns->uid_map.nr_extents > UID_GID_MAP_MAX_BASE_EXTENTS) {
			kfree(ns->uid_map.forward);
			kfree(ns->uid_map.reverse);
		}
		if (ns->projid_map.nr_extents > UID_GID_MAP_MAX_BASE_EXTENTS) {
			kfree(ns->projid_map.forward);
			kfree(ns->projid_map.reverse);
		}
#if IS_ENABLED(CONFIG_BINFMT_MISC)
		kfree(ns->binfmt_misc);
#endif
		retire_userns_sysctls(ns);
		key_free_user_ns(ns);
		ns_free_inum(&ns->ns);
		kmem_cache_free(user_ns_cachep, ns);
		dec_user_namespaces(ucounts);
		ns = parent;
	} while (refcount_dec_and_test(&parent->ns.count));
}

void __put_user_ns(struct user_namespace *ns)
{
	schedule_work(&ns->work);
}
EXPORT_SYMBOL(__put_user_ns);

/*
 * struct idmap_key - holds the information necessary to find an idmapping in a
 * sorted idmap array. It is passed to cmp_map_id() as first argument.
 */
struct idmap_key {
	bool map_up; /* true  -> id from kid; false -> kid from id */
	u32 id; /* id to find */
	u32 count; /* == 0 unless used with map_id_range_down() */
};

/*
 * cmp_map_id - Function to be passed to bsearch() to find the requested
 * idmapping. Expects struct idmap_key to be passed via @k.
 */
static int cmp_map_id(const void *k, const void *e)
{
	u32 first, last, id2;
	const struct idmap_key *key = k;
	const struct uid_gid_extent *el = e;

	id2 = key->id + key->count - 1;

	/* handle map_id_{down,up}() */
	if (key->map_up)
		first = el->lower_first;
	else
		first = el->first;

	last = first + el->count - 1;

	if (key->id >= first && key->id <= last &&
	    (id2 >= first && id2 <= last))
		return 0;

	if (key->id < first || id2 < first)
		return -1;

	return 1;
}
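
/*
 * Worked example (illustrative): searching down for id 1001 with count 2
 * against an extent { .first = 1000, .lower_first = 0, .count = 10 }:
 * id2 = 1002, first = 1000, last = 1009, so both ends of the requested
 * range fall inside the extent and cmp_map_id() returns 0 (a match).
 * With id = 990 the requested range starts below first, so -1 is
 * returned, steering bsearch() toward lower extents.
 */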

/*
 * map_id_range_down_max - Find idmap via binary search in ordered idmap array.
 * Can only be called if number of mappings exceeds UID_GID_MAP_MAX_BASE_EXTENTS.
 */
static struct uid_gid_extent *
map_id_range_down_max(unsigned extents, struct uid_gid_map *map, u32 id, u32 count)
{
	struct idmap_key key;

	key.map_up = false;
	key.count = count;
	key.id = id;

	return bsearch(&key, map->forward, extents,
		       sizeof(struct uid_gid_extent), cmp_map_id);
}

/*
 * map_id_range_down_base - Find idmap via linear search in the static extent
 * array. Can only be called if number of mappings is equal or less than
 * UID_GID_MAP_MAX_BASE_EXTENTS.
 */
static struct uid_gid_extent *
map_id_range_down_base(unsigned extents, struct uid_gid_map *map, u32 id, u32 count)
{
	unsigned idx;
	u32 first, last, id2;

	id2 = id + count - 1;

	/* Find the matching extent */
	for (idx = 0; idx < extents; idx++) {
		first = map->extent[idx].first;
		last = first + map->extent[idx].count - 1;
		if (id >= first && id <= last &&
		    (id2 >= first && id2 <= last))
			return &map->extent[idx];
	}
	return NULL;
}

static u32 map_id_range_down(struct uid_gid_map *map, u32 id, u32 count)
{
	struct uid_gid_extent *extent;
	unsigned extents = map->nr_extents;
	smp_rmb();

	if (extents <= UID_GID_MAP_MAX_BASE_EXTENTS)
		extent = map_id_range_down_base(extents, map, id, count);
	else
		extent = map_id_range_down_max(extents, map, id, count);

	/* Map the id or note failure */
	if (extent)
		id = (id - extent->first) + extent->lower_first;
	else
		id = (u32) -1;

	return id;
}
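
/*
 * Worked example (illustrative): with a single extent
 * { .first = 1000, .lower_first = 0, .count = 3 },
 * map_id_range_down(map, 1001, 2) matches ids 1001..1002 against
 * 1000..1002 and returns (1001 - 1000) + 0 = 1.  By contrast,
 * map_id_range_down(map, 1002, 2) would need 1002..1003, which the
 * extent does not fully cover, so it returns (u32)-1.
 */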

u32 map_id_down(struct uid_gid_map *map, u32 id)
{
	return map_id_range_down(map, id, 1);
}

/*
 * map_id_up_base - Find idmap via linear search in the static extent array.
 * Can only be called if number of mappings is equal or less than
 * UID_GID_MAP_MAX_BASE_EXTENTS.
 */
static struct uid_gid_extent *
map_id_up_base(unsigned extents, struct uid_gid_map *map, u32 id)
{
	unsigned idx;
	u32 first, last;

	/* Find the matching extent */
	for (idx = 0; idx < extents; idx++) {
		first = map->extent[idx].lower_first;
		last = first + map->extent[idx].count - 1;
		if (id >= first && id <= last)
			return &map->extent[idx];
	}
	return NULL;
}

/*
 * map_id_up_max - Find idmap via binary search in ordered idmap array.
 * Can only be called if number of mappings exceeds UID_GID_MAP_MAX_BASE_EXTENTS.
 */
static struct uid_gid_extent *
map_id_up_max(unsigned extents, struct uid_gid_map *map, u32 id)
{
	struct idmap_key key;

	key.map_up = true;
	key.count = 1;
	key.id = id;

	return bsearch(&key, map->reverse, extents,
		       sizeof(struct uid_gid_extent), cmp_map_id);
}

u32 map_id_up(struct uid_gid_map *map, u32 id)
{
	struct uid_gid_extent *extent;
	unsigned extents = map->nr_extents;
	smp_rmb();

	if (extents <= UID_GID_MAP_MAX_BASE_EXTENTS)
		extent = map_id_up_base(extents, map, id);
	else
		extent = map_id_up_max(extents, map, id);

	/* Map the id or note failure */
	if (extent)
		id = (id - extent->lower_first) + extent->first;
	else
		id = (u32) -1;

	return id;
}
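
/*
 * Worked example (illustrative): with the extent above,
 * { .first = 1000, .lower_first = 0, .count = 3 }, map_id_up(map, 1)
 * searches in the reverse direction: 1 lies within lower_first..
 * lower_first + 2, so the result is (1 - 0) + 1000 = 1001, exactly
 * undoing the map_id_down() translation.
 */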

/**
 *	make_kuid - Map a user-namespace uid pair into a kuid.
 *	@ns:  User namespace that the uid is in
 *	@uid: User identifier
 *
 *	Maps a user-namespace uid pair into a kernel internal kuid,
 *	and returns that kuid.
 *
 *	When there is no mapping defined for the user-namespace uid
 *	pair INVALID_UID is returned.  Callers are expected to test
 *	for and handle INVALID_UID being returned.  INVALID_UID
 *	may be tested for using uid_valid().
 */
kuid_t make_kuid(struct user_namespace *ns, uid_t uid)
{
	/* Map the uid to a global kernel uid */
	return KUIDT_INIT(map_id_down(&ns->uid_map, uid));
}
EXPORT_SYMBOL(make_kuid);

/**
 *	from_kuid - Create a uid from a kuid user-namespace pair.
 *	@targ: The user namespace we want a uid in.
 *	@kuid: The kernel internal uid to start with.
 *
 *	Map @kuid into the user-namespace specified by @targ and
 *	return the resulting uid.
 *
 *	There is always a mapping into the initial user_namespace.
 *
 *	If @kuid has no mapping in @targ (uid_t)-1 is returned.
 */
uid_t from_kuid(struct user_namespace *targ, kuid_t kuid)
{
	/* Map the uid from a global kernel uid */
	return map_id_up(&targ->uid_map, __kuid_val(kuid));
}
EXPORT_SYMBOL(from_kuid);

/**
 *	from_kuid_munged - Create a uid from a kuid user-namespace pair.
 *	@targ: The user namespace we want a uid in.
 *	@kuid: The kernel internal uid to start with.
 *
 *	Map @kuid into the user-namespace specified by @targ and
 *	return the resulting uid.
 *
 *	There is always a mapping into the initial user_namespace.
 *
 *	Unlike from_kuid, from_kuid_munged never fails and always
 *	returns a valid uid.  This makes from_kuid_munged appropriate
 *	for use in syscalls like stat and getuid where failing the
 *	system call and failing to provide a valid uid are not
 *	options.
 *
 *	If @kuid has no mapping in @targ overflowuid is returned.
 */
uid_t from_kuid_munged(struct user_namespace *targ, kuid_t kuid)
{
	uid_t uid;
	uid = from_kuid(targ, kuid);

	if (uid == (uid_t) -1)
		uid = overflowuid;
	return uid;
}
EXPORT_SYMBOL(from_kuid_munged);
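
/*
 * Example (illustrative): if a file is owned by a kuid that has no mapping
 * in the caller's namespace, from_kuid() yields (uid_t)-1, while
 * from_kuid_munged() yields overflowuid (65534, i.e. "nobody", unless
 * /proc/sys/kernel/overflowuid says otherwise), which is what stat(2)
 * then reports to userspace.
 */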

/**
 *	make_kgid - Map a user-namespace gid pair into a kgid.
 *	@ns:  User namespace that the gid is in
 *	@gid: group identifier
 *
 *	Maps a user-namespace gid pair into a kernel internal kgid,
 *	and returns that kgid.
 *
 *	When there is no mapping defined for the user-namespace gid
 *	pair INVALID_GID is returned.  Callers are expected to test
 *	for and handle INVALID_GID being returned.  INVALID_GID may be
 *	tested for using gid_valid().
 */
kgid_t make_kgid(struct user_namespace *ns, gid_t gid)
{
	/* Map the gid to a global kernel gid */
	return KGIDT_INIT(map_id_down(&ns->gid_map, gid));
}
EXPORT_SYMBOL(make_kgid);

/**
 *	from_kgid - Create a gid from a kgid user-namespace pair.
 *	@targ: The user namespace we want a gid in.
 *	@kgid: The kernel internal gid to start with.
 *
 *	Map @kgid into the user-namespace specified by @targ and
 *	return the resulting gid.
 *
 *	There is always a mapping into the initial user_namespace.
 *
 *	If @kgid has no mapping in @targ (gid_t)-1 is returned.
 */
gid_t from_kgid(struct user_namespace *targ, kgid_t kgid)
{
	/* Map the gid from a global kernel gid */
	return map_id_up(&targ->gid_map, __kgid_val(kgid));
}
EXPORT_SYMBOL(from_kgid);

/**
 *	from_kgid_munged - Create a gid from a kgid user-namespace pair.
 *	@targ: The user namespace we want a gid in.
 *	@kgid: The kernel internal gid to start with.
 *
 *	Map @kgid into the user-namespace specified by @targ and
 *	return the resulting gid.
 *
 *	There is always a mapping into the initial user_namespace.
 *
 *	Unlike from_kgid, from_kgid_munged never fails and always
 *	returns a valid gid.  This makes from_kgid_munged appropriate
 *	for use in syscalls like stat and getgid where failing the
 *	system call and failing to provide a valid gid are not options.
 *
 *	If @kgid has no mapping in @targ overflowgid is returned.
 */
gid_t from_kgid_munged(struct user_namespace *targ, kgid_t kgid)
{
	gid_t gid;
	gid = from_kgid(targ, kgid);

	if (gid == (gid_t) -1)
		gid = overflowgid;
	return gid;
}
EXPORT_SYMBOL(from_kgid_munged);

/**
 *	make_kprojid - Map a user-namespace projid pair into a kprojid.
 *	@ns:  User namespace that the projid is in
 *	@projid: Project identifier
 *
 *	Maps a user-namespace projid pair into a kernel internal kprojid,
 *	and returns that kprojid.
 *
 *	When there is no mapping defined for the user-namespace projid
 *	pair INVALID_PROJID is returned.  Callers are expected to test
 *	for and handle INVALID_PROJID being returned.  INVALID_PROJID
 *	may be tested for using projid_valid().
 */
kprojid_t make_kprojid(struct user_namespace *ns, projid_t projid)
{
	/* Map the projid to a global kernel projid */
	return KPROJIDT_INIT(map_id_down(&ns->projid_map, projid));
}
EXPORT_SYMBOL(make_kprojid);

/**
 *	from_kprojid - Create a projid from a kprojid user-namespace pair.
 *	@targ: The user namespace we want a projid in.
 *	@kprojid: The kernel internal project identifier to start with.
 *
 *	Map @kprojid into the user-namespace specified by @targ and
 *	return the resulting projid.
 *
 *	There is always a mapping into the initial user_namespace.
 *
 *	If @kprojid has no mapping in @targ (projid_t)-1 is returned.
 */
projid_t from_kprojid(struct user_namespace *targ, kprojid_t kprojid)
{
	/* Map the projid from a global kernel projid */
	return map_id_up(&targ->projid_map, __kprojid_val(kprojid));
}
EXPORT_SYMBOL(from_kprojid);

/**
 *	from_kprojid_munged - Create a projid from a kprojid user-namespace pair.
 *	@targ: The user namespace we want a projid in.
 *	@kprojid: The kernel internal projid to start with.
 *
 *	Map @kprojid into the user-namespace specified by @targ and
 *	return the resulting projid.
 *
 *	There is always a mapping into the initial user_namespace.
 *
 *	Unlike from_kprojid, from_kprojid_munged never fails and always
 *	returns a valid projid.  This makes from_kprojid_munged
 *	appropriate for use in syscalls like stat where failing the
 *	system call and failing to provide a valid projid are not
 *	options.
 *
 *	If @kprojid has no mapping in @targ OVERFLOW_PROJID is returned.
 */
projid_t from_kprojid_munged(struct user_namespace *targ, kprojid_t kprojid)
{
	projid_t projid;
	projid = from_kprojid(targ, kprojid);

	if (projid == (projid_t) -1)
		projid = OVERFLOW_PROJID;
	return projid;
}
EXPORT_SYMBOL(from_kprojid_munged);

static int uid_m_show(struct seq_file *seq, void *v)
{
	struct user_namespace *ns = seq->private;
	struct uid_gid_extent *extent = v;
	struct user_namespace *lower_ns;
	uid_t lower;

	lower_ns = seq_user_ns(seq);
	if ((lower_ns == ns) && lower_ns->parent)
		lower_ns = lower_ns->parent;

	lower = from_kuid(lower_ns, KUIDT_INIT(extent->lower_first));

	seq_printf(seq, "%10u %10u %10u\n",
		extent->first,
		lower,
		extent->count);

	return 0;
}

static int gid_m_show(struct seq_file *seq, void *v)
{
	struct user_namespace *ns = seq->private;
	struct uid_gid_extent *extent = v;
	struct user_namespace *lower_ns;
	gid_t lower;

	lower_ns = seq_user_ns(seq);
	if ((lower_ns == ns) && lower_ns->parent)
		lower_ns = lower_ns->parent;

	lower = from_kgid(lower_ns, KGIDT_INIT(extent->lower_first));

	seq_printf(seq, "%10u %10u %10u\n",
		extent->first,
		lower,
		extent->count);

	return 0;
}

static int projid_m_show(struct seq_file *seq, void *v)
{
	struct user_namespace *ns = seq->private;
	struct uid_gid_extent *extent = v;
	struct user_namespace *lower_ns;
	projid_t lower;

	lower_ns = seq_user_ns(seq);
	if ((lower_ns == ns) && lower_ns->parent)
		lower_ns = lower_ns->parent;

	lower = from_kprojid(lower_ns, KPROJIDT_INIT(extent->lower_first));

	seq_printf(seq, "%10u %10u %10u\n",
		extent->first,
		lower,
		extent->count);

	return 0;
}

static void *m_start(struct seq_file *seq, loff_t *ppos,
		     struct uid_gid_map *map)
{
	loff_t pos = *ppos;
	unsigned extents = map->nr_extents;
	smp_rmb();

	if (pos >= extents)
		return NULL;

	if (extents <= UID_GID_MAP_MAX_BASE_EXTENTS)
		return &map->extent[pos];

	return &map->forward[pos];
}

static void *uid_m_start(struct seq_file *seq, loff_t *ppos)
{
	struct user_namespace *ns = seq->private;

	return m_start(seq, ppos, &ns->uid_map);
}

static void *gid_m_start(struct seq_file *seq, loff_t *ppos)
{
	struct user_namespace *ns = seq->private;

	return m_start(seq, ppos, &ns->gid_map);
}

static void *projid_m_start(struct seq_file *seq, loff_t *ppos)
{
	struct user_namespace *ns = seq->private;

	return m_start(seq, ppos, &ns->projid_map);
}

static void *m_next(struct seq_file *seq, void *v, loff_t *pos)
{
	(*pos)++;
	return seq->op->start(seq, pos);
}

static void m_stop(struct seq_file *seq, void *v)
{
	return;
}

const struct seq_operations proc_uid_seq_operations = {
	.start = uid_m_start,
	.stop = m_stop,
	.next = m_next,
	.show = uid_m_show,
};

const struct seq_operations proc_gid_seq_operations = {
	.start = gid_m_start,
	.stop = m_stop,
	.next = m_next,
	.show = gid_m_show,
};

const struct seq_operations proc_projid_seq_operations = {
	.start = projid_m_start,
	.stop = m_stop,
	.next = m_next,
	.show = projid_m_show,
};

static bool mappings_overlap(struct uid_gid_map *new_map,
			     struct uid_gid_extent *extent)
{
	u32 upper_first, lower_first, upper_last, lower_last;
	unsigned idx;

	upper_first = extent->first;
	lower_first = extent->lower_first;
	upper_last = upper_first + extent->count - 1;
	lower_last = lower_first + extent->count - 1;

	for (idx = 0; idx < new_map->nr_extents; idx++) {
		u32 prev_upper_first, prev_lower_first;
		u32 prev_upper_last, prev_lower_last;
		struct uid_gid_extent *prev;

		if (new_map->nr_extents <= UID_GID_MAP_MAX_BASE_EXTENTS)
			prev = &new_map->extent[idx];
		else
			prev = &new_map->forward[idx];

		prev_upper_first = prev->first;
		prev_lower_first = prev->lower_first;
		prev_upper_last = prev_upper_first + prev->count - 1;
		prev_lower_last = prev_lower_first + prev->count - 1;

		/* Does the upper range intersect a previous extent? */
		if ((prev_upper_first <= upper_last) &&
		    (prev_upper_last >= upper_first))
			return true;

		/* Does the lower range intersect a previous extent? */
		if ((prev_lower_first <= lower_last) &&
		    (prev_lower_last >= lower_first))
			return true;
	}
	return false;
}

/*
 * insert_extent - Safely insert a new idmap extent into struct uid_gid_map.
 * Takes care to allocate a 4K block of memory if the number of mappings exceeds
 * UID_GID_MAP_MAX_BASE_EXTENTS.
 */
static int insert_extent(struct uid_gid_map *map, struct uid_gid_extent *extent)
{
	struct uid_gid_extent *dest;

	if (map->nr_extents == UID_GID_MAP_MAX_BASE_EXTENTS) {
		struct uid_gid_extent *forward;

		/* Allocate memory for 340 mappings. */
		forward = kmalloc_array(UID_GID_MAP_MAX_EXTENTS,
					sizeof(struct uid_gid_extent),
					GFP_KERNEL);
		if (!forward)
			return -ENOMEM;

		/* Copy over memory. Only set up memory for the forward pointer.
		 * Defer the memory setup for the reverse pointer.
		 */
		memcpy(forward, map->extent,
		       map->nr_extents * sizeof(map->extent[0]));

		map->forward = forward;
		map->reverse = NULL;
	}

	if (map->nr_extents < UID_GID_MAP_MAX_BASE_EXTENTS)
		dest = &map->extent[map->nr_extents];
	else
		dest = &map->forward[map->nr_extents];

	*dest = *extent;
	map->nr_extents++;
	return 0;
}
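
/*
 * Illustration (assumes UID_GID_MAP_MAX_BASE_EXTENTS == 5 and
 * UID_GID_MAP_MAX_EXTENTS == 340, the current values): the first five
 * extents live in the embedded map->extent[] array.  Inserting the sixth
 * triggers the kmalloc_array() above, the five existing extents are
 * copied into map->forward, and every further insert lands in that
 * heap-allocated array instead.
 */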

/* cmp function to sort() forward mappings */
static int cmp_extents_forward(const void *a, const void *b)
{
	const struct uid_gid_extent *e1 = a;
	const struct uid_gid_extent *e2 = b;

	if (e1->first < e2->first)
		return -1;

	if (e1->first > e2->first)
		return 1;

	return 0;
}

/* cmp function to sort() reverse mappings */
static int cmp_extents_reverse(const void *a, const void *b)
{
	const struct uid_gid_extent *e1 = a;
	const struct uid_gid_extent *e2 = b;

	if (e1->lower_first < e2->lower_first)
		return -1;

	if (e1->lower_first > e2->lower_first)
		return 1;

	return 0;
}

/*
 * sort_idmaps - Sorts an array of idmap entries.
 * Does nothing unless the number of mappings exceeds
 * UID_GID_MAP_MAX_BASE_EXTENTS.
 */
static int sort_idmaps(struct uid_gid_map *map)
{
	if (map->nr_extents <= UID_GID_MAP_MAX_BASE_EXTENTS)
		return 0;

	/* Sort forward array. */
	sort(map->forward, map->nr_extents, sizeof(struct uid_gid_extent),
	     cmp_extents_forward, NULL);

	/* Only copy the memory from forward we actually need. */
	map->reverse = kmemdup_array(map->forward, map->nr_extents,
				     sizeof(struct uid_gid_extent), GFP_KERNEL);
	if (!map->reverse)
		return -ENOMEM;

	/* Sort reverse array. */
	sort(map->reverse, map->nr_extents, sizeof(struct uid_gid_extent),
	     cmp_extents_reverse, NULL);

	return 0;
}
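
/*
 * Example (illustrative): after this step, map->forward is ordered by
 * .first (the ids inside the namespace) so map_id_*_down() can bsearch
 * it, while map->reverse holds the same extents ordered by .lower_first
 * (the kernel ids they were mapped onto) so map_id_up() can bsearch in
 * the opposite direction.  The two arrays differ only in sort key.
 */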

/**
 * verify_root_map() - check the uid 0 mapping
 * @file: idmapping file
 * @map_ns: user namespace of the target process
 * @new_map: requested idmap
 *
 * If a process requests mapping parent uid 0 into the new ns, verify that the
 * process writing the map had the CAP_SETFCAP capability as the target process
 * will be able to write fscaps that are valid in ancestor user namespaces.
 *
 * Return: true if the mapping is allowed, false if not.
 */
static bool verify_root_map(const struct file *file,
			    struct user_namespace *map_ns,
			    struct uid_gid_map *new_map)
{
	int idx;
	const struct user_namespace *file_ns = file->f_cred->user_ns;
	struct uid_gid_extent *extent0 = NULL;

	for (idx = 0; idx < new_map->nr_extents; idx++) {
		if (new_map->nr_extents <= UID_GID_MAP_MAX_BASE_EXTENTS)
			extent0 = &new_map->extent[idx];
		else
			extent0 = &new_map->forward[idx];
		if (extent0->lower_first == 0)
			break;

		extent0 = NULL;
	}

	if (!extent0)
		return true;

	if (map_ns == file_ns) {
		/* The process unshared its ns and is writing to its own
		 * /proc/self/uid_map.  User already has full capabilities in
		 * the new namespace.  Verify that the parent had CAP_SETFCAP
		 * when it unshared.
		 */
		if (!file_ns->parent_could_setfcap)
			return false;
	} else {
		/* Process p1 is writing to uid_map of p2, who is in a child
		 * user namespace to p1's.  Verify that the opener of the map
		 * file has CAP_SETFCAP against the parent of the new map
		 * namespace.
		 */
		if (!file_ns_capable(file, map_ns->parent, CAP_SETFCAP))
			return false;
	}

	return true;
}

static ssize_t map_write(struct file *file, const char __user *buf,
			 size_t count, loff_t *ppos,
			 int cap_setid,
			 struct uid_gid_map *map,
			 struct uid_gid_map *parent_map)
{
	struct seq_file *seq = file->private_data;
	struct user_namespace *map_ns = seq->private;
	struct uid_gid_map new_map;
	unsigned idx;
	struct uid_gid_extent extent;
	char *kbuf, *pos, *next_line;
	ssize_t ret;

	/* Only allow < page size writes at the beginning of the file */
	if ((*ppos != 0) || (count >= PAGE_SIZE))
		return -EINVAL;

	/* Slurp in the user data */
	kbuf = memdup_user_nul(buf, count);
	if (IS_ERR(kbuf))
		return PTR_ERR(kbuf);

	/*
	 * The userns_state_mutex serializes all writes to any given map.
	 *
	 * Any map is only ever written once.
	 *
	 * An id map fits within 1 cache line on most architectures.
	 *
	 * On read nothing needs to be done unless you are on an
	 * architecture with a crazy cache coherency model like alpha.
	 *
	 * There is a one time data dependency between reading the
	 * count of the extents and the values of the extents.  The
	 * desired behavior is to see the values of the extents that
	 * were written before the count of the extents.
	 *
	 * To achieve this smp_wmb() is used to guarantee the write
	 * order and smp_rmb() guarantees that we don't see stale data
	 * on architectures with weaker ordering.
	 */
	mutex_lock(&userns_state_mutex);

	memset(&new_map, 0, sizeof(struct uid_gid_map));

	ret = -EPERM;
	/* Only allow one successful write to the map */
	if (map->nr_extents != 0)
		goto out;

	/*
	 * Adjusting namespace settings requires capabilities on the target.
	 */
	if (cap_valid(cap_setid) && !file_ns_capable(file, map_ns, CAP_SYS_ADMIN))
		goto out;

	/* Parse the user data */
	ret = -EINVAL;
	pos = kbuf;
	for (; pos; pos = next_line) {

		/* Find the end of line and ensure I don't look past it */
		next_line = strchr(pos, '\n');
		if (next_line) {
			*next_line = '\0';
			next_line++;
			if (*next_line == '\0')
				next_line = NULL;
		}

		pos = skip_spaces(pos);
		extent.first = simple_strtoul(pos, &pos, 10);
		if (!isspace(*pos))
			goto out;

		pos = skip_spaces(pos);
		extent.lower_first = simple_strtoul(pos, &pos, 10);
		if (!isspace(*pos))
			goto out;

		pos = skip_spaces(pos);
		extent.count = simple_strtoul(pos, &pos, 10);
		if (*pos && !isspace(*pos))
			goto out;

		/* Verify there is no trailing junk on the line */
		pos = skip_spaces(pos);
		if (*pos != '\0')
			goto out;

		/* Verify we have been given valid starting values */
		if ((extent.first == (u32) -1) ||
		    (extent.lower_first == (u32) -1))
			goto out;

		/* Verify count is not zero and does not cause the
		 * extent to wrap
		 */
		if ((extent.first + extent.count) <= extent.first)
			goto out;
		if ((extent.lower_first + extent.count) <=
		     extent.lower_first)
			goto out;

		/* Do the ranges in extent overlap any previous extents? */
		if (mappings_overlap(&new_map, &extent))
			goto out;

		if ((new_map.nr_extents + 1) == UID_GID_MAP_MAX_EXTENTS &&
		    (next_line != NULL))
			goto out;

		ret = insert_extent(&new_map, &extent);
		if (ret < 0)
			goto out;
		ret = -EINVAL;
	}
	/* Be very certain the new map actually exists */
	if (new_map.nr_extents == 0)
		goto out;

	ret = -EPERM;
	/* Validate that the user is allowed to use the ids being mapped to. */
	if (!new_idmap_permitted(file, map_ns, cap_setid, &new_map))
		goto out;

	ret = -EPERM;
	/* Map the lower ids from the parent user namespace to the
	 * kernel global id space.
	 */
	for (idx = 0; idx < new_map.nr_extents; idx++) {
		struct uid_gid_extent *e;
		u32 lower_first;

		if (new_map.nr_extents <= UID_GID_MAP_MAX_BASE_EXTENTS)
			e = &new_map.extent[idx];
		else
			e = &new_map.forward[idx];

		lower_first = map_id_range_down(parent_map,
						e->lower_first,
						e->count);

		/* Fail if we can not map the specified extent to
		 * the kernel global id space.
		 */
		if (lower_first == (u32) -1)
			goto out;

		e->lower_first = lower_first;
	}

	/*
	 * If we want to use binary search for lookup, this clones the extent
	 * array and sorts both copies.
	 */
	ret = sort_idmaps(&new_map);
	if (ret < 0)
		goto out;

	/* Install the map */
	if (new_map.nr_extents <= UID_GID_MAP_MAX_BASE_EXTENTS) {
		memcpy(map->extent, new_map.extent,
		       new_map.nr_extents * sizeof(new_map.extent[0]));
	} else {
		map->forward = new_map.forward;
		map->reverse = new_map.reverse;
	}
	smp_wmb();
	map->nr_extents = new_map.nr_extents;

	*ppos = count;
	ret = count;
out:
	if (ret < 0 && new_map.nr_extents > UID_GID_MAP_MAX_BASE_EXTENTS) {
		kfree(new_map.forward);
		kfree(new_map.reverse);
		map->forward = NULL;
		map->reverse = NULL;
		map->nr_extents = 0;
	}

	mutex_unlock(&userns_state_mutex);
	kfree(kbuf);
	return ret;
}

ssize_t proc_uid_map_write(struct file *file, const char __user *buf,
			   size_t size, loff_t *ppos)
{
	struct seq_file *seq = file->private_data;
	struct user_namespace *ns = seq->private;
	struct user_namespace *seq_ns = seq_user_ns(seq);

	if (!ns->parent)
		return -EPERM;

	if ((seq_ns != ns) && (seq_ns != ns->parent))
		return -EPERM;

	return map_write(file, buf, size, ppos, CAP_SETUID,
			 &ns->uid_map, &ns->parent->uid_map);
}
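
/*
 * Usage sketch (userspace, illustrative): each line written is
 * "<first> <lower_first> <count>", the whole map must arrive in a single
 * write(2), and a map may only be written once:
 *
 *	int fd = open("/proc/self/uid_map", O_WRONLY);
 *	write(fd, "0 1000 1", 8);	// uid 0 inside == uid 1000 outside
 *	close(fd);
 */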

ssize_t proc_gid_map_write(struct file *file, const char __user *buf,
			   size_t size, loff_t *ppos)
{
	struct seq_file *seq = file->private_data;
	struct user_namespace *ns = seq->private;
	struct user_namespace *seq_ns = seq_user_ns(seq);

	if (!ns->parent)
		return -EPERM;

	if ((seq_ns != ns) && (seq_ns != ns->parent))
		return -EPERM;

	return map_write(file, buf, size, ppos, CAP_SETGID,
			 &ns->gid_map, &ns->parent->gid_map);
}

ssize_t proc_projid_map_write(struct file *file, const char __user *buf,
			      size_t size, loff_t *ppos)
{
	struct seq_file *seq = file->private_data;
	struct user_namespace *ns = seq->private;
	struct user_namespace *seq_ns = seq_user_ns(seq);

	if (!ns->parent)
		return -EPERM;

	if ((seq_ns != ns) && (seq_ns != ns->parent))
		return -EPERM;

	/* Anyone can set any valid project id; no capability is needed. */
	return map_write(file, buf, size, ppos, -1,
			 &ns->projid_map, &ns->parent->projid_map);
}

static bool new_idmap_permitted(const struct file *file,
				struct user_namespace *ns, int cap_setid,
				struct uid_gid_map *new_map)
{
	const struct cred *cred = file->f_cred;

	if (cap_setid == CAP_SETUID && !verify_root_map(file, ns, new_map))
		return false;

	/* Don't allow mappings that would allow anything that wouldn't
	 * be allowed without the establishment of unprivileged mappings.
	 */
	if ((new_map->nr_extents == 1) && (new_map->extent[0].count == 1) &&
	    uid_eq(ns->owner, cred->euid)) {
		u32 id = new_map->extent[0].lower_first;
		if (cap_setid == CAP_SETUID) {
			kuid_t uid = make_kuid(ns->parent, id);
			if (uid_eq(uid, cred->euid))
				return true;
		} else if (cap_setid == CAP_SETGID) {
			kgid_t gid = make_kgid(ns->parent, id);
			if (!(ns->flags & USERNS_SETGROUPS_ALLOWED) &&
			    gid_eq(gid, cred->egid))
				return true;
		}
	}

	/* Allow anyone to set a mapping that doesn't require privilege */
	if (!cap_valid(cap_setid))
		return true;

	/* Allow the specified ids if we have the appropriate capability
	 * (CAP_SETUID or CAP_SETGID) over the parent user namespace.
	 * And the opener of the id file also has the appropriate capability.
	 */
	if (ns_capable(ns->parent, cap_setid) &&
	    file_ns_capable(file, ns->parent, cap_setid))
		return true;

	return false;
}

int proc_setgroups_show(struct seq_file *seq, void *v)
{
	struct user_namespace *ns = seq->private;
	unsigned long userns_flags = READ_ONCE(ns->flags);

	seq_printf(seq, "%s\n",
		   (userns_flags & USERNS_SETGROUPS_ALLOWED) ?
		   "allow" : "deny");
	return 0;
}

ssize_t proc_setgroups_write(struct file *file, const char __user *buf,
			     size_t count, loff_t *ppos)
{
	struct seq_file *seq = file->private_data;
	struct user_namespace *ns = seq->private;
	char kbuf[8], *pos;
	bool setgroups_allowed;
	ssize_t ret;

	/* Only allow a very narrow range of strings to be written */
	ret = -EINVAL;
	if ((*ppos != 0) || (count >= sizeof(kbuf)))
		goto out;

	/* What was written? */
	ret = -EFAULT;
	if (copy_from_user(kbuf, buf, count))
		goto out;
	kbuf[count] = '\0';
	pos = kbuf;

	/* What is being requested? */
	ret = -EINVAL;
	if (strncmp(pos, "allow", 5) == 0) {
		pos += 5;
		setgroups_allowed = true;
	}
	else if (strncmp(pos, "deny", 4) == 0) {
		pos += 4;
		setgroups_allowed = false;
	}
	else
		goto out;

	/* Verify there is no trailing junk on the line */
	pos = skip_spaces(pos);
	if (*pos != '\0')
		goto out;

	ret = -EPERM;
	mutex_lock(&userns_state_mutex);
	if (setgroups_allowed) {
		/* Enabling setgroups after setgroups has been disabled
		 * is not allowed.
		 */
		if (!(ns->flags & USERNS_SETGROUPS_ALLOWED))
			goto out_unlock;
	} else {
		/* Permanently disabling setgroups after setgroups has
		 * been enabled by writing the gid_map is not allowed.
		 */
		if (ns->gid_map.nr_extents != 0)
			goto out_unlock;
		ns->flags &= ~USERNS_SETGROUPS_ALLOWED;
	}
	mutex_unlock(&userns_state_mutex);

	/* Report a successful write */
	*ppos = count;
	ret = count;
out:
	return ret;
out_unlock:
	mutex_unlock(&userns_state_mutex);
	goto out;
}
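
/*
 * Usage sketch (userspace, illustrative): an unprivileged process that
 * wants to write its own gid_map must first disable setgroups(2) in the
 * new namespace:
 *
 *	int fd = open("/proc/self/setgroups", O_WRONLY);
 *	write(fd, "deny", 4);
 *	close(fd);
 */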

bool userns_may_setgroups(const struct user_namespace *ns)
{
	bool allowed;

	mutex_lock(&userns_state_mutex);
	/* It is not safe to use setgroups until a gid mapping in
	 * the user namespace has been established.
	 */
	allowed = ns->gid_map.nr_extents != 0;
	/* Is setgroups allowed? */
	allowed = allowed && (ns->flags & USERNS_SETGROUPS_ALLOWED);
	mutex_unlock(&userns_state_mutex);

	return allowed;
}

/*
 * Returns true if @child is the same namespace or a descendant of
 * @ancestor.
 */
bool in_userns(const struct user_namespace *ancestor,
	       const struct user_namespace *child)
{
	const struct user_namespace *ns;
	for (ns = child; ns->level > ancestor->level; ns = ns->parent)
		;
	return (ns == ancestor);
}
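
/*
 * Worked example (illustrative): with init_user_ns at level 0, a child A
 * at level 1, and A's child B at level 2, in_userns(A, B) walks B up one
 * level, reaches A, and returns true; in_userns(B, A) never enters the
 * loop (level 1 is not greater than level 2) and returns false.
 */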

bool current_in_userns(const struct user_namespace *target_ns)
{
	return in_userns(target_ns, current_user_ns());
}
EXPORT_SYMBOL(current_in_userns);

static inline struct user_namespace *to_user_ns(struct ns_common *ns)
{
	return container_of(ns, struct user_namespace, ns);
}

static struct ns_common *userns_get(struct task_struct *task)
{
	struct user_namespace *user_ns;

	rcu_read_lock();
	user_ns = get_user_ns(__task_cred(task)->user_ns);
	rcu_read_unlock();

	return user_ns ? &user_ns->ns : NULL;
}

static void userns_put(struct ns_common *ns)
{
	put_user_ns(to_user_ns(ns));
}

static int userns_install(struct nsset *nsset, struct ns_common *ns)
{
	struct user_namespace *user_ns = to_user_ns(ns);
	struct cred *cred;

	/* Don't allow gaining capabilities by reentering
	 * the same user namespace.
	 */
	if (user_ns == current_user_ns())
		return -EINVAL;

	/* Tasks that share a thread group must share a user namespace */
	if (!thread_group_empty(current))
		return -EINVAL;

	if (current->fs->users != 1)
		return -EINVAL;

	if (!ns_capable(user_ns, CAP_SYS_ADMIN))
		return -EPERM;

	cred = nsset_cred(nsset);
	if (!cred)
		return -EINVAL;

	put_user_ns(cred->user_ns);
	set_cred_user_ns(cred, get_user_ns(user_ns));

	if (set_cred_ucounts(cred) < 0)
		return -EINVAL;

	return 0;
}

struct ns_common *ns_get_owner(struct ns_common *ns)
{
	struct user_namespace *my_user_ns = current_user_ns();
	struct user_namespace *owner, *p;

	/* See if the owner is in the current user namespace */
	owner = p = ns->ops->owner(ns);
	for (;;) {
		if (!p)
			return ERR_PTR(-EPERM);
		if (p == my_user_ns)
			break;
		p = p->parent;
	}

	return &get_user_ns(owner)->ns;
}

static struct user_namespace *userns_owner(struct ns_common *ns)
{
	return to_user_ns(ns)->parent;
}

const struct proc_ns_operations userns_operations = {
	.name		= "user",
	.type		= CLONE_NEWUSER,
	.get		= userns_get,
	.put		= userns_put,
	.install	= userns_install,
	.owner		= userns_owner,
	.get_parent	= ns_get_owner,
};

static __init int user_namespaces_init(void)
{
	user_ns_cachep = KMEM_CACHE(user_namespace, SLAB_PANIC | SLAB_ACCOUNT);
	return 0;
}
subsys_initcall(user_namespaces_init);