1  // SPDX-License-Identifier: GPL-2.0-or-later
2  /*
3   * net/core/netprio_cgroup.c	Priority Control Group
4   *
5   * Authors:	Neil Horman <nhorman@tuxdriver.com>
6   */
7  
8  #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
9  
10  #include <linux/module.h>
11  #include <linux/slab.h>
12  #include <linux/types.h>
13  #include <linux/string.h>
14  #include <linux/errno.h>
15  #include <linux/skbuff.h>
16  #include <linux/cgroup.h>
17  #include <linux/rcupdate.h>
18  #include <linux/atomic.h>
19  #include <linux/sched/task.h>
20  
21  #include <net/rtnetlink.h>
22  #include <net/pkt_cls.h>
23  #include <net/sock.h>
24  #include <net/netprio_cgroup.h>
25  
26  #include <linux/fdtable.h>
27  
/*
 * netprio allocates per-net_device priomap array which is indexed by
 * css->id.  Limiting css ID to 16bits doesn't lose anything.
 */
#define NETPRIO_ID_MAX		USHRT_MAX

/*
 * Smallest priomap allocation, in bytes.  Larger maps are obtained by
 * repeatedly doubling this size.
 */
#define PRIOMAP_MIN_SZ		128
35  
36  /*
37   * Extend @dev->priomap so that it's large enough to accommodate
38   * @target_idx.  @dev->priomap.priomap_len > @target_idx after successful
39   * return.  Must be called under rtnl lock.
40   */
extend_netdev_table(struct net_device * dev,u32 target_idx)41  static int extend_netdev_table(struct net_device *dev, u32 target_idx)
42  {
43  	struct netprio_map *old, *new;
44  	size_t new_sz, new_len;
45  
46  	/* is the existing priomap large enough? */
47  	old = rtnl_dereference(dev->priomap);
48  	if (old && old->priomap_len > target_idx)
49  		return 0;
50  
51  	/*
52  	 * Determine the new size.  Let's keep it power-of-two.  We start
53  	 * from PRIOMAP_MIN_SZ and double it until it's large enough to
54  	 * accommodate @target_idx.
55  	 */
56  	new_sz = PRIOMAP_MIN_SZ;
57  	while (true) {
58  		new_len = (new_sz - offsetof(struct netprio_map, priomap)) /
59  			sizeof(new->priomap[0]);
60  		if (new_len > target_idx)
61  			break;
62  		new_sz *= 2;
63  		/* overflowed? */
64  		if (WARN_ON(new_sz < PRIOMAP_MIN_SZ))
65  			return -ENOSPC;
66  	}
67  
68  	/* allocate & copy */
69  	new = kzalloc(new_sz, GFP_KERNEL);
70  	if (!new)
71  		return -ENOMEM;
72  
73  	if (old)
74  		memcpy(new->priomap, old->priomap,
75  		       old->priomap_len * sizeof(old->priomap[0]));
76  
77  	new->priomap_len = new_len;
78  
79  	/* install the new priomap */
80  	rcu_assign_pointer(dev->priomap, new);
81  	if (old)
82  		kfree_rcu(old, rcu);
83  	return 0;
84  }
85  
86  /**
87   * netprio_prio - return the effective netprio of a cgroup-net_device pair
88   * @css: css part of the target pair
89   * @dev: net_device part of the target pair
90   *
91   * Should be called under RCU read or rtnl lock.
92   */
netprio_prio(struct cgroup_subsys_state * css,struct net_device * dev)93  static u32 netprio_prio(struct cgroup_subsys_state *css, struct net_device *dev)
94  {
95  	struct netprio_map *map = rcu_dereference_rtnl(dev->priomap);
96  	int id = css->id;
97  
98  	if (map && id < map->priomap_len)
99  		return map->priomap[id];
100  	return 0;
101  }
102  
103  /**
104   * netprio_set_prio - set netprio on a cgroup-net_device pair
105   * @css: css part of the target pair
106   * @dev: net_device part of the target pair
107   * @prio: prio to set
108   *
109   * Set netprio to @prio on @css-@dev pair.  Should be called under rtnl
110   * lock and may fail under memory pressure for non-zero @prio.
111   */
netprio_set_prio(struct cgroup_subsys_state * css,struct net_device * dev,u32 prio)112  static int netprio_set_prio(struct cgroup_subsys_state *css,
113  			    struct net_device *dev, u32 prio)
114  {
115  	struct netprio_map *map;
116  	int id = css->id;
117  	int ret;
118  
119  	/* avoid extending priomap for zero writes */
120  	map = rtnl_dereference(dev->priomap);
121  	if (!prio && (!map || map->priomap_len <= id))
122  		return 0;
123  
124  	ret = extend_netdev_table(dev, id);
125  	if (ret)
126  		return ret;
127  
128  	map = rtnl_dereference(dev->priomap);
129  	map->priomap[id] = prio;
130  	return 0;
131  }
132  
133  static struct cgroup_subsys_state *
cgrp_css_alloc(struct cgroup_subsys_state * parent_css)134  cgrp_css_alloc(struct cgroup_subsys_state *parent_css)
135  {
136  	struct cgroup_subsys_state *css;
137  
138  	css = kzalloc(sizeof(*css), GFP_KERNEL);
139  	if (!css)
140  		return ERR_PTR(-ENOMEM);
141  
142  	return css;
143  }
144  
cgrp_css_online(struct cgroup_subsys_state * css)145  static int cgrp_css_online(struct cgroup_subsys_state *css)
146  {
147  	struct cgroup_subsys_state *parent_css = css->parent;
148  	struct net_device *dev;
149  	int ret = 0;
150  
151  	if (css->id > NETPRIO_ID_MAX)
152  		return -ENOSPC;
153  
154  	if (!parent_css)
155  		return 0;
156  
157  	rtnl_lock();
158  	/*
159  	 * Inherit prios from the parent.  As all prios are set during
160  	 * onlining, there is no need to clear them on offline.
161  	 */
162  	for_each_netdev(&init_net, dev) {
163  		u32 prio = netprio_prio(parent_css, dev);
164  
165  		ret = netprio_set_prio(css, dev, prio);
166  		if (ret)
167  			break;
168  	}
169  	rtnl_unlock();
170  	return ret;
171  }
172  
/* Free @css with kfree(); counterpart of the kzalloc() in cgrp_css_alloc(). */
static void cgrp_css_free(struct cgroup_subsys_state *css)
{
	kfree(css);
}
177  
read_prioidx(struct cgroup_subsys_state * css,struct cftype * cft)178  static u64 read_prioidx(struct cgroup_subsys_state *css, struct cftype *cft)
179  {
180  	return css->id;
181  }
182  
read_priomap(struct seq_file * sf,void * v)183  static int read_priomap(struct seq_file *sf, void *v)
184  {
185  	struct net_device *dev;
186  
187  	rcu_read_lock();
188  	for_each_netdev_rcu(&init_net, dev)
189  		seq_printf(sf, "%s %u\n", dev->name,
190  			   netprio_prio(seq_css(sf), dev));
191  	rcu_read_unlock();
192  	return 0;
193  }
194  
write_priomap(struct kernfs_open_file * of,char * buf,size_t nbytes,loff_t off)195  static ssize_t write_priomap(struct kernfs_open_file *of,
196  			     char *buf, size_t nbytes, loff_t off)
197  {
198  	char devname[IFNAMSIZ + 1];
199  	struct net_device *dev;
200  	u32 prio;
201  	int ret;
202  
203  	if (sscanf(buf, "%"__stringify(IFNAMSIZ)"s %u", devname, &prio) != 2)
204  		return -EINVAL;
205  
206  	dev = dev_get_by_name(&init_net, devname);
207  	if (!dev)
208  		return -ENODEV;
209  
210  	rtnl_lock();
211  
212  	ret = netprio_set_prio(of_css(of), dev, prio);
213  
214  	rtnl_unlock();
215  	dev_put(dev);
216  	return ret ?: nbytes;
217  }
218  
update_netprio(const void * v,struct file * file,unsigned n)219  static int update_netprio(const void *v, struct file *file, unsigned n)
220  {
221  	struct socket *sock = sock_from_file(file);
222  
223  	if (sock)
224  		sock_cgroup_set_prioidx(&sock->sk->sk_cgrp_data,
225  					(unsigned long)v);
226  	return 0;
227  }
228  
net_prio_attach(struct cgroup_taskset * tset)229  static void net_prio_attach(struct cgroup_taskset *tset)
230  {
231  	struct task_struct *p;
232  	struct cgroup_subsys_state *css;
233  
234  	cgroup_taskset_for_each(p, css, tset) {
235  		void *v = (void *)(unsigned long)css->id;
236  
237  		task_lock(p);
238  		iterate_fd(p->files, 0, update_netprio, v);
239  		task_unlock(p);
240  	}
241  }
242  
243  static struct cftype ss_files[] = {
244  	{
245  		.name = "prioidx",
246  		.read_u64 = read_prioidx,
247  	},
248  	{
249  		.name = "ifpriomap",
250  		.seq_show = read_priomap,
251  		.write = write_priomap,
252  	},
253  	{ }	/* terminate */
254  };
255  
256  struct cgroup_subsys net_prio_cgrp_subsys = {
257  	.css_alloc	= cgrp_css_alloc,
258  	.css_online	= cgrp_css_online,
259  	.css_free	= cgrp_css_free,
260  	.attach		= net_prio_attach,
261  	.legacy_cftypes	= ss_files,
262  };
263  
netprio_device_event(struct notifier_block * unused,unsigned long event,void * ptr)264  static int netprio_device_event(struct notifier_block *unused,
265  				unsigned long event, void *ptr)
266  {
267  	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
268  	struct netprio_map *old;
269  
270  	/*
271  	 * Note this is called with rtnl_lock held so we have update side
272  	 * protection on our rcu assignments
273  	 */
274  
275  	switch (event) {
276  	case NETDEV_UNREGISTER:
277  		old = rtnl_dereference(dev->priomap);
278  		RCU_INIT_POINTER(dev->priomap, NULL);
279  		if (old)
280  			kfree_rcu(old, rcu);
281  		break;
282  	}
283  	return NOTIFY_DONE;
284  }
285  
286  static struct notifier_block netprio_device_notifier = {
287  	.notifier_call = netprio_device_event
288  };
289  
init_cgroup_netprio(void)290  static int __init init_cgroup_netprio(void)
291  {
292  	register_netdevice_notifier(&netprio_device_notifier);
293  	return 0;
294  }
295  subsys_initcall(init_cgroup_netprio);
296