// SPDX-License-Identifier: GPL-2.0-or-later
/* net/sched/sch_teql.c	"True" (or "trivial") link equalizer.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/if_arp.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/skbuff.h>
#include <linux/moduleparam.h>
#include <net/dst.h>
#include <net/neighbour.h>
#include <net/pkt_sched.h>

/*
   How to set it up.
   -----------------

   After loading this module you will find a new device teqlN and a new
   qdisc with the same name. To join a slave to the equalizer, simply
   attach this qdisc as the root qdisc of the slave device, e.g.:

   # tc qdisc add dev eth0 root teql0
   # tc qdisc add dev eth1 root teql0

   That's all. Full PnP 8)

   Applicability.
   --------------

   1. Slave devices MUST be active devices, i.e., they must raise the tbusy
      signal and generate EOI events. If you want to equalize virtual devices
      like tunnels, use a normal eql device.
   2. This device puts no limitations on physical slave characteristics,
      e.g., it will happily equalize a 9600 baud line and 100Mbit ethernet :-)
      Certainly, a large difference in link speeds will make the resulting
      equalized link unusable, because of massive packet reordering.
      I estimate the useful upper limit at a speed ratio of about 10:1.
   3. If a slave requires address resolution, only protocols using the
      neighbour cache (IPv4/IPv6) will work over the equalized link.
      Other protocols may still use the slave device directly, which will
      not break load balancing, though native slave traffic will have
      the highest priority.  */
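
/* Note: the master device must also be brought up (and, for IP traffic,
   given an address) before it will carry anything; a typical follow-up
   to the tc commands above (interface and address purely illustrative):

   # ip link set teql0 up
   # ip addr add 10.0.0.1/24 dev teql0
 */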

struct teql_master {
	struct Qdisc_ops qops;
	struct net_device *dev;
	struct Qdisc *slaves;
	struct list_head master_list;
	unsigned long	tx_bytes;
	unsigned long	tx_packets;
	unsigned long	tx_errors;
	unsigned long	tx_dropped;
};

struct teql_sched_data {
	struct Qdisc *next;
	struct teql_master *m;
	struct sk_buff_head q;
};

#define NEXT_SLAVE(q) (((struct teql_sched_data *)qdisc_priv(q))->next)

#define FMASK (IFF_BROADCAST | IFF_POINTOPOINT)

/* "teql*" qdisc routines */

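/* Tail-drop enqueue: packets queue on this slave's private list until
 * the slave device's tx_queue_len is reached, after which they are
 * dropped.
 */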
static int
teql_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free)
{
	struct net_device *dev = qdisc_dev(sch);
	struct teql_sched_data *q = qdisc_priv(sch);

	if (q->q.qlen < READ_ONCE(dev->tx_queue_len)) {
		__skb_queue_tail(&q->q, skb);
		return NET_XMIT_SUCCESS;
	}

	return qdisc_drop(skb, sch, to_free);
}

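/* Pull a packet off this slave's private queue. When the queue has
 * drained, this qdisc becomes the next round-robin starting point and
 * the master device is woken so it can refill us. sch->q.qlen mirrors
 * the private queue plus the master's root qdisc backlog.
 */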
static struct sk_buff *
teql_dequeue(struct Qdisc *sch)
{
	struct teql_sched_data *dat = qdisc_priv(sch);
	struct netdev_queue *dat_queue;
	struct sk_buff *skb;
	struct Qdisc *q;

	skb = __skb_dequeue(&dat->q);
	dat_queue = netdev_get_tx_queue(dat->m->dev, 0);
	q = rcu_dereference_bh(dat_queue->qdisc);

	if (skb == NULL) {
		struct net_device *m = qdisc_dev(q);
		if (m) {
			dat->m->slaves = sch;
			netif_wake_queue(m);
		}
	} else {
		qdisc_bstats_update(sch, skb);
	}
	sch->q.qlen = dat->q.qlen + q->q.qlen;
	return skb;
}

static struct sk_buff *
teql_peek(struct Qdisc *sch)
{
	/* teql is meant to be used as root qdisc */
	return NULL;
}

static void
teql_reset(struct Qdisc *sch)
{
	struct teql_sched_data *dat = qdisc_priv(sch);

	skb_queue_purge(&dat->q);
}

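/* Unlink this qdisc from its master's circular slave list. When the
 * last slave goes away, also reset the master device's root qdisc
 * under the sleeping root lock.
 */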
static void
teql_destroy(struct Qdisc *sch)
{
	struct Qdisc *q, *prev;
	struct teql_sched_data *dat = qdisc_priv(sch);
	struct teql_master *master = dat->m;

	if (!master)
		return;

	prev = master->slaves;
	if (prev) {
		do {
			q = NEXT_SLAVE(prev);
			if (q == sch) {
				NEXT_SLAVE(prev) = NEXT_SLAVE(q);
				if (q == master->slaves) {
					master->slaves = NEXT_SLAVE(q);
					if (q == master->slaves) {
						struct netdev_queue *txq;
						spinlock_t *root_lock;

						txq = netdev_get_tx_queue(master->dev, 0);
						master->slaves = NULL;

						root_lock = qdisc_root_sleeping_lock(rtnl_dereference(txq->qdisc));
						spin_lock_bh(root_lock);
						qdisc_reset(rtnl_dereference(txq->qdisc));
						spin_unlock_bh(root_lock);
					}
				}
				skb_queue_purge(&dat->q);
				break;
			}

		} while ((prev = q) != master->slaves);
	}
}

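/* Attaching a "teqlN" qdisc as a device's root qdisc enslaves that
 * device. Reject slaves whose link-layer header would not fit the
 * master's, and refuse to enslave the master to itself. While the
 * master is UP a new slave must not weaken its flags or lower its MTU;
 * while it is down, the master's flags and MTU are narrowed to match.
 */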
static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt,
			   struct netlink_ext_ack *extack)
{
	struct net_device *dev = qdisc_dev(sch);
	struct teql_master *m = (struct teql_master *)sch->ops;
	struct teql_sched_data *q = qdisc_priv(sch);

	if (dev->hard_header_len > m->dev->hard_header_len)
		return -EINVAL;

	if (m->dev == dev)
		return -ELOOP;

	q->m = m;

	skb_queue_head_init(&q->q);

	if (m->slaves) {
		if (m->dev->flags & IFF_UP) {
			if ((m->dev->flags & IFF_POINTOPOINT &&
			     !(dev->flags & IFF_POINTOPOINT)) ||
			    (m->dev->flags & IFF_BROADCAST &&
			     !(dev->flags & IFF_BROADCAST)) ||
			    (m->dev->flags & IFF_MULTICAST &&
			     !(dev->flags & IFF_MULTICAST)) ||
			    dev->mtu < m->dev->mtu)
				return -EINVAL;
		} else {
			if (!(dev->flags&IFF_POINTOPOINT))
				m->dev->flags &= ~IFF_POINTOPOINT;
			if (!(dev->flags&IFF_BROADCAST))
				m->dev->flags &= ~IFF_BROADCAST;
			if (!(dev->flags&IFF_MULTICAST))
				m->dev->flags &= ~IFF_MULTICAST;
			if (dev->mtu < m->dev->mtu)
				m->dev->mtu = dev->mtu;
		}
		q->next = NEXT_SLAVE(m->slaves);
		NEXT_SLAVE(m->slaves) = sch;
	} else {
		q->next = sch;
		m->slaves = sch;
		m->dev->mtu = dev->mtu;
		m->dev->flags = (m->dev->flags&~FMASK)|(dev->flags&FMASK);
	}
	return 0;
}

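/* Re-resolve the neighbour on the slave actually chosen to transmit
 * and rebuild the link-layer header there. Returns 0 once the header
 * has been built, 1 when the skb has been queued on the neighbour for
 * resolution, or a negative errno.
 */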
static int
__teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res,
	       struct net_device *dev, struct netdev_queue *txq,
	       struct dst_entry *dst)
{
	struct neighbour *n;
	int err = 0;

	n = dst_neigh_lookup_skb(dst, skb);
	if (!n)
		return -ENOENT;

	if (dst->dev != dev) {
		struct neighbour *mn;

		mn = __neigh_lookup_errno(n->tbl, n->primary_key, dev);
		neigh_release(n);
		if (IS_ERR(mn))
			return PTR_ERR(mn);
		n = mn;
	}

	if (neigh_event_send(n, skb_res) == 0) {
		char haddr[MAX_ADDR_LEN];

		neigh_ha_snapshot(haddr, n, dev);
		err = dev_hard_header(skb, dev, ntohs(skb_protocol(skb, false)),
				      haddr, NULL, skb->len);

		/* dev_hard_header() returns the header length on success;
		 * normalise so that only failures propagate to the caller.
		 */
		err = (err < 0) ? -EINVAL : 0;
	} else {
		err = (skb_res == NULL) ? -EAGAIN : 1;
	}
	neigh_release(n);
	return err;
}

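/* Check whether skb can go out on this slave. Returns 0 when no
 * resolution is needed or it already succeeded, -ENODEV when the slave
 * has no active qdisc, and __teql_resolve()'s result otherwise.
 */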
static inline int teql_resolve(struct sk_buff *skb,
			       struct sk_buff *skb_res,
			       struct net_device *dev,
			       struct netdev_queue *txq)
{
	struct dst_entry *dst = skb_dst(skb);
	int res;

	if (rcu_access_pointer(txq->qdisc) == &noop_qdisc)
		return -ENODEV;

	if (!dev->header_ops || !dst)
		return 0;

	rcu_read_lock();
	res = __teql_resolve(skb, skb_res, dev, txq, dst);
	rcu_read_unlock();

	return res;
}

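/* Round-robin transmit: walk the slave ring starting just after the
 * slave used last time. Slaves with stopped queues mark the master
 * busy; unresolved neighbours trigger a second pass in which the skb
 * is handed to the neighbour layer for resolution. If every slave is
 * busy, stop the master queue and ask the stack to requeue.
 */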
static netdev_tx_t teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct teql_master *master = netdev_priv(dev);
	struct Qdisc *start, *q;
	int busy;
	int nores;
	int subq = skb_get_queue_mapping(skb);
	struct sk_buff *skb_res = NULL;

	start = master->slaves;

restart:
	nores = 0;
	busy = 0;

	q = start;
	if (!q)
		goto drop;

	do {
		struct net_device *slave = qdisc_dev(q);
		struct netdev_queue *slave_txq = netdev_get_tx_queue(slave, 0);

		if (rcu_access_pointer(slave_txq->qdisc_sleeping) != q)
			continue;
		if (netif_xmit_stopped(netdev_get_tx_queue(slave, subq)) ||
		    !netif_running(slave)) {
			busy = 1;
			continue;
		}

		switch (teql_resolve(skb, skb_res, slave, slave_txq)) {
		case 0:
			if (__netif_tx_trylock(slave_txq)) {
				unsigned int length = qdisc_pkt_len(skb);

				if (!netif_xmit_frozen_or_stopped(slave_txq) &&
				    netdev_start_xmit(skb, slave, slave_txq, false) ==
				    NETDEV_TX_OK) {
					__netif_tx_unlock(slave_txq);
					master->slaves = NEXT_SLAVE(q);
					netif_wake_queue(dev);
					master->tx_packets++;
					master->tx_bytes += length;
					return NETDEV_TX_OK;
				}
				__netif_tx_unlock(slave_txq);
			}
			if (netif_xmit_stopped(netdev_get_tx_queue(dev, 0)))
				busy = 1;
			break;
		case 1:
			master->slaves = NEXT_SLAVE(q);
			return NETDEV_TX_OK;
		default:
			nores = 1;
			break;
		}
		__skb_pull(skb, skb_network_offset(skb));
	} while ((q = NEXT_SLAVE(q)) != start);

	if (nores && skb_res == NULL) {
		skb_res = skb;
		goto restart;
	}

	if (busy) {
		netif_stop_queue(dev);
		return NETDEV_TX_BUSY;
	}
	master->tx_errors++;

drop:
	master->tx_dropped++;
	dev_kfree_skb(skb);
	return NETDEV_TX_OK;
}

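/* The master can only come up once at least one slave is attached. Its
 * MTU becomes the minimum over all slaves, and its BROADCAST/
 * POINTOPOINT/MULTICAST flags the intersection of the slaves' flags.
 */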
static int teql_master_open(struct net_device *dev)
{
	struct Qdisc *q;
	struct teql_master *m = netdev_priv(dev);
	int mtu = 0xFFFE;
	unsigned int flags = FMASK;

	if (m->slaves == NULL)
		return -EUNATCH;

	q = m->slaves;
	do {
		struct net_device *slave = qdisc_dev(q);

		if (slave == NULL)
			return -EUNATCH;

		if (slave->mtu < mtu)
			mtu = slave->mtu;
		if (slave->hard_header_len > LL_MAX_HEADER)
			return -EINVAL;

		/* If all the slaves are BROADCAST, master is BROADCAST
		   If all the slaves are PtP, master is PtP
		   Otherwise, master is NBMA.
		 */
		if (!(slave->flags&IFF_POINTOPOINT))
			flags &= ~IFF_POINTOPOINT;
		if (!(slave->flags&IFF_BROADCAST))
			flags &= ~IFF_BROADCAST;
		if (!(slave->flags&IFF_MULTICAST))
			flags &= ~IFF_MULTICAST;
	} while ((q = NEXT_SLAVE(q)) != m->slaves);

	m->dev->mtu = mtu;
	m->dev->flags = (m->dev->flags&~FMASK) | flags;
	netif_start_queue(m->dev);
	return 0;
}

static int teql_master_close(struct net_device *dev)
{
	netif_stop_queue(dev);
	return 0;
}

static void teql_master_stats64(struct net_device *dev,
				struct rtnl_link_stats64 *stats)
{
	struct teql_master *m = netdev_priv(dev);

	stats->tx_packets	= m->tx_packets;
	stats->tx_bytes		= m->tx_bytes;
	stats->tx_errors	= m->tx_errors;
	stats->tx_dropped	= m->tx_dropped;
}

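/* The master's MTU may never exceed that of any slave. */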
static int teql_master_mtu(struct net_device *dev, int new_mtu)
{
	struct teql_master *m = netdev_priv(dev);
	struct Qdisc *q;

	q = m->slaves;
	if (q) {
		do {
			if (new_mtu > qdisc_dev(q)->mtu)
				return -EINVAL;
		} while ((q = NEXT_SLAVE(q)) != m->slaves);
	}

	WRITE_ONCE(dev->mtu, new_mtu);
	return 0;
}

static const struct net_device_ops teql_netdev_ops = {
	.ndo_open	= teql_master_open,
	.ndo_stop	= teql_master_close,
	.ndo_start_xmit	= teql_master_xmit,
	.ndo_get_stats64 = teql_master_stats64,
	.ndo_change_mtu	= teql_master_mtu,
};

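/* Each master embeds its own Qdisc_ops; teql_init() registers them
 * under the device's name ("teql0", "teql1", ...), which is what ties
 * a slave's root qdisc back to this particular master.
 */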
static __init void teql_master_setup(struct net_device *dev)
{
	struct teql_master *master = netdev_priv(dev);
	struct Qdisc_ops *ops = &master->qops;

	master->dev	= dev;
	ops->priv_size  = sizeof(struct teql_sched_data);

	ops->enqueue	=	teql_enqueue;
	ops->dequeue	=	teql_dequeue;
	ops->peek	=	teql_peek;
	ops->init	=	teql_qdisc_init;
	ops->reset	=	teql_reset;
	ops->destroy	=	teql_destroy;
	ops->owner	=	THIS_MODULE;

	dev->netdev_ops =       &teql_netdev_ops;
	dev->type		= ARPHRD_VOID;
	dev->mtu		= 1500;
	dev->min_mtu		= 68;
	dev->max_mtu		= 65535;
	dev->tx_queue_len	= 100;
	dev->flags		= IFF_NOARP;
	dev->hard_header_len	= LL_MAX_HEADER;
	netif_keep_dst(dev);
}

static LIST_HEAD(master_dev_list);
static int max_equalizers = 1;
module_param(max_equalizers, int, 0);
MODULE_PARM_DESC(max_equalizers, "Max number of link equalizers");

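/* Create max_equalizers master devices, each paired with a qdisc ops
 * structure registered under the device's name. Partial success is
 * tolerated: the module loads if at least one equalizer came up.
 */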
static int __init teql_init(void)
{
	int i;
	int err = -ENODEV;

	for (i = 0; i < max_equalizers; i++) {
		struct net_device *dev;
		struct teql_master *master;

		dev = alloc_netdev(sizeof(struct teql_master), "teql%d",
				   NET_NAME_UNKNOWN, teql_master_setup);
		if (!dev) {
			err = -ENOMEM;
			break;
		}

		err = register_netdev(dev);
		if (err) {
			free_netdev(dev);
			break;
		}

		master = netdev_priv(dev);

		strscpy(master->qops.id, dev->name, IFNAMSIZ);
		err = register_qdisc(&master->qops);

		if (err) {
			unregister_netdev(dev);
			free_netdev(dev);
			break;
		}

		list_add_tail(&master->master_list, &master_dev_list);
	}
	return i ? 0 : err;
}

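/* Tear each equalizer down in the reverse of its setup order: the
 * qdisc ops first, then the net device.
 */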
static void __exit teql_exit(void)
{
	struct teql_master *master, *nxt;

	list_for_each_entry_safe(master, nxt, &master_dev_list, master_list) {

		list_del(&master->master_list);

		unregister_qdisc(&master->qops);
		unregister_netdev(master->dev);
		free_netdev(master->dev);
	}
}

module_init(teql_init);
module_exit(teql_exit);

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("True (or trivial) link equalizer qdisc");