1  // SPDX-License-Identifier: GPL-2.0-or-later
2  /*
3   * net/sched/cls_u32.c	Ugly (or Universal) 32bit key Packet Classifier.
4   *
5   * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
6   *
7   *	The filters are packed to hash tables of key nodes
8   *	with a set of 32bit key/mask pairs at every node.
9   *	Nodes reference next level hash tables etc.
10   *
11   *	This scheme is the best universal classifier I managed to
12   *	invent; it is not super-fast, but it is not slow (provided you
13   *	program it correctly), and general enough.  And its relative
14   *	speed grows as the number of rules becomes larger.
15   *
16   *	It seems that it represents the best middle point between
17   *	speed and manageability both by human and by machine.
18   *
19   *	It is especially useful for link sharing combined with QoS;
20   *	pure RSVP doesn't need such a general approach and can use
21   *	much simpler (and faster) schemes, sort of cls_rsvp.c.
22   *
23   *	nfmark match added by Catalin(ux aka Dino) BOIE <catab at umbrella.ro>
24   */
25  
26  #include <linux/module.h>
27  #include <linux/slab.h>
28  #include <linux/types.h>
29  #include <linux/kernel.h>
30  #include <linux/string.h>
31  #include <linux/errno.h>
32  #include <linux/percpu.h>
33  #include <linux/rtnetlink.h>
34  #include <linux/skbuff.h>
35  #include <linux/bitmap.h>
36  #include <linux/netdevice.h>
37  #include <linux/hash.h>
38  #include <net/netlink.h>
39  #include <net/act_api.h>
40  #include <net/pkt_cls.h>
41  #include <linux/idr.h>
42  #include <net/tc_wrapper.h>
43  
44  struct tc_u_knode {
45  	struct tc_u_knode __rcu	*next;
46  	u32			handle;
47  	struct tc_u_hnode __rcu	*ht_up;
48  	struct tcf_exts		exts;
49  	int			ifindex;
50  	u8			fshift;
51  	struct tcf_result	res;
52  	struct tc_u_hnode __rcu	*ht_down;
53  #ifdef CONFIG_CLS_U32_PERF
54  	struct tc_u32_pcnt __percpu *pf;
55  #endif
56  	u32			flags;
57  	unsigned int		in_hw_count;
58  #ifdef CONFIG_CLS_U32_MARK
59  	u32			val;
60  	u32			mask;
61  	u32 __percpu		*pcpu_success;
62  #endif
63  	struct rcu_work		rwork;
64  	/* The 'sel' field MUST be the last field in structure to allow for
65  	 * tc_u32_keys allocated at end of structure.
66  	 */
67  	struct tc_u32_sel	sel;
68  };
69  
70  struct tc_u_hnode {
71  	struct tc_u_hnode __rcu	*next;
72  	u32			handle;
73  	u32			prio;
74  	refcount_t		refcnt;
75  	unsigned int		divisor;
76  	struct idr		handle_idr;
77  	bool			is_root;
78  	struct rcu_head		rcu;
79  	u32			flags;
80  	/* The 'ht' field MUST be the last field in structure to allow for
81  	 * more entries allocated at end of structure.
82  	 */
83  	struct tc_u_knode __rcu	*ht[];
84  };
85  
86  struct tc_u_common {
87  	struct tc_u_hnode __rcu	*hlist;
88  	void			*ptr;
89  	refcount_t		refcnt;
90  	struct idr		handle_idr;
91  	struct hlist_node	hnode;
92  	long			knodes;
93  };
94  
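/* Kernel-allocated hash table handles encode the table's IDR id in
 * bits 20..30 and set bit 31:
 *
 *	id2handle(id) = (id | 0x800) << 20
 *	handle2id(h)  = (h >> 20) & 0x7FF	(when bit 31 is set)
 *
 * For example, id2handle(1) == 0x80100000 and handle2id(0x80100000) == 1.
 * A user-chosen htid without bit 31 set is stored in the IDR under the
 * handle itself, which is why handle2id() returns such handles unchanged.
 */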
95  static u32 handle2id(u32 h)
96  {
97  	return ((h & 0x80000000) ? ((h >> 20) & 0x7FF) : h);
98  }
99  
100  static u32 id2handle(u32 id)
101  {
102  	return (id | 0x800U) << 20;
103  }
104  
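/* Fold the 32-bit word at the hash offset down to a bucket index.
 * For example, with a host-order hmask of 0x0000ff00 the caller derives
 * fshift == 8 (ffs() of the mask minus one), so a word whose masked byte
 * is 0x12 folds to 0x12; u32_classify() then ANDs the result with
 * ht->divisor to pick the bucket.
 */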
105  static inline unsigned int u32_hash_fold(__be32 key,
106  					 const struct tc_u32_sel *sel,
107  					 u8 fshift)
108  {
109  	unsigned int h = ntohl(key & sel->hmask) >> fshift;
110  
111  	return h;
112  }
113  
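/* Packet classification walk: starting at tp->root, each knode's key/mask
 * pairs are matched against packet data; on a match the result is returned
 * for TC_U32_TERMINAL nodes, otherwise the walk descends into the linked
 * hash table, saving the current node and offset on a small stack so it can
 * fall back to the parent chain. Descent depth is bounded by
 * TC_U32_MAXDEPTH.
 */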
114  TC_INDIRECT_SCOPE int u32_classify(struct sk_buff *skb,
115  				   const struct tcf_proto *tp,
116  				   struct tcf_result *res)
117  {
118  	struct {
119  		struct tc_u_knode *knode;
120  		unsigned int	  off;
121  	} stack[TC_U32_MAXDEPTH];
122  
123  	struct tc_u_hnode *ht = rcu_dereference_bh(tp->root);
124  	unsigned int off = skb_network_offset(skb);
125  	struct tc_u_knode *n;
126  	int sdepth = 0;
127  	int off2 = 0;
128  	int sel = 0;
129  #ifdef CONFIG_CLS_U32_PERF
130  	int j;
131  #endif
132  	int i, r;
133  
134  next_ht:
135  	n = rcu_dereference_bh(ht->ht[sel]);
136  
137  next_knode:
138  	if (n) {
139  		struct tc_u32_key *key = n->sel.keys;
140  
141  #ifdef CONFIG_CLS_U32_PERF
142  		__this_cpu_inc(n->pf->rcnt);
143  		j = 0;
144  #endif
145  
146  		if (tc_skip_sw(n->flags)) {
147  			n = rcu_dereference_bh(n->next);
148  			goto next_knode;
149  		}
150  
151  #ifdef CONFIG_CLS_U32_MARK
152  		if ((skb->mark & n->mask) != n->val) {
153  			n = rcu_dereference_bh(n->next);
154  			goto next_knode;
155  		} else {
156  			__this_cpu_inc(*n->pcpu_success);
157  		}
158  #endif
159  
160  		for (i = n->sel.nkeys; i > 0; i--, key++) {
161  			int toff = off + key->off + (off2 & key->offmask);
162  			__be32 *data, hdata;
163  
164  			if (skb_headroom(skb) + toff > INT_MAX)
165  				goto out;
166  
167  			data = skb_header_pointer(skb, toff, 4, &hdata);
168  			if (!data)
169  				goto out;
170  			if ((*data ^ key->val) & key->mask) {
171  				n = rcu_dereference_bh(n->next);
172  				goto next_knode;
173  			}
174  #ifdef CONFIG_CLS_U32_PERF
175  			__this_cpu_inc(n->pf->kcnts[j]);
176  			j++;
177  #endif
178  		}
179  
180  		ht = rcu_dereference_bh(n->ht_down);
181  		if (!ht) {
182  check_terminal:
183  			if (n->sel.flags & TC_U32_TERMINAL) {
184  
185  				*res = n->res;
186  				if (!tcf_match_indev(skb, n->ifindex)) {
187  					n = rcu_dereference_bh(n->next);
188  					goto next_knode;
189  				}
190  #ifdef CONFIG_CLS_U32_PERF
191  				__this_cpu_inc(n->pf->rhit);
192  #endif
193  				r = tcf_exts_exec(skb, &n->exts, res);
194  				if (r < 0) {
195  					n = rcu_dereference_bh(n->next);
196  					goto next_knode;
197  				}
198  
199  				return r;
200  			}
201  			n = rcu_dereference_bh(n->next);
202  			goto next_knode;
203  		}
204  
205  		/* PUSH */
206  		if (sdepth >= TC_U32_MAXDEPTH)
207  			goto deadloop;
208  		stack[sdepth].knode = n;
209  		stack[sdepth].off = off;
210  		sdepth++;
211  
212  		ht = rcu_dereference_bh(n->ht_down);
213  		sel = 0;
214  		if (ht->divisor) {
215  			__be32 *data, hdata;
216  
217  			data = skb_header_pointer(skb, off + n->sel.hoff, 4,
218  						  &hdata);
219  			if (!data)
220  				goto out;
221  			sel = ht->divisor & u32_hash_fold(*data, &n->sel,
222  							  n->fshift);
223  		}
224  		if (!(n->sel.flags & (TC_U32_VAROFFSET | TC_U32_OFFSET | TC_U32_EAT)))
225  			goto next_ht;
226  
227  		if (n->sel.flags & (TC_U32_OFFSET | TC_U32_VAROFFSET)) {
228  			off2 = n->sel.off + 3;
229  			if (n->sel.flags & TC_U32_VAROFFSET) {
230  				__be16 *data, hdata;
231  
232  				data = skb_header_pointer(skb,
233  							  off + n->sel.offoff,
234  							  2, &hdata);
235  				if (!data)
236  					goto out;
237  				off2 += ntohs(n->sel.offmask & *data) >>
238  					n->sel.offshift;
239  			}
240  			off2 &= ~3;
241  		}
242  		if (n->sel.flags & TC_U32_EAT) {
243  			off += off2;
244  			off2 = 0;
245  		}
246  
247  		if (off < skb->len)
248  			goto next_ht;
249  	}
250  
251  	/* POP */
252  	if (sdepth--) {
253  		n = stack[sdepth].knode;
254  		ht = rcu_dereference_bh(n->ht_up);
255  		off = stack[sdepth].off;
256  		goto check_terminal;
257  	}
258  out:
259  	return -1;
260  
261  deadloop:
262  	net_warn_ratelimited("cls_u32: dead loop\n");
263  	return -1;
264  }
265  
266  static struct tc_u_hnode *u32_lookup_ht(struct tc_u_common *tp_c, u32 handle)
267  {
268  	struct tc_u_hnode *ht;
269  
270  	for (ht = rtnl_dereference(tp_c->hlist);
271  	     ht;
272  	     ht = rtnl_dereference(ht->next))
273  		if (ht->handle == handle)
274  			break;
275  
276  	return ht;
277  }
278  
279  static struct tc_u_knode *u32_lookup_key(struct tc_u_hnode *ht, u32 handle)
280  {
281  	unsigned int sel;
282  	struct tc_u_knode *n = NULL;
283  
284  	sel = TC_U32_HASH(handle);
285  	if (sel > ht->divisor)
286  		goto out;
287  
288  	for (n = rtnl_dereference(ht->ht[sel]);
289  	     n;
290  	     n = rtnl_dereference(n->next))
291  		if (n->handle == handle)
292  			break;
293  out:
294  	return n;
295  }
296  
297  
298  static void *u32_get(struct tcf_proto *tp, u32 handle)
299  {
300  	struct tc_u_hnode *ht;
301  	struct tc_u_common *tp_c = tp->data;
302  
303  	if (TC_U32_HTID(handle) == TC_U32_ROOT)
304  		ht = rtnl_dereference(tp->root);
305  	else
306  		ht = u32_lookup_ht(tp_c, TC_U32_HTID(handle));
307  
308  	if (!ht)
309  		return NULL;
310  
311  	if (TC_U32_KEY(handle) == 0)
312  		return ht;
313  
314  	return u32_lookup_key(ht, handle);
315  }
316  
317  /* Protected by rtnl lock */
318  static u32 gen_new_htid(struct tc_u_common *tp_c, struct tc_u_hnode *ptr)
319  {
320  	int id = idr_alloc_cyclic(&tp_c->handle_idr, ptr, 1, 0x7FF, GFP_KERNEL);
321  	if (id < 0)
322  		return 0;
323  	return id2handle(id);
324  }
325  
326  static struct hlist_head *tc_u_common_hash;
327  
328  #define U32_HASH_SHIFT 10
329  #define U32_HASH_SIZE (1 << U32_HASH_SHIFT)
330  
331  static void *tc_u_common_ptr(const struct tcf_proto *tp)
332  {
333  	struct tcf_block *block = tp->chain->block;
334  
335  	/* The block sharing is currently supported only
336  	 * for classless qdiscs. In that case we use block
337  	 * for tc_u_common identification. In case the
338  	 * block is not shared, block->q is a valid pointer
339  	 * and we can use that. That works for classful qdiscs.
340  	 */
341  	if (tcf_block_shared(block))
342  		return block;
343  	else
344  		return block->q;
345  }
346  
347  static struct hlist_head *tc_u_hash(void *key)
348  {
349  	return tc_u_common_hash + hash_ptr(key, U32_HASH_SHIFT);
350  }
351  
352  static struct tc_u_common *tc_u_common_find(void *key)
353  {
354  	struct tc_u_common *tc;
355  	hlist_for_each_entry(tc, tc_u_hash(key), hnode) {
356  		if (tc->ptr == key)
357  			return tc;
358  	}
359  	return NULL;
360  }
361  
362  static int u32_init(struct tcf_proto *tp)
363  {
364  	struct tc_u_hnode *root_ht;
365  	void *key = tc_u_common_ptr(tp);
366  	struct tc_u_common *tp_c = tc_u_common_find(key);
367  
368  	root_ht = kzalloc(struct_size(root_ht, ht, 1), GFP_KERNEL);
369  	if (root_ht == NULL)
370  		return -ENOBUFS;
371  
372  	refcount_set(&root_ht->refcnt, 1);
373  	root_ht->handle = tp_c ? gen_new_htid(tp_c, root_ht) : id2handle(0);
374  	root_ht->prio = tp->prio;
375  	root_ht->is_root = true;
376  	idr_init(&root_ht->handle_idr);
377  
378  	if (tp_c == NULL) {
379  		tp_c = kzalloc(sizeof(*tp_c), GFP_KERNEL);
380  		if (tp_c == NULL) {
381  			kfree(root_ht);
382  			return -ENOBUFS;
383  		}
384  		refcount_set(&tp_c->refcnt, 1);
385  		tp_c->ptr = key;
386  		INIT_HLIST_NODE(&tp_c->hnode);
387  		idr_init(&tp_c->handle_idr);
388  
389  		hlist_add_head(&tp_c->hnode, tc_u_hash(key));
390  	} else {
391  		refcount_inc(&tp_c->refcnt);
392  	}
393  
394  	RCU_INIT_POINTER(root_ht->next, tp_c->hlist);
395  	rcu_assign_pointer(tp_c->hlist, root_ht);
396  
397  	/* root_ht must be destroyed when tcf_proto is destroyed */
398  	rcu_assign_pointer(tp->root, root_ht);
399  	tp->data = tp_c;
400  	return 0;
401  }
402  
403  static void __u32_destroy_key(struct tc_u_knode *n)
404  {
405  	struct tc_u_hnode *ht = rtnl_dereference(n->ht_down);
406  
407  	tcf_exts_destroy(&n->exts);
408  	if (ht && refcount_dec_and_test(&ht->refcnt))
409  		kfree(ht);
410  	kfree(n);
411  }
412  
413  static void u32_destroy_key(struct tc_u_knode *n, bool free_pf)
414  {
415  	tcf_exts_put_net(&n->exts);
416  #ifdef CONFIG_CLS_U32_PERF
417  	if (free_pf)
418  		free_percpu(n->pf);
419  #endif
420  #ifdef CONFIG_CLS_U32_MARK
421  	if (free_pf)
422  		free_percpu(n->pcpu_success);
423  #endif
424  	__u32_destroy_key(n);
425  }
426  
427  /* u32_delete_key_work should be called when freeing a copied
428   * version of a tc_u_knode obtained from u32_init_knode(). When
429   * copies are obtained from u32_init_knode() the statistics are
430   * shared between the old and new copies to allow readers to
431   * continue to update the statistics during the copy. To support
432   * this the u32_delete_key_work variant does not free the percpu
433   * statistics.
434   */
435  static void u32_delete_key_work(struct work_struct *work)
436  {
437  	struct tc_u_knode *key = container_of(to_rcu_work(work),
438  					      struct tc_u_knode,
439  					      rwork);
440  	rtnl_lock();
441  	u32_destroy_key(key, false);
442  	rtnl_unlock();
443  }
444  
445  /* u32_delete_key_freepf_work is the deferred variant
446   * that frees the entire structure including the statistics
447   * percpu variables. Only use this if the key is not a copy
448   * returned by u32_init_knode(). See u32_delete_key_work()
449   * for the variant that should be used with keys returned from
450   * u32_init_knode()
451   */
452  static void u32_delete_key_freepf_work(struct work_struct *work)
453  {
454  	struct tc_u_knode *key = container_of(to_rcu_work(work),
455  					      struct tc_u_knode,
456  					      rwork);
457  	rtnl_lock();
458  	u32_destroy_key(key, true);
459  	rtnl_unlock();
460  }
461  
462  static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key)
463  {
464  	struct tc_u_common *tp_c = tp->data;
465  	struct tc_u_knode __rcu **kp;
466  	struct tc_u_knode *pkp;
467  	struct tc_u_hnode *ht = rtnl_dereference(key->ht_up);
468  
469  	if (ht) {
470  		kp = &ht->ht[TC_U32_HASH(key->handle)];
471  		for (pkp = rtnl_dereference(*kp); pkp;
472  		     kp = &pkp->next, pkp = rtnl_dereference(*kp)) {
473  			if (pkp == key) {
474  				RCU_INIT_POINTER(*kp, key->next);
475  				tp_c->knodes--;
476  
477  				tcf_unbind_filter(tp, &key->res);
478  				idr_remove(&ht->handle_idr, key->handle);
479  				tcf_exts_get_net(&key->exts);
480  				tcf_queue_work(&key->rwork, u32_delete_key_freepf_work);
481  				return 0;
482  			}
483  		}
484  	}
485  	WARN_ON(1);
486  	return 0;
487  }
488  
489  static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h,
490  			       struct netlink_ext_ack *extack)
491  {
492  	struct tcf_block *block = tp->chain->block;
493  	struct tc_cls_u32_offload cls_u32 = {};
494  
495  	tc_cls_common_offload_init(&cls_u32.common, tp, h->flags, extack);
496  	cls_u32.command = TC_CLSU32_DELETE_HNODE;
497  	cls_u32.hnode.divisor = h->divisor;
498  	cls_u32.hnode.handle = h->handle;
499  	cls_u32.hnode.prio = h->prio;
500  
501  	tc_setup_cb_call(block, TC_SETUP_CLSU32, &cls_u32, false, true);
502  }
503  
504  static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h,
505  				u32 flags, struct netlink_ext_ack *extack)
506  {
507  	struct tcf_block *block = tp->chain->block;
508  	struct tc_cls_u32_offload cls_u32 = {};
509  	bool skip_sw = tc_skip_sw(flags);
510  	bool offloaded = false;
511  	int err;
512  
513  	tc_cls_common_offload_init(&cls_u32.common, tp, flags, extack);
514  	cls_u32.command = TC_CLSU32_NEW_HNODE;
515  	cls_u32.hnode.divisor = h->divisor;
516  	cls_u32.hnode.handle = h->handle;
517  	cls_u32.hnode.prio = h->prio;
518  
519  	err = tc_setup_cb_call(block, TC_SETUP_CLSU32, &cls_u32, skip_sw, true);
520  	if (err < 0) {
521  		u32_clear_hw_hnode(tp, h, NULL);
522  		return err;
523  	} else if (err > 0) {
524  		offloaded = true;
525  	}
526  
527  	if (skip_sw && !offloaded)
528  		return -EINVAL;
529  
530  	return 0;
531  }
532  
533  static void u32_remove_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n,
534  				struct netlink_ext_ack *extack)
535  {
536  	struct tcf_block *block = tp->chain->block;
537  	struct tc_cls_u32_offload cls_u32 = {};
538  
539  	tc_cls_common_offload_init(&cls_u32.common, tp, n->flags, extack);
540  	cls_u32.command = TC_CLSU32_DELETE_KNODE;
541  	cls_u32.knode.handle = n->handle;
542  
543  	tc_setup_cb_destroy(block, tp, TC_SETUP_CLSU32, &cls_u32, false,
544  			    &n->flags, &n->in_hw_count, true);
545  }
546  
547  static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n,
548  				u32 flags, struct netlink_ext_ack *extack)
549  {
550  	struct tc_u_hnode *ht = rtnl_dereference(n->ht_down);
551  	struct tcf_block *block = tp->chain->block;
552  	struct tc_cls_u32_offload cls_u32 = {};
553  	bool skip_sw = tc_skip_sw(flags);
554  	int err;
555  
556  	tc_cls_common_offload_init(&cls_u32.common, tp, flags, extack);
557  	cls_u32.command = TC_CLSU32_REPLACE_KNODE;
558  	cls_u32.knode.handle = n->handle;
559  	cls_u32.knode.fshift = n->fshift;
560  #ifdef CONFIG_CLS_U32_MARK
561  	cls_u32.knode.val = n->val;
562  	cls_u32.knode.mask = n->mask;
563  #else
564  	cls_u32.knode.val = 0;
565  	cls_u32.knode.mask = 0;
566  #endif
567  	cls_u32.knode.sel = &n->sel;
568  	cls_u32.knode.res = &n->res;
569  	cls_u32.knode.exts = &n->exts;
570  	if (n->ht_down)
571  		cls_u32.knode.link_handle = ht->handle;
572  
573  	err = tc_setup_cb_add(block, tp, TC_SETUP_CLSU32, &cls_u32, skip_sw,
574  			      &n->flags, &n->in_hw_count, true);
575  	if (err) {
576  		u32_remove_hw_knode(tp, n, NULL);
577  		return err;
578  	}
579  
580  	if (skip_sw && !(n->flags & TCA_CLS_FLAGS_IN_HW))
581  		return -EINVAL;
582  
583  	return 0;
584  }
585  
586  static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht,
587  			    struct netlink_ext_ack *extack)
588  {
589  	struct tc_u_common *tp_c = tp->data;
590  	struct tc_u_knode *n;
591  	unsigned int h;
592  
593  	for (h = 0; h <= ht->divisor; h++) {
594  		while ((n = rtnl_dereference(ht->ht[h])) != NULL) {
595  			RCU_INIT_POINTER(ht->ht[h],
596  					 rtnl_dereference(n->next));
597  			tp_c->knodes--;
598  			tcf_unbind_filter(tp, &n->res);
599  			u32_remove_hw_knode(tp, n, extack);
600  			idr_remove(&ht->handle_idr, n->handle);
601  			if (tcf_exts_get_net(&n->exts))
602  				tcf_queue_work(&n->rwork, u32_delete_key_freepf_work);
603  			else
604  				u32_destroy_key(n, true);
605  		}
606  	}
607  }
608  
609  static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht,
610  			     struct netlink_ext_ack *extack)
611  {
612  	struct tc_u_common *tp_c = tp->data;
613  	struct tc_u_hnode __rcu **hn;
614  	struct tc_u_hnode *phn;
615  
616  	u32_clear_hnode(tp, ht, extack);
617  
618  	hn = &tp_c->hlist;
619  	for (phn = rtnl_dereference(*hn);
620  	     phn;
621  	     hn = &phn->next, phn = rtnl_dereference(*hn)) {
622  		if (phn == ht) {
623  			u32_clear_hw_hnode(tp, ht, extack);
624  			idr_destroy(&ht->handle_idr);
625  			idr_remove(&tp_c->handle_idr, handle2id(ht->handle));
626  			RCU_INIT_POINTER(*hn, ht->next);
627  			kfree_rcu(ht, rcu);
628  			return 0;
629  		}
630  	}
631  
632  	return -ENOENT;
633  }
634  
635  static void u32_destroy(struct tcf_proto *tp, bool rtnl_held,
636  			struct netlink_ext_ack *extack)
637  {
638  	struct tc_u_common *tp_c = tp->data;
639  	struct tc_u_hnode *root_ht = rtnl_dereference(tp->root);
640  
641  	WARN_ON(root_ht == NULL);
642  
643  	if (root_ht && refcount_dec_and_test(&root_ht->refcnt))
644  		u32_destroy_hnode(tp, root_ht, extack);
645  
646  	if (refcount_dec_and_test(&tp_c->refcnt)) {
647  		struct tc_u_hnode *ht;
648  
649  		hlist_del(&tp_c->hnode);
650  
651  		while ((ht = rtnl_dereference(tp_c->hlist)) != NULL) {
652  			u32_clear_hnode(tp, ht, extack);
653  			RCU_INIT_POINTER(tp_c->hlist, ht->next);
654  
655  			/* u32_destroy_key() will later free ht for us, if it's
656  			 * still referenced by some knode
657  			 */
658  			if (refcount_dec_and_test(&ht->refcnt))
659  				kfree_rcu(ht, rcu);
660  		}
661  
662  		idr_destroy(&tp_c->handle_idr);
663  		kfree(tp_c);
664  	}
665  
666  	tp->data = NULL;
667  }
668  
669  static int u32_delete(struct tcf_proto *tp, void *arg, bool *last,
670  		      bool rtnl_held, struct netlink_ext_ack *extack)
671  {
672  	struct tc_u_hnode *ht = arg;
673  	struct tc_u_common *tp_c = tp->data;
674  	int ret = 0;
675  
676  	if (TC_U32_KEY(ht->handle)) {
677  		u32_remove_hw_knode(tp, (struct tc_u_knode *)ht, extack);
678  		ret = u32_delete_key(tp, (struct tc_u_knode *)ht);
679  		goto out;
680  	}
681  
682  	if (ht->is_root) {
683  		NL_SET_ERR_MSG_MOD(extack, "Not allowed to delete root node");
684  		return -EINVAL;
685  	}
686  
687  	if (refcount_dec_if_one(&ht->refcnt)) {
688  		u32_destroy_hnode(tp, ht, extack);
689  	} else {
690  		NL_SET_ERR_MSG_MOD(extack, "Can not delete in-use filter");
691  		return -EBUSY;
692  	}
693  
694  out:
695  	*last = refcount_read(&tp_c->refcnt) == 1 && tp_c->knodes == 0;
696  	return ret;
697  }
698  
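/* Pick a node id for a new key in hash table @ht. Ids come from the
 * table's handle_idr: the 0x800..0xfff range is tried first, then 1..0xfff,
 * and as a last resort the maximum id is used. The value returned is the
 * full handle, i.e. htid | nodeid.
 */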
699  static u32 gen_new_kid(struct tc_u_hnode *ht, u32 htid)
700  {
701  	u32 index = htid | 0x800;
702  	u32 max = htid | 0xFFF;
703  
704  	if (idr_alloc_u32(&ht->handle_idr, NULL, &index, max, GFP_KERNEL)) {
705  		index = htid + 1;
706  		if (idr_alloc_u32(&ht->handle_idr, NULL, &index, max,
707  				 GFP_KERNEL))
708  			index = max;
709  	}
710  
711  	return index;
712  }
713  
714  static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = {
715  	[TCA_U32_CLASSID]	= { .type = NLA_U32 },
716  	[TCA_U32_HASH]		= { .type = NLA_U32 },
717  	[TCA_U32_LINK]		= { .type = NLA_U32 },
718  	[TCA_U32_DIVISOR]	= { .type = NLA_U32 },
719  	[TCA_U32_SEL]		= { .len = sizeof(struct tc_u32_sel) },
720  	[TCA_U32_INDEV]		= { .type = NLA_STRING, .len = IFNAMSIZ },
721  	[TCA_U32_MARK]		= { .len = sizeof(struct tc_u32_mark) },
722  	[TCA_U32_FLAGS]		= { .type = NLA_U32 },
723  };
724  
725  static void u32_unbind_filter(struct tcf_proto *tp, struct tc_u_knode *n,
726  			      struct nlattr **tb)
727  {
728  	if (tb[TCA_U32_CLASSID])
729  		tcf_unbind_filter(tp, &n->res);
730  }
731  
732  static void u32_bind_filter(struct tcf_proto *tp, struct tc_u_knode *n,
733  			    unsigned long base, struct nlattr **tb)
734  {
735  	if (tb[TCA_U32_CLASSID]) {
736  		n->res.classid = nla_get_u32(tb[TCA_U32_CLASSID]);
737  		tcf_bind_filter(tp, &n->res, base);
738  	}
739  }
740  
741  static int u32_set_parms(struct net *net, struct tcf_proto *tp,
742  			 struct tc_u_knode *n, struct nlattr **tb,
743  			 struct nlattr *est, u32 flags, u32 fl_flags,
744  			 struct netlink_ext_ack *extack)
745  {
746  	int err, ifindex = -1;
747  
748  	err = tcf_exts_validate_ex(net, tp, tb, est, &n->exts, flags,
749  				   fl_flags, extack);
750  	if (err < 0)
751  		return err;
752  
753  	if (tb[TCA_U32_INDEV]) {
754  		ifindex = tcf_change_indev(net, tb[TCA_U32_INDEV], extack);
755  		if (ifindex < 0)
756  			return -EINVAL;
757  	}
758  
759  	if (tb[TCA_U32_LINK]) {
760  		u32 handle = nla_get_u32(tb[TCA_U32_LINK]);
761  		struct tc_u_hnode *ht_down = NULL, *ht_old;
762  
763  		if (TC_U32_KEY(handle)) {
764  			NL_SET_ERR_MSG_MOD(extack, "u32 Link handle must be a hash table");
765  			return -EINVAL;
766  		}
767  
768  		if (handle) {
769  			ht_down = u32_lookup_ht(tp->data, handle);
770  
771  			if (!ht_down) {
772  				NL_SET_ERR_MSG_MOD(extack, "Link hash table not found");
773  				return -EINVAL;
774  			}
775  			if (ht_down->is_root) {
776  				NL_SET_ERR_MSG_MOD(extack, "Not linking to root node");
777  				return -EINVAL;
778  			}
779  			refcount_inc(&ht_down->refcnt);
780  		}
781  
782  		ht_old = rtnl_dereference(n->ht_down);
783  		rcu_assign_pointer(n->ht_down, ht_down);
784  
785  		if (ht_old)
786  			refcount_dec(&ht_old->refcnt);
787  	}
788  
789  	if (ifindex >= 0)
790  		n->ifindex = ifindex;
791  
792  	return 0;
793  }
794  
795  static void u32_replace_knode(struct tcf_proto *tp, struct tc_u_common *tp_c,
796  			      struct tc_u_knode *n)
797  {
798  	struct tc_u_knode __rcu **ins;
799  	struct tc_u_knode *pins;
800  	struct tc_u_hnode *ht;
801  
802  	if (TC_U32_HTID(n->handle) == TC_U32_ROOT)
803  		ht = rtnl_dereference(tp->root);
804  	else
805  		ht = u32_lookup_ht(tp_c, TC_U32_HTID(n->handle));
806  
807  	ins = &ht->ht[TC_U32_HASH(n->handle)];
808  
809  	/* The node must always exist for it to be replaced; if this is not the
810  	 * case then something went very wrong elsewhere.
811  	 */
812  	for (pins = rtnl_dereference(*ins); ;
813  	     ins = &pins->next, pins = rtnl_dereference(*ins))
814  		if (pins->handle == n->handle)
815  			break;
816  
817  	idr_replace(&ht->handle_idr, n, n->handle);
818  	RCU_INIT_POINTER(n->next, pins->next);
819  	rcu_assign_pointer(*ins, n);
820  }
821  
822  static struct tc_u_knode *u32_init_knode(struct net *net, struct tcf_proto *tp,
823  					 struct tc_u_knode *n)
824  {
825  	struct tc_u_hnode *ht = rtnl_dereference(n->ht_down);
826  	struct tc_u32_sel *s = &n->sel;
827  	struct tc_u_knode *new;
828  
829  	new = kzalloc(struct_size(new, sel.keys, s->nkeys), GFP_KERNEL);
830  	if (!new)
831  		return NULL;
832  
833  	RCU_INIT_POINTER(new->next, n->next);
834  	new->handle = n->handle;
835  	RCU_INIT_POINTER(new->ht_up, n->ht_up);
836  
837  	new->ifindex = n->ifindex;
838  	new->fshift = n->fshift;
839  	new->flags = n->flags;
840  	RCU_INIT_POINTER(new->ht_down, ht);
841  
842  #ifdef CONFIG_CLS_U32_PERF
843  	/* Statistics may be incremented by readers during update
844  	 * so we must keep them intact. When the node is later destroyed
845  	 * a special destroy call must be made to not free the pf memory.
846  	 */
847  	new->pf = n->pf;
848  #endif
849  
850  #ifdef CONFIG_CLS_U32_MARK
851  	new->val = n->val;
852  	new->mask = n->mask;
853  	/* Similarly success statistics must be moved as pointers */
854  	new->pcpu_success = n->pcpu_success;
855  #endif
856  	memcpy(&new->sel, s, struct_size(s, keys, s->nkeys));
857  
858  	if (tcf_exts_init(&new->exts, net, TCA_U32_ACT, TCA_U32_POLICE)) {
859  		kfree(new);
860  		return NULL;
861  	}
862  
863  	/* bump reference count as long as we hold pointer to structure */
864  	if (ht)
865  		refcount_inc(&ht->refcnt);
866  
867  	return new;
868  }
869  
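/* Create or update a filter. Three cases are handled below: replacing an
 * existing key node (*arg non-NULL), creating a new hash table
 * (TCA_U32_DIVISOR present), or creating a new key node in an existing
 * (or the root) hash table.
 */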
870  static int u32_change(struct net *net, struct sk_buff *in_skb,
871  		      struct tcf_proto *tp, unsigned long base, u32 handle,
872  		      struct nlattr **tca, void **arg, u32 flags,
873  		      struct netlink_ext_ack *extack)
874  {
875  	struct tc_u_common *tp_c = tp->data;
876  	struct tc_u_hnode *ht;
877  	struct tc_u_knode *n;
878  	struct tc_u32_sel *s;
879  	struct nlattr *opt = tca[TCA_OPTIONS];
880  	struct nlattr *tb[TCA_U32_MAX + 1];
881  	u32 htid, userflags = 0;
882  	size_t sel_size;
883  	int err;
884  
885  	if (!opt) {
886  		if (handle) {
887  			NL_SET_ERR_MSG_MOD(extack, "Filter handle requires options");
888  			return -EINVAL;
889  		} else {
890  			return 0;
891  		}
892  	}
893  
894  	err = nla_parse_nested_deprecated(tb, TCA_U32_MAX, opt, u32_policy,
895  					  extack);
896  	if (err < 0)
897  		return err;
898  
899  	if (tb[TCA_U32_FLAGS]) {
900  		userflags = nla_get_u32(tb[TCA_U32_FLAGS]);
901  		if (!tc_flags_valid(userflags)) {
902  			NL_SET_ERR_MSG_MOD(extack, "Invalid filter flags");
903  			return -EINVAL;
904  		}
905  	}
906  
907  	n = *arg;
908  	if (n) {
909  		struct tc_u_knode *new;
910  
911  		if (TC_U32_KEY(n->handle) == 0) {
912  			NL_SET_ERR_MSG_MOD(extack, "Key node id cannot be zero");
913  			return -EINVAL;
914  		}
915  
916  		if ((n->flags ^ userflags) &
917  		    ~(TCA_CLS_FLAGS_IN_HW | TCA_CLS_FLAGS_NOT_IN_HW)) {
918  			NL_SET_ERR_MSG_MOD(extack, "Key node flags do not match passed flags");
919  			return -EINVAL;
920  		}
921  
922  		new = u32_init_knode(net, tp, n);
923  		if (!new)
924  			return -ENOMEM;
925  
926  		err = u32_set_parms(net, tp, new, tb, tca[TCA_RATE],
927  				    flags, new->flags, extack);
928  
929  		if (err) {
930  			__u32_destroy_key(new);
931  			return err;
932  		}
933  
934  		u32_bind_filter(tp, new, base, tb);
935  
936  		err = u32_replace_hw_knode(tp, new, flags, extack);
937  		if (err) {
938  			u32_unbind_filter(tp, new, tb);
939  
940  			if (tb[TCA_U32_LINK]) {
941  				struct tc_u_hnode *ht_old;
942  
943  				ht_old = rtnl_dereference(n->ht_down);
944  				if (ht_old)
945  					refcount_inc(&ht_old->refcnt);
946  			}
947  			__u32_destroy_key(new);
948  			return err;
949  		}
950  
951  		if (!tc_in_hw(new->flags))
952  			new->flags |= TCA_CLS_FLAGS_NOT_IN_HW;
953  
954  		u32_replace_knode(tp, tp_c, new);
955  		tcf_unbind_filter(tp, &n->res);
956  		tcf_exts_get_net(&n->exts);
957  		tcf_queue_work(&n->rwork, u32_delete_key_work);
958  		return 0;
959  	}
960  
961  	if (tb[TCA_U32_DIVISOR]) {
962  		unsigned int divisor = nla_get_u32(tb[TCA_U32_DIVISOR]);
963  
964  		if (!is_power_of_2(divisor)) {
965  			NL_SET_ERR_MSG_MOD(extack, "Divisor is not a power of 2");
966  			return -EINVAL;
967  		}
968  		if (divisor-- > 0x100) {
969  			NL_SET_ERR_MSG_MOD(extack, "Exceeded maximum 256 hash buckets");
970  			return -EINVAL;
971  		}
972  		if (TC_U32_KEY(handle)) {
973  			NL_SET_ERR_MSG_MOD(extack, "Divisor can only be used on a hash table");
974  			return -EINVAL;
975  		}
976  		ht = kzalloc(struct_size(ht, ht, divisor + 1), GFP_KERNEL);
977  		if (ht == NULL)
978  			return -ENOBUFS;
979  		if (handle == 0) {
980  			handle = gen_new_htid(tp->data, ht);
981  			if (handle == 0) {
982  				kfree(ht);
983  				return -ENOMEM;
984  			}
985  		} else {
986  			err = idr_alloc_u32(&tp_c->handle_idr, ht, &handle,
987  					    handle, GFP_KERNEL);
988  			if (err) {
989  				kfree(ht);
990  				return err;
991  			}
992  		}
993  		refcount_set(&ht->refcnt, 1);
994  		ht->divisor = divisor;
995  		ht->handle = handle;
996  		ht->prio = tp->prio;
997  		idr_init(&ht->handle_idr);
998  		ht->flags = userflags;
999  
1000  		err = u32_replace_hw_hnode(tp, ht, userflags, extack);
1001  		if (err) {
1002  			idr_remove(&tp_c->handle_idr, handle2id(handle));
1003  			kfree(ht);
1004  			return err;
1005  		}
1006  
1007  		RCU_INIT_POINTER(ht->next, tp_c->hlist);
1008  		rcu_assign_pointer(tp_c->hlist, ht);
1009  		*arg = ht;
1010  
1011  		return 0;
1012  	}
1013  
1014  	if (tb[TCA_U32_HASH]) {
1015  		htid = nla_get_u32(tb[TCA_U32_HASH]);
1016  		if (TC_U32_HTID(htid) == TC_U32_ROOT) {
1017  			ht = rtnl_dereference(tp->root);
1018  			htid = ht->handle;
1019  		} else {
1020  			ht = u32_lookup_ht(tp->data, TC_U32_HTID(htid));
1021  			if (!ht) {
1022  				NL_SET_ERR_MSG_MOD(extack, "Specified hash table not found");
1023  				return -EINVAL;
1024  			}
1025  		}
1026  	} else {
1027  		ht = rtnl_dereference(tp->root);
1028  		htid = ht->handle;
1029  	}
1030  
1031  	if (ht->divisor < TC_U32_HASH(htid)) {
1032  		NL_SET_ERR_MSG_MOD(extack, "Specified hash table buckets exceed configured value");
1033  		return -EINVAL;
1034  	}
1035  
1036  	/* At this point, we need to derive the new handle that will be used to
1037  	 * uniquely map the identity of this table match entry. The
1038  	 * identity of the entry that we need to construct is 32 bits made of:
1039  	 *     htid(12b):bucketid(8b):node/entryid(12b)
1040  	 *
1041  	 * At this point _we have the table(ht)_ in which we will insert this
1042  	 * entry. We carry the table's id in variable "htid".
1043  	 * Note that earlier code picked the ht selection either by a) the user
1044  	 * providing the htid specified via the TCA_U32_HASH attribute, or b) when
1045  	 * no such attribute is passed, defaulting to the root ht at ID
1046  	 * 0x[800][00][000]. Rule: the root table has a single bucket with ID 0.
1047  	 * If OTOH the user passed us the htid, they may also pass a bucketid of
1048  	 * choice. 0 is fine. For example, a user htid of 0x[600][01][000]
1049  	 * indicates hash bucketid 1. Rule: the entry/node ID _cannot_ be
1050  	 * passed via the htid, so even if it was non-zero it will be ignored.
1051  	 *
1052  	 * We may also have a handle, if the user passed one. The handle also
1053  	 * carries the same addressing of htid(12b):bucketid(8b):node/entryid(12b).
1054  	 * Rule: the bucketid on the handle is ignored even if one was passed;
1055  	 * rather the value on "htid" is always assumed to be the bucketid.
1056  	 */
1057  	if (handle) {
1058  		/* Rule: The htid from handle and tableid from htid must match */
1059  		if (TC_U32_HTID(handle) && TC_U32_HTID(handle ^ htid)) {
1060  			NL_SET_ERR_MSG_MOD(extack, "Handle specified hash table address mismatch");
1061  			return -EINVAL;
1062  		}
1063  		/* Ok, so far we have a valid htid(12b):bucketid(8b) but we
1064  		 * need to finalize the table entry identification with the last
1065  		 * part - the node/entryid(12b). Rule: Nodeid _cannot be 0_ for
1066  		 * entries. Rule: nodeid of 0 is reserved only for tables (see
1067  		 * earlier code which processes TC_U32_DIVISOR attribute).
1068  		 * Rule: The nodeid can only be derived from the handle (and not
1069  		 * htid).
1070  		 * Rule: if the handle specified zero for the node id, for example
1071  		 * 0x60000000, then pick a new nodeid from the pool of IDs
1072  		 * this hash table has been allocating from.
1073  		 * If OTOH it is specified (for example the user passed a
1074  		 * handle such as 0x60000123), then we use it to generate our final
1075  		 * handle, which is used to uniquely identify the match entry.
1076  		 */
1077  		if (!TC_U32_NODE(handle)) {
1078  			handle = gen_new_kid(ht, htid);
1079  		} else {
1080  			handle = htid | TC_U32_NODE(handle);
1081  			err = idr_alloc_u32(&ht->handle_idr, NULL, &handle,
1082  					    handle, GFP_KERNEL);
1083  			if (err)
1084  				return err;
1085  		}
1086  	} else {
1087  		/* The user did not give us a handle; lets just generate one
1088  		 * from the table's pool of nodeids.
1089  		 */
1090  		handle = gen_new_kid(ht, htid);
1091  	}
1092  
1093  	if (tb[TCA_U32_SEL] == NULL) {
1094  		NL_SET_ERR_MSG_MOD(extack, "Selector not specified");
1095  		err = -EINVAL;
1096  		goto erridr;
1097  	}
1098  
1099  	s = nla_data(tb[TCA_U32_SEL]);
1100  	sel_size = struct_size(s, keys, s->nkeys);
1101  	if (nla_len(tb[TCA_U32_SEL]) < sel_size) {
1102  		err = -EINVAL;
1103  		goto erridr;
1104  	}
1105  
1106  	n = kzalloc(struct_size(n, sel.keys, s->nkeys), GFP_KERNEL);
1107  	if (n == NULL) {
1108  		err = -ENOBUFS;
1109  		goto erridr;
1110  	}
1111  
1112  #ifdef CONFIG_CLS_U32_PERF
1113  	n->pf = __alloc_percpu(struct_size(n->pf, kcnts, s->nkeys),
1114  			       __alignof__(struct tc_u32_pcnt));
1115  	if (!n->pf) {
1116  		err = -ENOBUFS;
1117  		goto errfree;
1118  	}
1119  #endif
1120  
1121  	unsafe_memcpy(&n->sel, s, sel_size,
1122  		      /* A composite flex-array structure destination,
1123  		       * which was correctly sized with struct_size(),
1124  		       * bounds-checked against nla_len(), and allocated
1125  		       * above. */);
1126  	RCU_INIT_POINTER(n->ht_up, ht);
1127  	n->handle = handle;
1128  	n->fshift = s->hmask ? ffs(ntohl(s->hmask)) - 1 : 0;
1129  	n->flags = userflags;
1130  
1131  	err = tcf_exts_init(&n->exts, net, TCA_U32_ACT, TCA_U32_POLICE);
1132  	if (err < 0)
1133  		goto errout;
1134  
1135  #ifdef CONFIG_CLS_U32_MARK
1136  	n->pcpu_success = alloc_percpu(u32);
1137  	if (!n->pcpu_success) {
1138  		err = -ENOMEM;
1139  		goto errout;
1140  	}
1141  
1142  	if (tb[TCA_U32_MARK]) {
1143  		struct tc_u32_mark *mark;
1144  
1145  		mark = nla_data(tb[TCA_U32_MARK]);
1146  		n->val = mark->val;
1147  		n->mask = mark->mask;
1148  	}
1149  #endif
1150  
1151  	err = u32_set_parms(net, tp, n, tb, tca[TCA_RATE],
1152  			    flags, n->flags, extack);
1153  
1154  	u32_bind_filter(tp, n, base, tb);
1155  
1156  	if (err == 0) {
1157  		struct tc_u_knode __rcu **ins;
1158  		struct tc_u_knode *pins;
1159  
1160  		err = u32_replace_hw_knode(tp, n, flags, extack);
1161  		if (err)
1162  			goto errunbind;
1163  
1164  		if (!tc_in_hw(n->flags))
1165  			n->flags |= TCA_CLS_FLAGS_NOT_IN_HW;
1166  
1167  		ins = &ht->ht[TC_U32_HASH(handle)];
1168  		for (pins = rtnl_dereference(*ins); pins;
1169  		     ins = &pins->next, pins = rtnl_dereference(*ins))
1170  			if (TC_U32_NODE(handle) < TC_U32_NODE(pins->handle))
1171  				break;
1172  
1173  		RCU_INIT_POINTER(n->next, pins);
1174  		rcu_assign_pointer(*ins, n);
1175  		tp_c->knodes++;
1176  		*arg = n;
1177  		return 0;
1178  	}
1179  
1180  errunbind:
1181  	u32_unbind_filter(tp, n, tb);
1182  
1183  #ifdef CONFIG_CLS_U32_MARK
1184  	free_percpu(n->pcpu_success);
1185  #endif
1186  
1187  errout:
1188  	tcf_exts_destroy(&n->exts);
1189  #ifdef CONFIG_CLS_U32_PERF
1190  errfree:
1191  	free_percpu(n->pf);
1192  #endif
1193  	kfree(n);
1194  erridr:
1195  	idr_remove(&ht->handle_idr, handle);
1196  	return err;
1197  }
1198  
1199  static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg,
1200  		     bool rtnl_held)
1201  {
1202  	struct tc_u_common *tp_c = tp->data;
1203  	struct tc_u_hnode *ht;
1204  	struct tc_u_knode *n;
1205  	unsigned int h;
1206  
1207  	if (arg->stop)
1208  		return;
1209  
1210  	for (ht = rtnl_dereference(tp_c->hlist);
1211  	     ht;
1212  	     ht = rtnl_dereference(ht->next)) {
1213  		if (ht->prio != tp->prio)
1214  			continue;
1215  
1216  		if (!tc_cls_stats_dump(tp, arg, ht))
1217  			return;
1218  
1219  		for (h = 0; h <= ht->divisor; h++) {
1220  			for (n = rtnl_dereference(ht->ht[h]);
1221  			     n;
1222  			     n = rtnl_dereference(n->next)) {
1223  				if (!tc_cls_stats_dump(tp, arg, n))
1224  					return;
1225  			}
1226  		}
1227  	}
1228  }
1229  
1230  static int u32_reoffload_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht,
1231  			       bool add, flow_setup_cb_t *cb, void *cb_priv,
1232  			       struct netlink_ext_ack *extack)
1233  {
1234  	struct tc_cls_u32_offload cls_u32 = {};
1235  	int err;
1236  
1237  	tc_cls_common_offload_init(&cls_u32.common, tp, ht->flags, extack);
1238  	cls_u32.command = add ? TC_CLSU32_NEW_HNODE : TC_CLSU32_DELETE_HNODE;
1239  	cls_u32.hnode.divisor = ht->divisor;
1240  	cls_u32.hnode.handle = ht->handle;
1241  	cls_u32.hnode.prio = ht->prio;
1242  
1243  	err = cb(TC_SETUP_CLSU32, &cls_u32, cb_priv);
1244  	if (err && add && tc_skip_sw(ht->flags))
1245  		return err;
1246  
1247  	return 0;
1248  }
1249  
1250  static int u32_reoffload_knode(struct tcf_proto *tp, struct tc_u_knode *n,
1251  			       bool add, flow_setup_cb_t *cb, void *cb_priv,
1252  			       struct netlink_ext_ack *extack)
1253  {
1254  	struct tc_u_hnode *ht = rtnl_dereference(n->ht_down);
1255  	struct tcf_block *block = tp->chain->block;
1256  	struct tc_cls_u32_offload cls_u32 = {};
1257  
1258  	tc_cls_common_offload_init(&cls_u32.common, tp, n->flags, extack);
1259  	cls_u32.command = add ?
1260  		TC_CLSU32_REPLACE_KNODE : TC_CLSU32_DELETE_KNODE;
1261  	cls_u32.knode.handle = n->handle;
1262  
1263  	if (add) {
1264  		cls_u32.knode.fshift = n->fshift;
1265  #ifdef CONFIG_CLS_U32_MARK
1266  		cls_u32.knode.val = n->val;
1267  		cls_u32.knode.mask = n->mask;
1268  #else
1269  		cls_u32.knode.val = 0;
1270  		cls_u32.knode.mask = 0;
1271  #endif
1272  		cls_u32.knode.sel = &n->sel;
1273  		cls_u32.knode.res = &n->res;
1274  		cls_u32.knode.exts = &n->exts;
1275  		if (n->ht_down)
1276  			cls_u32.knode.link_handle = ht->handle;
1277  	}
1278  
1279  	return tc_setup_cb_reoffload(block, tp, add, cb, TC_SETUP_CLSU32,
1280  				     &cls_u32, cb_priv, &n->flags,
1281  				     &n->in_hw_count);
1282  }
1283  
1284  static int u32_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb,
1285  			 void *cb_priv, struct netlink_ext_ack *extack)
1286  {
1287  	struct tc_u_common *tp_c = tp->data;
1288  	struct tc_u_hnode *ht;
1289  	struct tc_u_knode *n;
1290  	unsigned int h;
1291  	int err;
1292  
1293  	for (ht = rtnl_dereference(tp_c->hlist);
1294  	     ht;
1295  	     ht = rtnl_dereference(ht->next)) {
1296  		if (ht->prio != tp->prio)
1297  			continue;
1298  
1299  		/* When adding filters to a new dev, try to offload the
1300  		 * hashtable first. When removing, do the filters before the
1301  		 * hashtable.
1302  		 */
1303  		if (add && !tc_skip_hw(ht->flags)) {
1304  			err = u32_reoffload_hnode(tp, ht, add, cb, cb_priv,
1305  						  extack);
1306  			if (err)
1307  				return err;
1308  		}
1309  
1310  		for (h = 0; h <= ht->divisor; h++) {
1311  			for (n = rtnl_dereference(ht->ht[h]);
1312  			     n;
1313  			     n = rtnl_dereference(n->next)) {
1314  				if (tc_skip_hw(n->flags))
1315  					continue;
1316  
1317  				err = u32_reoffload_knode(tp, n, add, cb,
1318  							  cb_priv, extack);
1319  				if (err)
1320  					return err;
1321  			}
1322  		}
1323  
1324  		if (!add && !tc_skip_hw(ht->flags))
1325  			u32_reoffload_hnode(tp, ht, add, cb, cb_priv, extack);
1326  	}
1327  
1328  	return 0;
1329  }
1330  
1331  static void u32_bind_class(void *fh, u32 classid, unsigned long cl, void *q,
1332  			   unsigned long base)
1333  {
1334  	struct tc_u_knode *n = fh;
1335  
1336  	tc_cls_bind_class(classid, cl, q, &n->res, base);
1337  }
1338  
1339  static int u32_dump(struct net *net, struct tcf_proto *tp, void *fh,
1340  		    struct sk_buff *skb, struct tcmsg *t, bool rtnl_held)
1341  {
1342  	struct tc_u_knode *n = fh;
1343  	struct tc_u_hnode *ht_up, *ht_down;
1344  	struct nlattr *nest;
1345  
1346  	if (n == NULL)
1347  		return skb->len;
1348  
1349  	t->tcm_handle = n->handle;
1350  
1351  	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
1352  	if (nest == NULL)
1353  		goto nla_put_failure;
1354  
1355  	if (TC_U32_KEY(n->handle) == 0) {
1356  		struct tc_u_hnode *ht = fh;
1357  		u32 divisor = ht->divisor + 1;
1358  
1359  		if (nla_put_u32(skb, TCA_U32_DIVISOR, divisor))
1360  			goto nla_put_failure;
1361  	} else {
1362  #ifdef CONFIG_CLS_U32_PERF
1363  		struct tc_u32_pcnt *gpf;
1364  		int cpu;
1365  #endif
1366  
1367  		if (nla_put(skb, TCA_U32_SEL, struct_size(&n->sel, keys, n->sel.nkeys),
1368  			    &n->sel))
1369  			goto nla_put_failure;
1370  
1371  		ht_up = rtnl_dereference(n->ht_up);
1372  		if (ht_up) {
1373  			u32 htid = n->handle & 0xFFFFF000;
1374  			if (nla_put_u32(skb, TCA_U32_HASH, htid))
1375  				goto nla_put_failure;
1376  		}
1377  		if (n->res.classid &&
1378  		    nla_put_u32(skb, TCA_U32_CLASSID, n->res.classid))
1379  			goto nla_put_failure;
1380  
1381  		ht_down = rtnl_dereference(n->ht_down);
1382  		if (ht_down &&
1383  		    nla_put_u32(skb, TCA_U32_LINK, ht_down->handle))
1384  			goto nla_put_failure;
1385  
1386  		if (n->flags && nla_put_u32(skb, TCA_U32_FLAGS, n->flags))
1387  			goto nla_put_failure;
1388  
1389  #ifdef CONFIG_CLS_U32_MARK
1390  		if ((n->val || n->mask)) {
1391  			struct tc_u32_mark mark = {.val = n->val,
1392  						   .mask = n->mask,
1393  						   .success = 0};
1394  			int cpum;
1395  
1396  			for_each_possible_cpu(cpum) {
1397  				__u32 cnt = *per_cpu_ptr(n->pcpu_success, cpum);
1398  
1399  				mark.success += cnt;
1400  			}
1401  
1402  			if (nla_put(skb, TCA_U32_MARK, sizeof(mark), &mark))
1403  				goto nla_put_failure;
1404  		}
1405  #endif
1406  
1407  		if (tcf_exts_dump(skb, &n->exts) < 0)
1408  			goto nla_put_failure;
1409  
1410  		if (n->ifindex) {
1411  			struct net_device *dev;
1412  			dev = __dev_get_by_index(net, n->ifindex);
1413  			if (dev && nla_put_string(skb, TCA_U32_INDEV, dev->name))
1414  				goto nla_put_failure;
1415  		}
1416  #ifdef CONFIG_CLS_U32_PERF
1417  		gpf = kzalloc(struct_size(gpf, kcnts, n->sel.nkeys), GFP_KERNEL);
1418  		if (!gpf)
1419  			goto nla_put_failure;
1420  
1421  		for_each_possible_cpu(cpu) {
1422  			int i;
1423  			struct tc_u32_pcnt *pf = per_cpu_ptr(n->pf, cpu);
1424  
1425  			gpf->rcnt += pf->rcnt;
1426  			gpf->rhit += pf->rhit;
1427  			for (i = 0; i < n->sel.nkeys; i++)
1428  				gpf->kcnts[i] += pf->kcnts[i];
1429  		}
1430  
1431  		if (nla_put_64bit(skb, TCA_U32_PCNT, struct_size(gpf, kcnts, n->sel.nkeys),
1432  				  gpf, TCA_U32_PAD)) {
1433  			kfree(gpf);
1434  			goto nla_put_failure;
1435  		}
1436  		kfree(gpf);
1437  #endif
1438  	}
1439  
1440  	nla_nest_end(skb, nest);
1441  
1442  	if (TC_U32_KEY(n->handle))
1443  		if (tcf_exts_dump_stats(skb, &n->exts) < 0)
1444  			goto nla_put_failure;
1445  	return skb->len;
1446  
1447  nla_put_failure:
1448  	nla_nest_cancel(skb, nest);
1449  	return -1;
1450  }
1451  
1452  static struct tcf_proto_ops cls_u32_ops __read_mostly = {
1453  	.kind		=	"u32",
1454  	.classify	=	u32_classify,
1455  	.init		=	u32_init,
1456  	.destroy	=	u32_destroy,
1457  	.get		=	u32_get,
1458  	.change		=	u32_change,
1459  	.delete		=	u32_delete,
1460  	.walk		=	u32_walk,
1461  	.reoffload	=	u32_reoffload,
1462  	.dump		=	u32_dump,
1463  	.bind_class	=	u32_bind_class,
1464  	.owner		=	THIS_MODULE,
1465  };
1466  MODULE_ALIAS_NET_CLS("u32");
1467  
1468  static int __init init_u32(void)
1469  {
1470  	int i, ret;
1471  
1472  	pr_info("u32 classifier\n");
1473  #ifdef CONFIG_CLS_U32_PERF
1474  	pr_info("    Performance counters on\n");
1475  #endif
1476  	pr_info("    input device check on\n");
1477  #ifdef CONFIG_NET_CLS_ACT
1478  	pr_info("    Actions configured\n");
1479  #endif
1480  	tc_u_common_hash = kvmalloc_array(U32_HASH_SIZE,
1481  					  sizeof(struct hlist_head),
1482  					  GFP_KERNEL);
1483  	if (!tc_u_common_hash)
1484  		return -ENOMEM;
1485  
1486  	for (i = 0; i < U32_HASH_SIZE; i++)
1487  		INIT_HLIST_HEAD(&tc_u_common_hash[i]);
1488  
1489  	ret = register_tcf_proto_ops(&cls_u32_ops);
1490  	if (ret)
1491  		kvfree(tc_u_common_hash);
1492  	return ret;
1493  }
1494  
1495  static void __exit exit_u32(void)
1496  {
1497  	unregister_tcf_proto_ops(&cls_u32_ops);
1498  	kvfree(tc_u_common_hash);
1499  }
1500  
1501  module_init(init_u32)
1502  module_exit(exit_u32)
1503  MODULE_DESCRIPTION("Universal 32bit based TC Classifier");
1504  MODULE_LICENSE("GPL");
1505