/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Generic nexthop implementation
 *
 * Copyright (c) 2017-19 Cumulus Networks
 * Copyright (c) 2017-19 David Ahern <dsa@cumulusnetworks.com>
 */
8  
9  #ifndef __LINUX_NEXTHOP_H
10  #define __LINUX_NEXTHOP_H
11  
12  #include <linux/netdevice.h>
13  #include <linux/notifier.h>
14  #include <linux/route.h>
15  #include <linux/types.h>
16  #include <net/ip_fib.h>
17  #include <net/ip6_fib.h>
18  #include <net/netlink.h>
19  
/* Mask of valid user nexthop flags; currently just RTNH_F_ONLINK. */
#define NEXTHOP_VALID_USER_FLAGS RTNH_F_ONLINK

/* Forward declaration; the full definition appears later in this header. */
struct nexthop;
23  
24  struct nh_config {
25  	u32		nh_id;
26  
27  	u8		nh_family;
28  	u8		nh_protocol;
29  	u8		nh_blackhole;
30  	u8		nh_fdb;
31  	u32		nh_flags;
32  
33  	int		nh_ifindex;
34  	struct net_device *dev;
35  
36  	union {
37  		__be32		ipv4;
38  		struct in6_addr	ipv6;
39  	} gw;
40  
41  	struct nlattr	*nh_grp;
42  	u16		nh_grp_type;
43  	u16		nh_grp_res_num_buckets;
44  	unsigned long	nh_grp_res_idle_timer;
45  	unsigned long	nh_grp_res_unbalanced_timer;
46  	bool		nh_grp_res_has_num_buckets;
47  	bool		nh_grp_res_has_idle_timer;
48  	bool		nh_grp_res_has_unbalanced_timer;
49  
50  	bool		nh_hw_stats;
51  
52  	struct nlattr	*nh_encap;
53  	u16		nh_encap_type;
54  
55  	u32		nlflags;
56  	struct nl_info	nlinfo;
57  };
58  
59  struct nh_info {
60  	struct hlist_node	dev_hash;    /* entry on netns devhash */
61  	struct nexthop		*nh_parent;
62  
63  	u8			family;
64  	bool			reject_nh;
65  	bool			fdb_nh;
66  
67  	union {
68  		struct fib_nh_common	fib_nhc;
69  		struct fib_nh		fib_nh;
70  		struct fib6_nh		fib6_nh;
71  	};
72  };
73  
74  struct nh_res_bucket {
75  	struct nh_grp_entry __rcu *nh_entry;
76  	atomic_long_t		used_time;
77  	unsigned long		migrated_time;
78  	bool			occupied;
79  	u8			nh_flags;
80  };
81  
82  struct nh_res_table {
83  	struct net		*net;
84  	u32			nhg_id;
85  	struct delayed_work	upkeep_dw;
86  
87  	/* List of NHGEs that have too few buckets ("uw" for underweight).
88  	 * Reclaimed buckets will be given to entries in this list.
89  	 */
90  	struct list_head	uw_nh_entries;
91  	unsigned long		unbalanced_since;
92  
93  	u32			idle_timer;
94  	u32			unbalanced_timer;
95  
96  	u16			num_nh_buckets;
97  	struct nh_res_bucket	nh_buckets[] __counted_by(num_nh_buckets);
98  };
99  
100  struct nh_grp_entry_stats {
101  	u64_stats_t packets;
102  	struct u64_stats_sync syncp;
103  };
104  
105  struct nh_grp_entry {
106  	struct nexthop	*nh;
107  	struct nh_grp_entry_stats __percpu	*stats;
108  	u16		weight;
109  
110  	union {
111  		struct {
112  			atomic_t	upper_bound;
113  		} hthr;
114  		struct {
115  			/* Member on uw_nh_entries. */
116  			struct list_head	uw_nh_entry;
117  
118  			u16			count_buckets;
119  			u16			wants_buckets;
120  		} res;
121  	};
122  
123  	struct list_head nh_list;
124  	struct nexthop	*nh_parent;  /* nexthop of group with this entry */
125  	u64		packets_hw;
126  };
127  
128  struct nh_group {
129  	struct nh_group		*spare; /* spare group for removals */
130  	u16			num_nh;
131  	bool			is_multipath;
132  	bool			hash_threshold;
133  	bool			resilient;
134  	bool			fdb_nh;
135  	bool			has_v4;
136  	bool			hw_stats;
137  
138  	struct nh_res_table __rcu *res_table;
139  	struct nh_grp_entry	nh_entries[] __counted_by(num_nh);
140  };
141  
142  struct nexthop {
143  	struct rb_node		rb_node;    /* entry on netns rbtree */
144  	struct list_head	fi_list;    /* v4 entries using nh */
145  	struct list_head	f6i_list;   /* v6 entries using nh */
146  	struct list_head        fdb_list;   /* fdb entries using this nh */
147  	struct list_head	grp_list;   /* nh group entries using this nh */
148  	struct net		*net;
149  
150  	u32			id;
151  
152  	u8			protocol;   /* app managing this nh */
153  	u8			nh_flags;
154  	bool			is_group;
155  
156  	refcount_t		refcnt;
157  	struct rcu_head		rcu;
158  
159  	union {
160  		struct nh_info	__rcu *nh_info;
161  		struct nh_group __rcu *nh_grp;
162  	};
163  };
164  
/*
 * Nexthop event identifiers.
 * NOTE(review): presumably the event value delivered to notifier blocks
 * registered with register_nexthop_notifier() - confirm in the core code.
 */
enum nexthop_event_type {
	NEXTHOP_EVENT_DEL,
	NEXTHOP_EVENT_REPLACE,
	NEXTHOP_EVENT_RES_TABLE_PRE_REPLACE,
	NEXTHOP_EVENT_BUCKET_REPLACE,
	NEXTHOP_EVENT_HW_STATS_REPORT_DELTA,
};
172  
/*
 * Type tag stored in nh_notifier_info::type.
 * NOTE(review): presumably each value names the matching member of the
 * union in struct nh_notifier_info - confirm.
 */
enum nh_notifier_info_type {
	NH_NOTIFIER_INFO_TYPE_SINGLE,
	NH_NOTIFIER_INFO_TYPE_GRP,
	NH_NOTIFIER_INFO_TYPE_RES_TABLE,
	NH_NOTIFIER_INFO_TYPE_RES_BUCKET,
	NH_NOTIFIER_INFO_TYPE_GRP_HW_STATS,
};
180  
181  struct nh_notifier_single_info {
182  	struct net_device *dev;
183  	u8 gw_family;
184  	union {
185  		__be32 ipv4;
186  		struct in6_addr ipv6;
187  	};
188  	u32 id;
189  	u8 is_reject:1,
190  	   is_fdb:1,
191  	   has_encap:1;
192  };
193  
194  struct nh_notifier_grp_entry_info {
195  	u16 weight;
196  	struct nh_notifier_single_info nh;
197  };
198  
199  struct nh_notifier_grp_info {
200  	u16 num_nh;
201  	bool is_fdb;
202  	bool hw_stats;
203  	struct nh_notifier_grp_entry_info nh_entries[] __counted_by(num_nh);
204  };
205  
206  struct nh_notifier_res_bucket_info {
207  	u16 bucket_index;
208  	unsigned int idle_timer_ms;
209  	bool force;
210  	struct nh_notifier_single_info old_nh;
211  	struct nh_notifier_single_info new_nh;
212  };
213  
214  struct nh_notifier_res_table_info {
215  	u16 num_nh_buckets;
216  	bool hw_stats;
217  	struct nh_notifier_single_info nhs[] __counted_by(num_nh_buckets);
218  };
219  
220  struct nh_notifier_grp_hw_stats_entry_info {
221  	u32 id;
222  	u64 packets;
223  };
224  
225  struct nh_notifier_grp_hw_stats_info {
226  	u16 num_nh;
227  	bool hw_stats_used;
228  	struct nh_notifier_grp_hw_stats_entry_info stats[] __counted_by(num_nh);
229  };
230  
231  struct nh_notifier_info {
232  	struct net *net;
233  	struct netlink_ext_ack *extack;
234  	u32 id;
235  	enum nh_notifier_info_type type;
236  	union {
237  		struct nh_notifier_single_info *nh;
238  		struct nh_notifier_grp_info *nh_grp;
239  		struct nh_notifier_res_table_info *nh_res_table;
240  		struct nh_notifier_res_bucket_info *nh_res_bucket;
241  		struct nh_notifier_grp_hw_stats_info *nh_grp_hw_stats;
242  	};
243  };
244  
245  int register_nexthop_notifier(struct net *net, struct notifier_block *nb,
246  			      struct netlink_ext_ack *extack);
247  int __unregister_nexthop_notifier(struct net *net, struct notifier_block *nb);
248  int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb);
249  void nexthop_set_hw_flags(struct net *net, u32 id, bool offload, bool trap);
250  void nexthop_bucket_set_hw_flags(struct net *net, u32 id, u16 bucket_index,
251  				 bool offload, bool trap);
252  void nexthop_res_grp_activity_update(struct net *net, u32 id, u16 num_buckets,
253  				     unsigned long *activity);
254  void nh_grp_hw_stats_report_delta(struct nh_notifier_grp_hw_stats_info *info,
255  				  unsigned int nh_idx,
256  				  u64 delta_packets);
257  
258  /* caller is holding rcu or rtnl; no reference taken to nexthop */
259  struct nexthop *nexthop_find_by_id(struct net *net, u32 id);
260  void nexthop_free_rcu(struct rcu_head *head);
261  
nexthop_get(struct nexthop * nh)262  static inline bool nexthop_get(struct nexthop *nh)
263  {
264  	return refcount_inc_not_zero(&nh->refcnt);
265  }
266  
nexthop_put(struct nexthop * nh)267  static inline void nexthop_put(struct nexthop *nh)
268  {
269  	if (refcount_dec_and_test(&nh->refcnt))
270  		call_rcu_hurry(&nh->rcu, nexthop_free_rcu);
271  }
272  
/*
 * Compare two nexthops by identity: returns true only when @nh1 and @nh2
 * are the same pointer. The objects themselves are not inspected.
 */
static inline bool nexthop_cmp(const struct nexthop *nh1,
			       const struct nexthop *nh2)
{
	return nh1 == nh2;
}
278  
nexthop_is_fdb(const struct nexthop * nh)279  static inline bool nexthop_is_fdb(const struct nexthop *nh)
280  {
281  	if (nh->is_group) {
282  		const struct nh_group *nh_grp;
283  
284  		nh_grp = rcu_dereference_rtnl(nh->nh_grp);
285  		return nh_grp->fdb_nh;
286  	} else {
287  		const struct nh_info *nhi;
288  
289  		nhi = rcu_dereference_rtnl(nh->nh_info);
290  		return nhi->fdb_nh;
291  	}
292  }
293  
nexthop_has_v4(const struct nexthop * nh)294  static inline bool nexthop_has_v4(const struct nexthop *nh)
295  {
296  	if (nh->is_group) {
297  		struct nh_group *nh_grp;
298  
299  		nh_grp = rcu_dereference_rtnl(nh->nh_grp);
300  		return nh_grp->has_v4;
301  	}
302  	return false;
303  }
304  
nexthop_is_multipath(const struct nexthop * nh)305  static inline bool nexthop_is_multipath(const struct nexthop *nh)
306  {
307  	if (nh->is_group) {
308  		struct nh_group *nh_grp;
309  
310  		nh_grp = rcu_dereference_rtnl(nh->nh_grp);
311  		return nh_grp->is_multipath;
312  	}
313  	return false;
314  }
315  
/* Defined outside this header; the path helpers below call it to choose a
 * nexthop for @hash. nexthop_path_fdb_result() treats a NULL return as
 * possible.
 */
struct nexthop *nexthop_select_path(struct nexthop *nh, int hash);
317  
nexthop_num_path(const struct nexthop * nh)318  static inline unsigned int nexthop_num_path(const struct nexthop *nh)
319  {
320  	unsigned int rc = 1;
321  
322  	if (nh->is_group) {
323  		struct nh_group *nh_grp;
324  
325  		nh_grp = rcu_dereference_rtnl(nh->nh_grp);
326  		if (nh_grp->is_multipath)
327  			rc = nh_grp->num_nh;
328  	}
329  
330  	return rc;
331  }
332  
333  static inline
nexthop_mpath_select(const struct nh_group * nhg,int nhsel)334  struct nexthop *nexthop_mpath_select(const struct nh_group *nhg, int nhsel)
335  {
336  	/* for_nexthops macros in fib_semantics.c grabs a pointer to
337  	 * the nexthop before checking nhsel
338  	 */
339  	if (nhsel >= nhg->num_nh)
340  		return NULL;
341  
342  	return nhg->nh_entries[nhsel].nh;
343  }
344  
345  static inline
nexthop_mpath_fill_node(struct sk_buff * skb,struct nexthop * nh,u8 rt_family)346  int nexthop_mpath_fill_node(struct sk_buff *skb, struct nexthop *nh,
347  			    u8 rt_family)
348  {
349  	struct nh_group *nhg = rcu_dereference_rtnl(nh->nh_grp);
350  	int i;
351  
352  	for (i = 0; i < nhg->num_nh; i++) {
353  		struct nexthop *nhe = nhg->nh_entries[i].nh;
354  		struct nh_info *nhi = rcu_dereference_rtnl(nhe->nh_info);
355  		struct fib_nh_common *nhc = &nhi->fib_nhc;
356  		int weight = nhg->nh_entries[i].weight;
357  
358  		if (fib_add_nexthop(skb, nhc, weight, rt_family, 0) < 0)
359  			return -EMSGSIZE;
360  	}
361  
362  	return 0;
363  }
364  
365  /* called with rcu lock */
nexthop_is_blackhole(const struct nexthop * nh)366  static inline bool nexthop_is_blackhole(const struct nexthop *nh)
367  {
368  	const struct nh_info *nhi;
369  
370  	if (nh->is_group) {
371  		struct nh_group *nh_grp;
372  
373  		nh_grp = rcu_dereference_rtnl(nh->nh_grp);
374  		if (nh_grp->num_nh > 1)
375  			return false;
376  
377  		nh = nh_grp->nh_entries[0].nh;
378  	}
379  
380  	nhi = rcu_dereference_rtnl(nh->nh_info);
381  	return nhi->reject_nh;
382  }
383  
nexthop_path_fib_result(struct fib_result * res,int hash)384  static inline void nexthop_path_fib_result(struct fib_result *res, int hash)
385  {
386  	struct nh_info *nhi;
387  	struct nexthop *nh;
388  
389  	nh = nexthop_select_path(res->fi->nh, hash);
390  	nhi = rcu_dereference(nh->nh_info);
391  	res->nhc = &nhi->fib_nhc;
392  }
393  
394  /* called with rcu read lock or rtnl held */
395  static inline
nexthop_fib_nhc(struct nexthop * nh,int nhsel)396  struct fib_nh_common *nexthop_fib_nhc(struct nexthop *nh, int nhsel)
397  {
398  	struct nh_info *nhi;
399  
400  	BUILD_BUG_ON(offsetof(struct fib_nh, nh_common) != 0);
401  	BUILD_BUG_ON(offsetof(struct fib6_nh, nh_common) != 0);
402  
403  	if (nh->is_group) {
404  		struct nh_group *nh_grp;
405  
406  		nh_grp = rcu_dereference_rtnl(nh->nh_grp);
407  		if (nh_grp->is_multipath) {
408  			nh = nexthop_mpath_select(nh_grp, nhsel);
409  			if (!nh)
410  				return NULL;
411  		}
412  	}
413  
414  	nhi = rcu_dereference_rtnl(nh->nh_info);
415  	return &nhi->fib_nhc;
416  }
417  
418  /* called from fib_table_lookup with rcu_lock */
419  static inline
nexthop_get_nhc_lookup(const struct nexthop * nh,int fib_flags,const struct flowi4 * flp,int * nhsel)420  struct fib_nh_common *nexthop_get_nhc_lookup(const struct nexthop *nh,
421  					     int fib_flags,
422  					     const struct flowi4 *flp,
423  					     int *nhsel)
424  {
425  	struct nh_info *nhi;
426  
427  	if (nh->is_group) {
428  		struct nh_group *nhg = rcu_dereference(nh->nh_grp);
429  		int i;
430  
431  		for (i = 0; i < nhg->num_nh; i++) {
432  			struct nexthop *nhe = nhg->nh_entries[i].nh;
433  
434  			nhi = rcu_dereference(nhe->nh_info);
435  			if (fib_lookup_good_nhc(&nhi->fib_nhc, fib_flags, flp)) {
436  				*nhsel = i;
437  				return &nhi->fib_nhc;
438  			}
439  		}
440  	} else {
441  		nhi = rcu_dereference(nh->nh_info);
442  		if (fib_lookup_good_nhc(&nhi->fib_nhc, fib_flags, flp)) {
443  			*nhsel = 0;
444  			return &nhi->fib_nhc;
445  		}
446  	}
447  
448  	return NULL;
449  }
450  
nexthop_uses_dev(const struct nexthop * nh,const struct net_device * dev)451  static inline bool nexthop_uses_dev(const struct nexthop *nh,
452  				    const struct net_device *dev)
453  {
454  	struct nh_info *nhi;
455  
456  	if (nh->is_group) {
457  		struct nh_group *nhg = rcu_dereference(nh->nh_grp);
458  		int i;
459  
460  		for (i = 0; i < nhg->num_nh; i++) {
461  			struct nexthop *nhe = nhg->nh_entries[i].nh;
462  
463  			nhi = rcu_dereference(nhe->nh_info);
464  			if (nhc_l3mdev_matches_dev(&nhi->fib_nhc, dev))
465  				return true;
466  		}
467  	} else {
468  		nhi = rcu_dereference(nh->nh_info);
469  		if (nhc_l3mdev_matches_dev(&nhi->fib_nhc, dev))
470  			return true;
471  	}
472  
473  	return false;
474  }
475  
fib_info_num_path(const struct fib_info * fi)476  static inline unsigned int fib_info_num_path(const struct fib_info *fi)
477  {
478  	if (unlikely(fi->nh))
479  		return nexthop_num_path(fi->nh);
480  
481  	return fi->fib_nhs;
482  }
483  
484  int fib_check_nexthop(struct nexthop *nh, u8 scope,
485  		      struct netlink_ext_ack *extack);
486  
fib_info_nhc(struct fib_info * fi,int nhsel)487  static inline struct fib_nh_common *fib_info_nhc(struct fib_info *fi, int nhsel)
488  {
489  	if (unlikely(fi->nh))
490  		return nexthop_fib_nhc(fi->nh, nhsel);
491  
492  	return &fi->fib_nh[nhsel].nh_common;
493  }
494  
495  /* only used when fib_nh is built into fib_info */
fib_info_nh(struct fib_info * fi,int nhsel)496  static inline struct fib_nh *fib_info_nh(struct fib_info *fi, int nhsel)
497  {
498  	WARN_ON(fi->nh);
499  
500  	return &fi->fib_nh[nhsel];
501  }
502  
/*
 * IPv6 variants
 */
int fib6_check_nexthop(struct nexthop *nh, struct fib6_config *cfg,
		       struct netlink_ext_ack *extack);
508  
509  /* Caller should either hold rcu_read_lock(), or RTNL. */
nexthop_fib6_nh(struct nexthop * nh)510  static inline struct fib6_nh *nexthop_fib6_nh(struct nexthop *nh)
511  {
512  	struct nh_info *nhi;
513  
514  	if (nh->is_group) {
515  		struct nh_group *nh_grp;
516  
517  		nh_grp = rcu_dereference_rtnl(nh->nh_grp);
518  		nh = nexthop_mpath_select(nh_grp, 0);
519  		if (!nh)
520  			return NULL;
521  	}
522  
523  	nhi = rcu_dereference_rtnl(nh->nh_info);
524  	if (nhi->family == AF_INET6)
525  		return &nhi->fib6_nh;
526  
527  	return NULL;
528  }
529  
fib6_info_nh_dev(struct fib6_info * f6i)530  static inline struct net_device *fib6_info_nh_dev(struct fib6_info *f6i)
531  {
532  	struct fib6_nh *fib6_nh;
533  
534  	fib6_nh = f6i->nh ? nexthop_fib6_nh(f6i->nh) : f6i->fib6_nh;
535  	return fib6_nh->fib_nh_dev;
536  }
537  
nexthop_path_fib6_result(struct fib6_result * res,int hash)538  static inline void nexthop_path_fib6_result(struct fib6_result *res, int hash)
539  {
540  	struct nexthop *nh = res->f6i->nh;
541  	struct nh_info *nhi;
542  
543  	nh = nexthop_select_path(nh, hash);
544  
545  	nhi = rcu_dereference_rtnl(nh->nh_info);
546  	if (nhi->reject_nh) {
547  		res->fib6_type = RTN_BLACKHOLE;
548  		res->fib6_flags |= RTF_REJECT;
549  		res->nh = nexthop_fib6_nh(nh);
550  	} else {
551  		res->nh = &nhi->fib6_nh;
552  	}
553  }
554  
int nexthop_for_each_fib6_nh(struct nexthop *nh,
			     int (*cb)(struct fib6_nh *nh, void *arg),
			     void *arg);
558  
nexthop_get_family(struct nexthop * nh)559  static inline int nexthop_get_family(struct nexthop *nh)
560  {
561  	struct nh_info *nhi = rcu_dereference_rtnl(nh->nh_info);
562  
563  	return nhi->family;
564  }
565  
566  static inline
nexthop_fdb_nhc(struct nexthop * nh)567  struct fib_nh_common *nexthop_fdb_nhc(struct nexthop *nh)
568  {
569  	struct nh_info *nhi = rcu_dereference_rtnl(nh->nh_info);
570  
571  	return &nhi->fib_nhc;
572  }
573  
nexthop_path_fdb_result(struct nexthop * nh,int hash)574  static inline struct fib_nh_common *nexthop_path_fdb_result(struct nexthop *nh,
575  							    int hash)
576  {
577  	struct nh_info *nhi;
578  	struct nexthop *nhp;
579  
580  	nhp = nexthop_select_path(nh, hash);
581  	if (unlikely(!nhp))
582  		return NULL;
583  	nhi = rcu_dereference(nhp->nh_info);
584  	return &nhi->fib_nhc;
585  }
586  #endif
587