1  /* SPDX-License-Identifier: GPL-2.0 */
2  /*
3   *  Shared Memory Communications over RDMA (SMC-R) and RoCE
4   *
5   *  Definitions for the SMC module (socket related)
6   *
7   *  Copyright IBM Corp. 2016
8   *
9   *  Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
10   */
11  #ifndef __SMC_H
12  #define __SMC_H
13  
14  #include <linux/socket.h>
15  #include <linux/types.h>
16  #include <linux/compiler.h> /* __aligned */
17  #include <net/genetlink.h>
18  #include <net/sock.h>
19  
20  #include "smc_ib.h"
21  
#define SMC_V1		1		/* SMC version V1 */
#define SMC_V2		2		/* SMC version V2 */

/* SMC release numbers; SMC_RELEASE aliases the newest one defined here */
#define SMC_RELEASE_0 0
#define SMC_RELEASE_1 1
#define SMC_RELEASE	SMC_RELEASE_1 /* the latest release version */

/* protocol numbers, one per IP version */
#define SMCPROTO_SMC		0	/* SMC protocol, IPv4 */
#define SMCPROTO_SMC6		1	/* SMC protocol, IPv6 */

#define SMC_AUTOCORKING_DEFAULT_SIZE	0x10000	/* 64K by default */
33  
/* protocol descriptors for the two SMC protocols; defined outside this header */
extern struct proto smc_proto;
extern struct proto smc_proto6;

/* hash tables of SMC sockets, one per IP version; defined outside this header */
extern struct smc_hashinfo smc_v4_hashinfo;
extern struct smc_hashinfo smc_v6_hashinfo;

/* operations on the struct sock of an SMC socket */
int smc_hash_sk(struct sock *sk);
void smc_unhash_sk(struct sock *sk);
void smc_release_cb(struct sock *sk);

/* operations on the struct socket of an SMC socket
 * NOTE(review): the signatures look like proto_ops handlers - confirm
 * against the place where they are installed
 */
int smc_release(struct socket *sock);
int smc_bind(struct socket *sock, struct sockaddr *uaddr,
	     int addr_len);
int smc_connect(struct socket *sock, struct sockaddr *addr,
		int alen, int flags);
int smc_accept(struct socket *sock, struct socket *new_sock,
	       struct proto_accept_arg *arg);
int smc_getname(struct socket *sock, struct sockaddr *addr,
		int peer);
__poll_t smc_poll(struct file *file, struct socket *sock,
		  poll_table *wait);
int smc_ioctl(struct socket *sock, unsigned int cmd,
	      unsigned long arg);
int smc_listen(struct socket *sock, int backlog);
int smc_shutdown(struct socket *sock, int how);
int smc_setsockopt(struct socket *sock, int level, int optname,
		   sockptr_t optval, unsigned int optlen);
int smc_getsockopt(struct socket *sock, int level, int optname,
		   char __user *optval, int __user *optlen);
int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len);
int smc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
		int flags);
ssize_t smc_splice_read(struct socket *sock, loff_t *ppos,
			struct pipe_inode_info *pipe, size_t len,
			unsigned int flags);

/* smc sock initialization */
void smc_sk_init(struct net *net, struct sock *sk, int protocol);
/* clcsock initialization */
int smc_create_clcsk(struct net *net, struct sock *sk, int family);
74  
/* ATOMIC64_INIT being defined is taken as the sign that atomic64_t is
 * available. KERNEL_HAS_ATOMIC64 then selects the atomic64_t flavour of
 * union smc_host_cursor and leaves the acurs_lock spinlock out of
 * struct smc_connection.
 */
#ifdef ATOMIC64_INIT
#define KERNEL_HAS_ATOMIC64
#endif
78  
/* The numeric values are assigned explicitly and are not contiguous; the
 * enumerators below are listed in groups rather than in numeric order
 * (24 precedes 22 and 23).
 */
enum smc_state {		/* possible states of an SMC socket */
	SMC_ACTIVE	= 1,
	SMC_INIT	= 2,
	SMC_CLOSED	= 7,
	SMC_LISTEN	= 10,
	/* normal close */
	SMC_PEERCLOSEWAIT1	= 20,
	SMC_PEERCLOSEWAIT2	= 21,
	SMC_APPFINCLOSEWAIT	= 24,
	SMC_APPCLOSEWAIT1	= 22,
	SMC_APPCLOSEWAIT2	= 23,
	SMC_PEERFINCLOSEWAIT	= 25,
	/* abnormal close */
	SMC_PEERABORTWAIT	= 26,
	SMC_PROCESSABORT	= 27,
};
95  
/* Supplemental feature identifiers. Each enumerator is a bit number, not a
 * mask: SMC_FEATURE_MASK converts it with BIT().
 */
enum smc_supplemental_features {
	SMC_SPF_EMULATED_ISM_DEV	= 0,
};

/* bitmask covering every feature listed in enum smc_supplemental_features */
#define SMC_FEATURE_MASK \
	(BIT(SMC_SPF_EMULATED_ISM_DEV))
102  
103  struct smc_link_group;
104  
/* One-byte header. The byte can be accessed whole as .type or, when one of
 * the bitfield endianness macros is defined, as the two 4-bit fields
 * llc_version and llc_type. The two branches declare the nibbles in opposite
 * order; with neither macro defined only .type exists.
 */
struct smc_wr_rx_hdr {	/* common prefix part of LLC and CDC to demultiplex */
	union {
		u8 type;
#if defined(__BIG_ENDIAN_BITFIELD)
		struct {
			u8 llc_version:4,
			   llc_type:4;
		};
#elif defined(__LITTLE_ENDIAN_BITFIELD)
		struct {
			u8 llc_type:4,
			   llc_version:4;
		};
#endif
	};
} __aligned(1);
121  
/* Connection state flags: three 1-bit indicators plus five reserved bits.
 * The members are declared in reverse order for __LITTLE_ENDIAN_BITFIELD.
 * If neither endianness macro is defined the struct has no members.
 */
struct smc_cdc_conn_state_flags {
#if defined(__BIG_ENDIAN_BITFIELD)
	u8	peer_done_writing : 1;	/* Sending done indicator */
	u8	peer_conn_closed : 1;	/* Peer connection closed indicator */
	u8	peer_conn_abort : 1;	/* Abnormal close indicator */
	u8	reserved : 5;
#elif defined(__LITTLE_ENDIAN_BITFIELD)
	u8	reserved : 5;
	u8	peer_conn_abort : 1;
	u8	peer_conn_closed : 1;
	u8	peer_done_writing : 1;
#endif
};
135  
/* Producer flags: five 1-bit indicators plus three reserved bits.
 * The members are declared in reverse order for __LITTLE_ENDIAN_BITFIELD.
 * If neither endianness macro is defined the struct has no members.
 */
struct smc_cdc_producer_flags {
#if defined(__BIG_ENDIAN_BITFIELD)
	u8	write_blocked : 1;	/* Writing Blocked, no rx buf space */
	u8	urg_data_pending : 1;	/* Urgent Data Pending */
	u8	urg_data_present : 1;	/* Urgent Data Present */
	u8	cons_curs_upd_req : 1;	/* cursor update requested */
	u8	failover_validation : 1;/* message replay due to failover */
	u8	reserved : 3;
#elif defined(__LITTLE_ENDIAN_BITFIELD)
	u8	reserved : 3;
	u8	failover_validation : 1;
	u8	cons_curs_upd_req : 1;
	u8	urg_data_present : 1;
	u8	urg_data_pending : 1;
	u8	write_blocked : 1;
#endif
};
153  
/* in host byte order */
/* The anonymous struct (2 + 2 + 4 bytes) shares storage with acurs, so the
 * cursor can be accessed either by field or as one 64-bit value. acurs is an
 * atomic64_t when KERNEL_HAS_ATOMIC64 is defined and a plain u64 otherwise.
 */
union smc_host_cursor {	/* SMC cursor - an offset in an RMBE */
	struct {
		u16	reserved;
		u16	wrap;		/* window wrap sequence number */
		u32	count;		/* cursor (= offset) part */
	};
#ifdef KERNEL_HAS_ATOMIC64
	atomic64_t		acurs;	/* for atomic processing */
#else
	u64			acurs;	/* for atomic processing */
#endif
} __aligned(8);
167  
/* in host byte order, except for flag bitfields in network byte order */
/* Starts with the common one-byte header, followed by a length byte, the
 * sequence number, the alert token, the two cursors and the two flag bytes;
 * the rest is reserved.
 */
struct smc_host_cdc_msg {		/* Connection Data Control message */
	struct smc_wr_rx_hdr		common; /* .type = 0xFE */
	u8				len;	/* length = 44 */
	u16				seqno;	/* connection seq # */
	u32				token;	/* alert_token */
	union smc_host_cursor		prod;		/* producer cursor */
	union smc_host_cursor		cons;		/* consumer cursor,
							 * piggy backed "ack"
							 */
	struct smc_cdc_producer_flags	prod_flags;	/* conn. tx/rx status */
	struct smc_cdc_conn_state_flags	conn_state_flags; /* peer conn. status*/
	u8				reserved[18];
} __aligned(8);
182  
/* state of urgent data; kept in smc_connection.urg_state */
enum smc_urg_state {
	SMC_URG_VALID	= 1,			/* data present */
	SMC_URG_NOTYET	= 2,			/* data pending */
	SMC_URG_READ	= 3,			/* data was already read */
};
188  
/* Wait queue entry bundled with a woken flag and a key pointer.
 * NOTE(review): presumably filled in by a custom wake function that sets
 * woken and stores the wake-up key - confirm against the users.
 */
struct smc_mark_woken {
	bool woken;
	void *key;
	wait_queue_entry_t wait_entry;
};
194  
/* State of one SMC connection; embedded in struct smc_sock as member conn.
 * acurs_lock only exists when KERNEL_HAS_ATOMIC64 is not defined.
 */
struct smc_connection {
	struct rb_node		alert_node;
	struct smc_link_group	*lgr;		/* link group of connection */
	struct smc_link		*lnk;		/* assigned SMC-R link */
	u32			alert_token_local; /* unique conn. id */
	u8			peer_rmbe_idx;	/* from tcp handshake */
	int			peer_rmbe_size;	/* size of peer rx buffer */
	atomic_t		peer_rmbe_space;/* remaining free bytes in peer
						 * rmbe
						 */
	int			rtoken_idx;	/* idx to peer RMB rkey/addr */

	struct smc_buf_desc	*sndbuf_desc;	/* send buffer descriptor */
	struct smc_buf_desc	*rmb_desc;	/* RMBE descriptor */
	int                     rmbe_size_comp; /* compressed notation */
	int			rmbe_update_limit;
						/* lower limit for consumer
						 * cursor update
						 */

	struct smc_host_cdc_msg	local_tx_ctrl;	/* host byte order staging
						 * buffer for CDC msg send
						 * .prod cf. TCP snd_nxt
						 * .cons cf. TCP sends ack
						 */
	union smc_host_cursor	local_tx_ctrl_fin;
						/* prod crsr - confirmed by peer
						 */
	union smc_host_cursor	tx_curs_prep;	/* tx - prepared data
						 * snd_max..wmem_alloc
						 */
	union smc_host_cursor	tx_curs_sent;	/* tx - sent data
						 * snd_nxt ?
						 */
	union smc_host_cursor	tx_curs_fin;	/* tx - confirmed by peer
						 * snd-wnd-begin ?
						 */
	atomic_t		sndbuf_space;	/* remaining space in sndbuf */
	u16			tx_cdc_seq;	/* sequence # for CDC send */
	u16			tx_cdc_seq_fin;	/* sequence # - tx completed */
	spinlock_t		send_lock;	/* protect wr_sends */
	atomic_t		cdc_pend_tx_wr; /* number of pending tx CDC wqe
						 * - inc when post wqe,
						 * - dec on polled tx cqe
						 */
	wait_queue_head_t	cdc_pend_tx_wq; /* wakeup on no cdc_pend_tx_wr*/
	struct delayed_work	tx_work;	/* retry of smc_cdc_msg_send */
	u32			tx_off;		/* base offset in peer rmb */

	struct smc_host_cdc_msg	local_rx_ctrl;	/* filled during event_handl.
						 * .prod cf. TCP rcv_nxt
						 * .cons cf. TCP snd_una
						 */
	union smc_host_cursor	rx_curs_confirmed; /* confirmed to peer
						    * source of snd_una ?
						    */
	union smc_host_cursor	urg_curs;	/* points at urgent byte */
	enum smc_urg_state	urg_state;
	bool			urg_tx_pend;	/* urgent data staged */
	bool			urg_rx_skip_pend;
						/* indicate urgent oob data
						 * read, but previous regular
						 * data still pending
						 */
	char			urg_rx_byte;	/* urgent byte */
	bool			tx_in_release_sock;
						/* flush pending tx data in
						 * sock release_cb()
						 */
	atomic_t		bytes_to_rcv;	/* arrived data,
						 * not yet received
						 */
	atomic_t		splice_pending;	/* number of spliced bytes
						 * pending processing
						 */
#ifndef KERNEL_HAS_ATOMIC64
	spinlock_t		acurs_lock;	/* protect cursors */
#endif
	struct work_struct	close_work;	/* peer sent some closing */
	struct work_struct	abort_work;	/* abort the connection */
	struct tasklet_struct	rx_tsklet;	/* Receiver tasklet for SMC-D */
	u8			rx_off;		/* receive offset:
						 * 0 for SMC-R, 32 for SMC-D
						 */
	u64			peer_token;	/* SMC-D token of peer */
	u8			killed : 1;	/* abnormal termination */
	u8			freed : 1;	/* normal termination */
	u8			out_of_sync : 1; /* out of sync with peer */
};
284  
/* Container around struct sock for an SMC socket; smc_sk() maps a pointer to
 * the embedded sk back to this structure. The four clcsk_* members hold
 * callbacks saved from the internal tcp socket (see the "original ... fct."
 * notes); pinet6 is only present when CONFIG_IPV6 is enabled.
 */
struct smc_sock {				/* smc sock container */
	struct sock		sk;
#if IS_ENABLED(CONFIG_IPV6)
	struct ipv6_pinfo	*pinet6;
#endif
	struct socket		*clcsock;	/* internal tcp socket */
	void			(*clcsk_state_change)(struct sock *sk);
						/* original state_change fct. */
	void			(*clcsk_data_ready)(struct sock *sk);
						/* original data_ready fct. */
	void			(*clcsk_write_space)(struct sock *sk);
						/* original write_space fct. */
	void			(*clcsk_error_report)(struct sock *sk);
						/* original error_report fct. */
	struct smc_connection	conn;		/* smc connection */
	struct smc_sock		*listen_smc;	/* listen parent */
	struct work_struct	connect_work;	/* handle non-blocking connect*/
	struct work_struct	tcp_listen_work;/* handle tcp socket accepts */
	struct work_struct	smc_listen_work;/* prepare new accept socket */
	struct list_head	accept_q;	/* sockets to be accepted */
	spinlock_t		accept_q_lock;	/* protects accept_q */
	bool			limit_smc_hs;	/* put constraint on handshake */
	bool			use_fallback;	/* fallback to tcp */
	int			fallback_rsn;	/* reason for fallback */
	u32			peer_diagnosis; /* decline reason from peer */
	atomic_t                queued_smc_hs;  /* queued smc handshakes */
	struct inet_connection_sock_af_ops		af_ops;
	const struct inet_connection_sock_af_ops	*ori_af_ops;
						/* original af ops */
	int			sockopt_defer_accept;
						/* sockopt TCP_DEFER_ACCEPT
						 * value
						 */
	u8			wait_close_tx_prepared : 1;
						/* shutdown wr or close
						 * started, waiting for unsent
						 * data to be sent
						 */
	u8			connect_nonblock : 1;
						/* non-blocking connect in
						 * flight
						 */
	struct mutex            clcsock_release_lock;
						/* protects clcsock of a listen
						 * socket
						 */
};
332  
/* get the struct smc_sock that embeds the given struct sock as member sk */
#define smc_sk(ptr) container_of_const(ptr, struct smc_sock, sk)
334  
smc_init_saved_callbacks(struct smc_sock * smc)335  static inline void smc_init_saved_callbacks(struct smc_sock *smc)
336  {
337  	smc->clcsk_state_change	= NULL;
338  	smc->clcsk_data_ready	= NULL;
339  	smc->clcsk_write_space	= NULL;
340  	smc->clcsk_error_report	= NULL;
341  }
342  
smc_clcsock_user_data(const struct sock * clcsk)343  static inline struct smc_sock *smc_clcsock_user_data(const struct sock *clcsk)
344  {
345  	return (struct smc_sock *)
346  	       ((uintptr_t)clcsk->sk_user_data & ~SK_USER_DATA_NOCOPY);
347  }
348  
/* install new_cb in *target_cb; the callback found there is remembered in
 * *saved_cb, unless *saved_cb already holds one
 */
static inline void smc_clcsock_replace_cb(void (**target_cb)(struct sock *),
					  void (*new_cb)(struct sock *),
					  void (**saved_cb)(struct sock *))
{
	void (*cur_cb)(struct sock *) = *target_cb;

	/* an already saved callback is never overwritten */
	if (*saved_cb == NULL)
		*saved_cb = cur_cb;
	*target_cb = new_cb;
}
359  
/* put the callback held in *saved_cb back into *target_cb and empty
 * *saved_cb; nothing happens when no callback is saved
 */
static inline void smc_clcsock_restore_cb(void (**target_cb)(struct sock *),
					  void (**saved_cb)(struct sock *))
{
	void (*orig_cb)(struct sock *) = *saved_cb;

	if (orig_cb) {
		*target_cb = orig_cb;
		*saved_cb = NULL;
	}
}
369  
extern struct workqueue_struct	*smc_hs_wq;	/* wq for handshake work */
extern struct workqueue_struct	*smc_close_wq;	/* wq for close work */

/* size of local_systemid in bytes */
#define SMC_SYSTEMID_LEN		8

extern u8	local_systemid[SMC_SYSTEMID_LEN]; /* unique system identifier */

/* 64-bit conversions between big endian and cpu byte order */
#define ntohll(x) be64_to_cpu(x)
#define htonll(x) cpu_to_be64(x)
379  
380  /* convert an u32 value into network byte order, store it into a 3 byte field */
hton24(u8 * net,u32 host)381  static inline void hton24(u8 *net, u32 host)
382  {
383  	__be32 t;
384  
385  	t = cpu_to_be32(host);
386  	memcpy(net, ((u8 *)&t) + 1, 3);
387  }
388  
389  /* convert a received 3 byte field into host byte order*/
ntoh24(u8 * net)390  static inline u32 ntoh24(u8 *net)
391  {
392  	__be32 t = 0;
393  
394  	memcpy(((u8 *)&t) + 1, net, 3);
395  	return be32_to_cpu(t);
396  }
397  
#ifdef CONFIG_XFRM
/* true if at least one of the two sk_policy slots of the clcsock is set */
static inline bool using_ipsec(struct smc_sock *smc)
{
	struct sock *clcsk = smc->clcsock->sk;

	return clcsk->sk_policy[0] || clcsk->sk_policy[1];
}
#else
/* without CONFIG_XFRM the answer is always false */
static inline bool using_ipsec(struct smc_sock *smc)
{
	return false;
}
#endif
410  
/* forward declaration; only used through a pointer in this header */
struct smc_gidlist;

struct sock *smc_accept_dequeue(struct sock *parent, struct socket *new_sock);
void smc_close_non_accepted(struct sock *sk);
void smc_fill_gid_list(struct smc_link_group *lgr,
		       struct smc_gidlist *gidlist,
		       struct smc_ib_device *known_dev, u8 *known_gid);

/* smc handshake limitation interface for netlink  */
int smc_nl_dump_hs_limitation(struct sk_buff *skb, struct netlink_callback *cb);
int smc_nl_enable_hs_limitation(struct sk_buff *skb, struct genl_info *info);
int smc_nl_disable_hs_limitation(struct sk_buff *skb, struct genl_info *info);
423  
/* set bit @flag in sk->sk_flags using set_bit() */
static inline void smc_sock_set_flag(struct sock *sk, enum sock_flags flag)
{
	set_bit(flag, &sk->sk_flags);
}
428  
429  #endif	/* __SMC_H */
430