1  // SPDX-License-Identifier: GPL-2.0
2  /*
3   *  Shared Memory Communications over RDMA (SMC-R) and RoCE
4   *
5   *  Socket Closing - normal and abnormal
6   *
7   *  Copyright IBM Corp. 2016
8   *
9   *  Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
10   */
11  
12  #include <linux/workqueue.h>
13  #include <linux/sched/signal.h>
14  
15  #include <net/sock.h>
16  #include <net/tcp.h>
17  
18  #include "smc.h"
19  #include "smc_tx.h"
20  #include "smc_cdc.h"
21  #include "smc_close.h"
22  
23  /* release the clcsock that is assigned to the smc_sock */
smc_clcsock_release(struct smc_sock * smc)24  void smc_clcsock_release(struct smc_sock *smc)
25  {
26  	struct socket *tcp;
27  
28  	if (smc->listen_smc && current_work() != &smc->smc_listen_work)
29  		cancel_work_sync(&smc->smc_listen_work);
30  	mutex_lock(&smc->clcsock_release_lock);
31  	if (smc->clcsock) {
32  		tcp = smc->clcsock;
33  		smc->clcsock = NULL;
34  		sock_release(tcp);
35  	}
36  	mutex_unlock(&smc->clcsock_release_lock);
37  }
38  
smc_close_cleanup_listen(struct sock * parent)39  static void smc_close_cleanup_listen(struct sock *parent)
40  {
41  	struct sock *sk;
42  
43  	/* Close non-accepted connections */
44  	while ((sk = smc_accept_dequeue(parent, NULL)))
45  		smc_close_non_accepted(sk);
46  }
47  
48  /* wait for sndbuf data being transmitted */
smc_close_stream_wait(struct smc_sock * smc,long timeout)49  static void smc_close_stream_wait(struct smc_sock *smc, long timeout)
50  {
51  	DEFINE_WAIT_FUNC(wait, woken_wake_function);
52  	struct sock *sk = &smc->sk;
53  
54  	if (!timeout)
55  		return;
56  
57  	if (!smc_tx_prepared_sends(&smc->conn))
58  		return;
59  
60  	/* Send out corked data remaining in sndbuf */
61  	smc_tx_pending(&smc->conn);
62  
63  	smc->wait_close_tx_prepared = 1;
64  	add_wait_queue(sk_sleep(sk), &wait);
65  	while (!signal_pending(current) && timeout) {
66  		int rc;
67  
68  		rc = sk_wait_event(sk, &timeout,
69  				   !smc_tx_prepared_sends(&smc->conn) ||
70  				   READ_ONCE(sk->sk_err) == ECONNABORTED ||
71  				   READ_ONCE(sk->sk_err) == ECONNRESET ||
72  				   smc->conn.killed,
73  				   &wait);
74  		if (rc)
75  			break;
76  	}
77  	remove_wait_queue(sk_sleep(sk), &wait);
78  	smc->wait_close_tx_prepared = 0;
79  }
80  
smc_close_wake_tx_prepared(struct smc_sock * smc)81  void smc_close_wake_tx_prepared(struct smc_sock *smc)
82  {
83  	if (smc->wait_close_tx_prepared)
84  		/* wake up socket closing */
85  		smc->sk.sk_state_change(&smc->sk);
86  }
87  
smc_close_wr(struct smc_connection * conn)88  static int smc_close_wr(struct smc_connection *conn)
89  {
90  	conn->local_tx_ctrl.conn_state_flags.peer_done_writing = 1;
91  
92  	return smc_cdc_get_slot_and_msg_send(conn);
93  }
94  
smc_close_final(struct smc_connection * conn)95  static int smc_close_final(struct smc_connection *conn)
96  {
97  	if (atomic_read(&conn->bytes_to_rcv))
98  		conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
99  	else
100  		conn->local_tx_ctrl.conn_state_flags.peer_conn_closed = 1;
101  	if (conn->killed)
102  		return -EPIPE;
103  
104  	return smc_cdc_get_slot_and_msg_send(conn);
105  }
106  
smc_close_abort(struct smc_connection * conn)107  int smc_close_abort(struct smc_connection *conn)
108  {
109  	conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
110  
111  	return smc_cdc_get_slot_and_msg_send(conn);
112  }
113  
smc_close_cancel_work(struct smc_sock * smc)114  static void smc_close_cancel_work(struct smc_sock *smc)
115  {
116  	struct sock *sk = &smc->sk;
117  
118  	release_sock(sk);
119  	if (cancel_work_sync(&smc->conn.close_work))
120  		sock_put(sk);
121  	cancel_delayed_work_sync(&smc->conn.tx_work);
122  	lock_sock(sk);
123  }
124  
125  /* terminate smc socket abnormally - active abort
126   * link group is terminated, i.e. RDMA communication no longer possible
127   */
smc_close_active_abort(struct smc_sock * smc)128  void smc_close_active_abort(struct smc_sock *smc)
129  {
130  	struct sock *sk = &smc->sk;
131  	bool release_clcsock = false;
132  
133  	if (sk->sk_state != SMC_INIT && smc->clcsock && smc->clcsock->sk) {
134  		sk->sk_err = ECONNABORTED;
135  		if (smc->clcsock && smc->clcsock->sk)
136  			tcp_abort(smc->clcsock->sk, ECONNABORTED);
137  	}
138  	switch (sk->sk_state) {
139  	case SMC_ACTIVE:
140  	case SMC_APPCLOSEWAIT1:
141  	case SMC_APPCLOSEWAIT2:
142  		sk->sk_state = SMC_PEERABORTWAIT;
143  		smc_close_cancel_work(smc);
144  		if (sk->sk_state != SMC_PEERABORTWAIT)
145  			break;
146  		sk->sk_state = SMC_CLOSED;
147  		sock_put(sk); /* (postponed) passive closing */
148  		break;
149  	case SMC_PEERCLOSEWAIT1:
150  	case SMC_PEERCLOSEWAIT2:
151  	case SMC_PEERFINCLOSEWAIT:
152  		sk->sk_state = SMC_PEERABORTWAIT;
153  		smc_close_cancel_work(smc);
154  		if (sk->sk_state != SMC_PEERABORTWAIT)
155  			break;
156  		sk->sk_state = SMC_CLOSED;
157  		smc_conn_free(&smc->conn);
158  		release_clcsock = true;
159  		sock_put(sk); /* passive closing */
160  		break;
161  	case SMC_PROCESSABORT:
162  	case SMC_APPFINCLOSEWAIT:
163  		sk->sk_state = SMC_PEERABORTWAIT;
164  		smc_close_cancel_work(smc);
165  		if (sk->sk_state != SMC_PEERABORTWAIT)
166  			break;
167  		sk->sk_state = SMC_CLOSED;
168  		smc_conn_free(&smc->conn);
169  		release_clcsock = true;
170  		break;
171  	case SMC_INIT:
172  	case SMC_PEERABORTWAIT:
173  	case SMC_CLOSED:
174  		break;
175  	}
176  
177  	smc_sock_set_flag(sk, SOCK_DEAD);
178  	sk->sk_state_change(sk);
179  
180  	if (release_clcsock) {
181  		release_sock(sk);
182  		smc_clcsock_release(smc);
183  		lock_sock(sk);
184  	}
185  }
186  
smc_close_sent_any_close(struct smc_connection * conn)187  static inline bool smc_close_sent_any_close(struct smc_connection *conn)
188  {
189  	return conn->local_tx_ctrl.conn_state_flags.peer_conn_abort ||
190  	       conn->local_tx_ctrl.conn_state_flags.peer_conn_closed;
191  }
192  
/* Close the SMC socket from the local side and advance its close state.
 *
 * Depending on the current state this waits for outstanding send data
 * (bounded by the linger timeout), sends the final close or abort
 * indication to the peer, shuts down the clcsock and moves sk_state
 * forward. Whenever the socket lock is dropped and re-taken the state is
 * re-checked, and the switch is restarted if it changed meanwhile.
 * State-change waiters are woken if the state differs from the one seen
 * on entry.
 *
 * NOTE(review): the release_sock()/lock_sock() pairs imply the caller
 * holds the socket lock - confirm against the callers.
 *
 * Returns 0 or the result of the last send/shutdown step executed; in the
 * SMC_ACTIVE case an error from sending the close takes precedence over
 * the clcsock shutdown result.
 */
int smc_close_active(struct smc_sock *smc)
{
	struct smc_cdc_conn_state_flags *txflags =
		&smc->conn.local_tx_ctrl.conn_state_flags;
	struct smc_connection *conn = &smc->conn;
	struct sock *sk = &smc->sk;
	int old_state;
	long timeout;
	int rc = 0;
	int rc1 = 0;

	/* no waiting for an exiting task; otherwise the SO_LINGER time if
	 * set, else the default stream wait timeout
	 */
	timeout = current->flags & PF_EXITING ?
		  0 : sock_flag(sk, SOCK_LINGER) ?
		      sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT;

	old_state = sk->sk_state;
again:
	switch (sk->sk_state) {
	case SMC_INIT:
		sk->sk_state = SMC_CLOSED;
		break;
	case SMC_LISTEN:
		sk->sk_state = SMC_CLOSED;
		sk->sk_state_change(sk); /* wake up accept */
		if (smc->clcsock && smc->clcsock->sk) {
			/* restore the clcsock's data_ready callback and
			 * detach it from this smc socket before shutdown
			 */
			write_lock_bh(&smc->clcsock->sk->sk_callback_lock);
			smc_clcsock_restore_cb(&smc->clcsock->sk->sk_data_ready,
					       &smc->clcsk_data_ready);
			smc->clcsock->sk->sk_user_data = NULL;
			write_unlock_bh(&smc->clcsock->sk->sk_callback_lock);
			rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR);
		}
		smc_close_cleanup_listen(sk);
		/* wait for tcp_listen_work with the socket lock dropped */
		release_sock(sk);
		flush_work(&smc->tcp_listen_work);
		lock_sock(sk);
		break;
	case SMC_ACTIVE:
		smc_close_stream_wait(smc, timeout);
		/* cancel tx_work with the socket lock dropped */
		release_sock(sk);
		cancel_delayed_work_sync(&conn->tx_work);
		lock_sock(sk);
		if (sk->sk_state == SMC_ACTIVE) {
			/* send close request */
			rc = smc_close_final(conn);
			sk->sk_state = SMC_PEERCLOSEWAIT1;

			/* actively shutdown clcsock before peer close it,
			 * prevent peer from entering TIME_WAIT state.
			 */
			if (smc->clcsock && smc->clcsock->sk) {
				rc1 = kernel_sock_shutdown(smc->clcsock,
							   SHUT_RDWR);
				/* keep the first error encountered */
				rc = rc ? rc : rc1;
			}
		} else {
			/* peer event has changed the state */
			goto again;
		}
		break;
	case SMC_APPFINCLOSEWAIT:
		/* socket already shutdown wr or both (active close) */
		if (txflags->peer_done_writing &&
		    !smc_close_sent_any_close(conn)) {
			/* just shutdown wr done, send close request */
			rc = smc_close_final(conn);
		}
		sk->sk_state = SMC_CLOSED;
		break;
	case SMC_APPCLOSEWAIT1:
	case SMC_APPCLOSEWAIT2:
		/* only wait for send data if the peer has not closed yet */
		if (!smc_cdc_rxed_any_close(conn))
			smc_close_stream_wait(smc, timeout);
		/* cancel tx_work with the socket lock dropped */
		release_sock(sk);
		cancel_delayed_work_sync(&conn->tx_work);
		lock_sock(sk);
		/* state changed while unlocked: start over */
		if (sk->sk_state != SMC_APPCLOSEWAIT1 &&
		    sk->sk_state != SMC_APPCLOSEWAIT2)
			goto again;
		/* confirm close from peer */
		rc = smc_close_final(conn);
		if (smc_cdc_rxed_any_close(conn)) {
			/* peer has closed the socket already */
			sk->sk_state = SMC_CLOSED;
			sock_put(sk); /* postponed passive closing */
		} else {
			/* peer has just issued a shutdown write */
			sk->sk_state = SMC_PEERFINCLOSEWAIT;
		}
		break;
	case SMC_PEERCLOSEWAIT1:
	case SMC_PEERCLOSEWAIT2:
		if (txflags->peer_done_writing &&
		    !smc_close_sent_any_close(conn)) {
			/* just shutdown wr done, send close request */
			rc = smc_close_final(conn);
		}
		/* peer sending PeerConnectionClosed will cause transition */
		break;
	case SMC_PEERFINCLOSEWAIT:
		/* peer sending PeerConnectionClosed will cause transition */
		break;
	case SMC_PROCESSABORT:
		rc = smc_close_abort(conn);
		sk->sk_state = SMC_CLOSED;
		break;
	case SMC_PEERABORTWAIT:
		sk->sk_state = SMC_CLOSED;
		break;
	case SMC_CLOSED:
		/* nothing to do, add tracing in future patch */
		break;
	}

	/* notify waiters only if the state changed since entry */
	if (old_state != sk->sk_state)
		sk->sk_state_change(sk);
	return rc;
}
311  
smc_close_passive_abort_received(struct smc_sock * smc)312  static void smc_close_passive_abort_received(struct smc_sock *smc)
313  {
314  	struct smc_cdc_conn_state_flags *txflags =
315  		&smc->conn.local_tx_ctrl.conn_state_flags;
316  	struct sock *sk = &smc->sk;
317  
318  	switch (sk->sk_state) {
319  	case SMC_INIT:
320  	case SMC_ACTIVE:
321  	case SMC_APPCLOSEWAIT1:
322  		sk->sk_state = SMC_PROCESSABORT;
323  		sock_put(sk); /* passive closing */
324  		break;
325  	case SMC_APPFINCLOSEWAIT:
326  		sk->sk_state = SMC_PROCESSABORT;
327  		break;
328  	case SMC_PEERCLOSEWAIT1:
329  	case SMC_PEERCLOSEWAIT2:
330  		if (txflags->peer_done_writing &&
331  		    !smc_close_sent_any_close(&smc->conn))
332  			/* just shutdown, but not yet closed locally */
333  			sk->sk_state = SMC_PROCESSABORT;
334  		else
335  			sk->sk_state = SMC_CLOSED;
336  		sock_put(sk); /* passive closing */
337  		break;
338  	case SMC_APPCLOSEWAIT2:
339  	case SMC_PEERFINCLOSEWAIT:
340  		sk->sk_state = SMC_CLOSED;
341  		sock_put(sk); /* passive closing */
342  		break;
343  	case SMC_PEERABORTWAIT:
344  		sk->sk_state = SMC_CLOSED;
345  		break;
346  	case SMC_PROCESSABORT:
347  	/* nothing to do, add tracing in future patch */
348  		break;
349  	}
350  }
351  
/* Either some kind of closing has been received: peer_conn_closed,
 * peer_conn_abort, or peer_done_writing
 * or the link group of the connection terminates abnormally.
 *
 * Work handler for conn->close_work. Under the socket lock it evaluates
 * the received connection state flags, advances sk_state accordingly and
 * wakes readers and writers. If the socket ends up in SMC_CLOSED and is
 * already dead or orphaned, the connection is freed and the clcsock is
 * released after the socket lock has been dropped. The socket reference
 * taken by whoever scheduled this work is dropped at the end.
 */
static void smc_close_passive_work(struct work_struct *work)
{
	struct smc_connection *conn = container_of(work,
						   struct smc_connection,
						   close_work);
	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
	struct smc_cdc_conn_state_flags *rxflags;
	bool release_clcsock = false;
	struct sock *sk = &smc->sk;
	int old_state;

	lock_sock(sk);
	old_state = sk->sk_state;

	rxflags = &conn->local_rx_ctrl.conn_state_flags;
	if (rxflags->peer_conn_abort) {
		/* peer has not received all data */
		smc_close_passive_abort_received(smc);
		/* cancel tx_work with the socket lock dropped */
		release_sock(sk);
		cancel_delayed_work_sync(&conn->tx_work);
		lock_sock(sk);
		goto wakeup;
	}

	switch (sk->sk_state) {
	case SMC_INIT:
		sk->sk_state = SMC_APPCLOSEWAIT1;
		break;
	case SMC_ACTIVE:
		sk->sk_state = SMC_APPCLOSEWAIT1;
		/* postpone sock_put() for passive closing to cover
		 * received SEND_SHUTDOWN as well
		 */
		break;
	case SMC_PEERCLOSEWAIT1:
		if (rxflags->peer_done_writing)
			sk->sk_state = SMC_PEERCLOSEWAIT2;
		fallthrough;
		/* to check for closing */
	case SMC_PEERCLOSEWAIT2:
		/* peer has not closed yet: stay in this state */
		if (!smc_cdc_rxed_any_close(conn))
			break;
		if (sock_flag(sk, SOCK_DEAD) &&
		    smc_close_sent_any_close(conn)) {
			/* smc_release has already been called locally */
			sk->sk_state = SMC_CLOSED;
		} else {
			/* just shutdown, but not yet closed locally */
			sk->sk_state = SMC_APPFINCLOSEWAIT;
		}
		sock_put(sk); /* passive closing */
		break;
	case SMC_PEERFINCLOSEWAIT:
		if (smc_cdc_rxed_any_close(conn)) {
			sk->sk_state = SMC_CLOSED;
			sock_put(sk); /* passive closing */
		}
		break;
	case SMC_APPCLOSEWAIT1:
	case SMC_APPCLOSEWAIT2:
		/* postpone sock_put() for passive closing to cover
		 * received SEND_SHUTDOWN as well
		 */
		break;
	case SMC_APPFINCLOSEWAIT:
	case SMC_PEERABORTWAIT:
	case SMC_PROCESSABORT:
	case SMC_CLOSED:
		/* nothing to do, add tracing in future patch */
		break;
	}

wakeup:
	sk->sk_data_ready(sk); /* wakeup blocked rcvbuf consumers */
	sk->sk_write_space(sk); /* wakeup blocked sndbuf producers */

	if (old_state != sk->sk_state) {
		sk->sk_state_change(sk);
		/* closed and already dead or without a struct socket:
		 * free the connection here
		 */
		if ((sk->sk_state == SMC_CLOSED) &&
		    (sock_flag(sk, SOCK_DEAD) || !sk->sk_socket)) {
			smc_conn_free(conn);
			if (smc->clcsock)
				release_clcsock = true;
		}
	}
	release_sock(sk);
	/* the clcsock is released without the socket lock held */
	if (release_clcsock)
		smc_clcsock_release(smc);
	sock_put(sk); /* sock_hold done by schedulers of close_work */
}
446  
smc_close_shutdown_write(struct smc_sock * smc)447  int smc_close_shutdown_write(struct smc_sock *smc)
448  {
449  	struct smc_connection *conn = &smc->conn;
450  	struct sock *sk = &smc->sk;
451  	int old_state;
452  	long timeout;
453  	int rc = 0;
454  
455  	timeout = current->flags & PF_EXITING ?
456  		  0 : sock_flag(sk, SOCK_LINGER) ?
457  		      sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT;
458  
459  	old_state = sk->sk_state;
460  again:
461  	switch (sk->sk_state) {
462  	case SMC_ACTIVE:
463  		smc_close_stream_wait(smc, timeout);
464  		release_sock(sk);
465  		cancel_delayed_work_sync(&conn->tx_work);
466  		lock_sock(sk);
467  		if (sk->sk_state != SMC_ACTIVE)
468  			goto again;
469  		/* send close wr request */
470  		rc = smc_close_wr(conn);
471  		sk->sk_state = SMC_PEERCLOSEWAIT1;
472  		break;
473  	case SMC_APPCLOSEWAIT1:
474  		/* passive close */
475  		if (!smc_cdc_rxed_any_close(conn))
476  			smc_close_stream_wait(smc, timeout);
477  		release_sock(sk);
478  		cancel_delayed_work_sync(&conn->tx_work);
479  		lock_sock(sk);
480  		if (sk->sk_state != SMC_APPCLOSEWAIT1)
481  			goto again;
482  		/* confirm close from peer */
483  		rc = smc_close_wr(conn);
484  		sk->sk_state = SMC_APPCLOSEWAIT2;
485  		break;
486  	case SMC_APPCLOSEWAIT2:
487  	case SMC_PEERFINCLOSEWAIT:
488  	case SMC_PEERCLOSEWAIT1:
489  	case SMC_PEERCLOSEWAIT2:
490  	case SMC_APPFINCLOSEWAIT:
491  	case SMC_PROCESSABORT:
492  	case SMC_PEERABORTWAIT:
493  		/* nothing to do, add tracing in future patch */
494  		break;
495  	}
496  
497  	if (old_state != sk->sk_state)
498  		sk->sk_state_change(sk);
499  	return rc;
500  }
501  
/* Initialize close properties on connection establishment.
 *
 * Sets up conn.close_work of @smc with smc_close_passive_work() as its
 * handler.
 */
void smc_close_init(struct smc_sock *smc)
{
	INIT_WORK(&smc->conn.close_work, smc_close_passive_work);
}
507