// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *  net/dccp/output.c
 *
 *  An implementation of the DCCP protocol
 *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
 */

#include <linux/dccp.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/slab.h>
#include <linux/sched/signal.h>

#include <net/inet_sock.h>
#include <net/sock.h>

#include "ackvec.h"
#include "ccid.h"
#include "dccp.h"

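/*
 * An Ack has just been transmitted (either a pure Ack or one piggybacked on
 * another packet type): the delayed-Ack timer is no longer needed.
 */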
static inline void dccp_event_ack_sent(struct sock *sk)
{
	inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
}

/* enqueue @skb on sk_send_head for retransmission, return clone to send now */
static struct sk_buff *dccp_skb_entail(struct sock *sk, struct sk_buff *skb)
{
	skb_set_owner_w(skb, sk);
	WARN_ON(sk->sk_send_head);
	sk->sk_send_head = skb;
	return skb_clone(sk->sk_send_head, gfp_any());
}

/*
 * All SKB's seen here are completely headerless. It is our
 * job to build the DCCP header, and pass the packet down to
 * IP so it can do the same plus pass the packet off to the
 * device.
 */
static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb)
{
	if (likely(skb != NULL)) {
		struct inet_sock *inet = inet_sk(sk);
		const struct inet_connection_sock *icsk = inet_csk(sk);
		struct dccp_sock *dp = dccp_sk(sk);
		struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
		struct dccp_hdr *dh;
		/* XXX For now we're using only 48 bits sequence numbers */
		const u32 dccp_header_size = sizeof(*dh) +
					     sizeof(struct dccp_hdr_ext) +
					  dccp_packet_hdr_len(dcb->dccpd_type);
		int err, set_ack = 1;
		u64 ackno = dp->dccps_gsr;
		/*
		 * Increment GSS here already in case the option code needs it.
		 * Update GSS for real only if option processing below succeeds.
		 */
		dcb->dccpd_seq = ADD48(dp->dccps_gss, 1);

		switch (dcb->dccpd_type) {
		case DCCP_PKT_DATA:
			set_ack = 0;
			fallthrough;
		case DCCP_PKT_DATAACK:
		case DCCP_PKT_RESET:
			break;

		case DCCP_PKT_REQUEST:
			set_ack = 0;
			/* Use ISS on the first (non-retransmitted) Request. */
			if (icsk->icsk_retransmits == 0)
				dcb->dccpd_seq = dp->dccps_iss;
			fallthrough;

		case DCCP_PKT_SYNC:
		case DCCP_PKT_SYNCACK:
			ackno = dcb->dccpd_ack_seq;
			fallthrough;
		default:
			/*
			 * Set owner/destructor: some skbs are allocated via
			 * alloc_skb (e.g. when retransmission may happen).
			 * Only Data, DataAck, and Reset packets should come
			 * through here with skb->sk set.
			 */
			WARN_ON(skb->sk);
			skb_set_owner_w(skb, sk);
			break;
		}

		if (dccp_insert_options(sk, skb)) {
			kfree_skb(skb);
			return -EPROTO;
		}

		/* Build DCCP header and checksum it. */
		dh = dccp_zeroed_hdr(skb, dccp_header_size);
		dh->dccph_type	= dcb->dccpd_type;
		dh->dccph_sport	= inet->inet_sport;
		dh->dccph_dport	= inet->inet_dport;
		dh->dccph_doff	= (dccp_header_size + dcb->dccpd_opt_len) / 4;
		dh->dccph_ccval	= dcb->dccpd_ccval;
		dh->dccph_cscov = dp->dccps_pcslen;
		/* XXX For now we're using only 48 bits sequence numbers */
		dh->dccph_x	= 1;

		dccp_update_gss(sk, dcb->dccpd_seq);
		dccp_hdr_set_seq(dh, dp->dccps_gss);
		if (set_ack)
			dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), ackno);

		switch (dcb->dccpd_type) {
		case DCCP_PKT_REQUEST:
			dccp_hdr_request(skb)->dccph_req_service =
							dp->dccps_service;
			/*
			 * Limit Ack window to ISS <= P.ackno <= GSS, so that
			 * only Responses to Requests we sent are considered.
			 */
			dp->dccps_awl = dp->dccps_iss;
			break;
		case DCCP_PKT_RESET:
			dccp_hdr_reset(skb)->dccph_reset_code =
							dcb->dccpd_reset_code;
			break;
		}

		icsk->icsk_af_ops->send_check(sk, skb);

		if (set_ack)
			dccp_event_ack_sent(sk);

		DCCP_INC_STATS(DCCP_MIB_OUTSEGS);

		err = icsk->icsk_af_ops->queue_xmit(sk, skb, &inet->cork.fl);
		return net_xmit_eval(err);
	}
	return -ENOBUFS;
}

/**
 * dccp_determine_ccmps  -  Find out about CCID-specific packet-size limits
 * @dp: socket to find packet size limits of
 *
 * We only consider the HC-sender CCID for setting the CCMPS (RFC 4340, 14.),
 * since the RX CCID is restricted to feedback packets (Acks), which are small
 * in comparison with the data traffic. A value of 0 means "no current CCMPS".
 */
static u32 dccp_determine_ccmps(const struct dccp_sock *dp)
{
	const struct ccid *tx_ccid = dp->dccps_hc_tx_ccid;

	if (tx_ccid == NULL || tx_ccid->ccid_ops == NULL)
		return 0;
	return tx_ccid->ccid_ops->ccid_ccmps;
}

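/**
 * dccp_sync_mss  -  Recompute the maximum packet size from the path MTU
 * @sk:   socket whose cached MPS is being updated
 * @pmtu: current path MTU
 *
 * Starts from @pmtu, capped by the CCID's CCMPS if one is defined, subtracts
 * the network and DCCP header overhead plus headroom for common options, and
 * caches the result in dccps_mss_cache before returning it.
 */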
unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct dccp_sock *dp = dccp_sk(sk);
	u32 ccmps = dccp_determine_ccmps(dp);
	u32 cur_mps = ccmps ? min(pmtu, ccmps) : pmtu;

	/* Account for header lengths and IPv4/v6 option overhead */
	cur_mps -= (icsk->icsk_af_ops->net_header_len + icsk->icsk_ext_hdr_len +
		    sizeof(struct dccp_hdr) + sizeof(struct dccp_hdr_ext));

	/*
	 * Leave enough headroom for common DCCP header options.
	 * This only considers options which may appear on DCCP-Data packets, as
	 * per table 3 in RFC 4340, 5.8. When running out of space for other
	 * options (e.g. Ack Vector, which can take up to 255 bytes), it is better
	 * to schedule a separate Ack. Thus we leave headroom for the following:
	 *  - 1 byte for Slow Receiver (11.6)
	 *  - 6 bytes for Timestamp (13.1)
	 *  - 10 bytes for Timestamp Echo (13.3)
	 *  - 8 bytes for NDP count (7.7, when activated)
	 *  - 6 bytes for Data Checksum (9.3)
	 *  - %DCCPAV_MIN_OPTLEN bytes for Ack Vector size (11.4, when enabled)
	 */
	cur_mps -= roundup(1 + 6 + 10 + dp->dccps_send_ndp_count * 8 + 6 +
			   (dp->dccps_hc_rx_ackvec ? DCCPAV_MIN_OPTLEN : 0), 4);
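	/*
	 * For example, with the NDP count and Ack Vector features disabled,
	 * this reserves roundup(1 + 6 + 10 + 6, 4) = 24 bytes of headroom.
	 */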

	/* And store cached results */
	icsk->icsk_pmtu_cookie = pmtu;
	WRITE_ONCE(dp->dccps_mss_cache, cur_mps);

	return cur_mps;
}

EXPORT_SYMBOL_GPL(dccp_sync_mss);

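/*
 * Wake up processes sleeping on the socket's wait queue and, if the socket is
 * writeable, send the asynchronous write-space notification (SOCK_WAKE_SPACE).
 */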
void dccp_write_space(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	wq = rcu_dereference(sk->sk_wq);
	if (skwq_has_sleeper(wq))
		wake_up_interruptible(&wq->wait);
	/* Should agree with poll, otherwise some programs break */
	if (sock_writeable(sk))
		sk_wake_async_rcu(sk, SOCK_WAKE_SPACE, POLL_OUT);

	rcu_read_unlock();
}

/**
 * dccp_wait_for_ccid  -  Await CCID send permission
 * @sk:    socket to wait for
 * @delay: timeout in jiffies
 *
 * This is used by CCIDs which need to delay the send time in process context.
 */
static int dccp_wait_for_ccid(struct sock *sk, unsigned long delay)
{
	DEFINE_WAIT(wait);
	long remaining;

	prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
	sk->sk_write_pending++;
	release_sock(sk);

	remaining = schedule_timeout(delay);

	lock_sock(sk);
	sk->sk_write_pending--;
	finish_wait(sk_sleep(sk), &wait);

	if (signal_pending(current) || sk->sk_err)
		return -1;
	return remaining;
}

/**
 * dccp_xmit_packet  -  Send data packet under control of CCID
 * @sk: socket to send data packet on
 *
 * Transmits next-queued payload and informs CCID to account for the packet.
 */
static void dccp_xmit_packet(struct sock *sk)
{
	int err, len;
	struct dccp_sock *dp = dccp_sk(sk);
	struct sk_buff *skb = dccp_qpolicy_pop(sk);

	if (unlikely(skb == NULL))
		return;
	len = skb->len;

	if (sk->sk_state == DCCP_PARTOPEN) {
		const u32 cur_mps = dp->dccps_mss_cache - DCCP_FEATNEG_OVERHEAD;
		/*
		 * See 8.1.5 - Handshake Completion.
		 *
		 * For robustness we resend Confirm options until the client has
		 * entered OPEN. During the initial feature negotiation, the MPS
		 * is smaller than usual, reduced by the Change/Confirm options.
		 */
		if (!list_empty(&dp->dccps_featneg) && len > cur_mps) {
			DCCP_WARN("Payload too large (%d) for featneg.\n", len);
			dccp_send_ack(sk);
			dccp_feat_list_purge(&dp->dccps_featneg);
		}

		inet_csk_schedule_ack(sk);
		inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
					      inet_csk(sk)->icsk_rto,
					      DCCP_RTO_MAX);
		DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_DATAACK;
	} else if (dccp_ack_pending(sk)) {
		DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_DATAACK;
	} else {
		DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_DATA;
	}

	err = dccp_transmit_skb(sk, skb);
	if (err)
		dccp_pr_debug("transmit_skb() returned err=%d\n", err);
	/*
	 * Register this one as sent even if an error occurred. To the remote
	 * end a local packet drop is indistinguishable from network loss, i.e.
	 * any local drop will eventually be reported via receiver feedback.
	 */
	ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, len);

	/*
	 * If the CCID needs to transfer additional header options out-of-band
	 * (e.g. Ack Vectors or feature-negotiation options), it activates this
	 * flag to schedule a Sync. The Sync will automatically incorporate all
	 * currently pending header options, thus clearing the backlog.
	 */
	if (dp->dccps_sync_scheduled)
		dccp_send_sync(sk, dp->dccps_gsr, DCCP_PKT_SYNC);
}

/**
 * dccp_flush_write_queue  -  Drain queue at end of connection
 * @sk: socket to be drained
 * @time_budget: time allowed to drain the queue
 *
 * Since dccp_sendmsg queues packets without waiting for them to be sent, it may
 * happen that the TX queue is not empty at the end of a connection. We give the
 * HC-sender CCID a grace period of up to @time_budget jiffies. If this function
 * returns with a non-empty write queue, it will be purged later.
 */
void dccp_flush_write_queue(struct sock *sk, long *time_budget)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct sk_buff *skb;
	long delay, rc;

	while (*time_budget > 0 && (skb = skb_peek(&sk->sk_write_queue))) {
		rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb);

		switch (ccid_packet_dequeue_eval(rc)) {
		case CCID_PACKET_WILL_DEQUEUE_LATER:
			/*
			 * If the CCID determines when to send, the next sending
			 * time is unknown or the CCID may not even send again
			 * (e.g. remote host crashes or lost Ack packets).
			 */
			DCCP_WARN("CCID did not manage to send all packets\n");
			return;
		case CCID_PACKET_DELAY:
			delay = msecs_to_jiffies(rc);
			if (delay > *time_budget)
				return;
			rc = dccp_wait_for_ccid(sk, delay);
			if (rc < 0)
				return;
			*time_budget -= (delay - rc);
			/* check again if we can send now */
			break;
		case CCID_PACKET_SEND_AT_ONCE:
			dccp_xmit_packet(sk);
			break;
		case CCID_PACKET_ERR:
			skb_dequeue(&sk->sk_write_queue);
			kfree_skb(skb);
			dccp_pr_debug("packet discarded due to err=%ld\n", rc);
		}
	}
}

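/*
 * Transmit as much of the queue as the TX CCID currently allows: depending on
 * the CCID verdict, a packet is sent immediately, transmission is rescheduled
 * via dccps_xmit_timer, or the packet is dropped from the queue.
 */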
void dccp_write_xmit(struct sock *sk)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct sk_buff *skb;

	while ((skb = dccp_qpolicy_top(sk))) {
		int rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb);

		switch (ccid_packet_dequeue_eval(rc)) {
		case CCID_PACKET_WILL_DEQUEUE_LATER:
			return;
		case CCID_PACKET_DELAY:
			sk_reset_timer(sk, &dp->dccps_xmit_timer,
				       jiffies + msecs_to_jiffies(rc));
			return;
		case CCID_PACKET_SEND_AT_ONCE:
			dccp_xmit_packet(sk);
			break;
		case CCID_PACKET_ERR:
			dccp_qpolicy_drop(sk, skb);
			dccp_pr_debug("packet discarded due to err=%d\n", rc);
		}
	}
}

/**
 * dccp_retransmit_skb  -  Retransmit Request, Close, or CloseReq packets
 * @sk: socket to perform retransmit on
 *
 * There are only four retransmittable packet types in DCCP:
 * - Request  in client-REQUEST  state (sec. 8.1.1),
 * - CloseReq in server-CLOSEREQ state (sec. 8.3),
 * - Close    in   node-CLOSING  state (sec. 8.3),
 * - Acks in client-PARTOPEN state (sec. 8.1.5, handled by dccp_delack_timer()).
 * This function expects sk->sk_send_head to contain the original skb.
 */
int dccp_retransmit_skb(struct sock *sk)
{
	WARN_ON(sk->sk_send_head == NULL);

	if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk) != 0)
		return -EHOSTUNREACH; /* Routing failure or similar. */

	/* this count is used to distinguish original and retransmitted skb */
	inet_csk(sk)->icsk_retransmits++;

	return dccp_transmit_skb(sk, skb_clone(sk->sk_send_head, GFP_ATOMIC));
}

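/*
 * Build a DCCP-Response answering the Request represented by @req, carrying
 * the sequence/ack numbers and feature options recorded in the request socket.
 * Returns the skb on success, or NULL on allocation or option failure.
 */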
struct sk_buff *dccp_make_response(const struct sock *sk, struct dst_entry *dst,
				   struct request_sock *req)
{
	struct dccp_hdr *dh;
	struct dccp_request_sock *dreq;
	const u32 dccp_header_size = sizeof(struct dccp_hdr) +
				     sizeof(struct dccp_hdr_ext) +
				     sizeof(struct dccp_hdr_response);
	struct sk_buff *skb;

	/* sk is marked const to clearly express we don't hold the socket lock.
	 * sock_wmalloc() will atomically change sk->sk_wmem_alloc,
	 * it is safe to promote sk to non-const.
	 */
	skb = sock_wmalloc((struct sock *)sk, MAX_DCCP_HEADER, 1,
			   GFP_ATOMIC);
	if (!skb)
		return NULL;

	skb_reserve(skb, MAX_DCCP_HEADER);

	skb_dst_set(skb, dst_clone(dst));

	dreq = dccp_rsk(req);
	if (inet_rsk(req)->acked)	/* increase GSS upon retransmission */
		dccp_inc_seqno(&dreq->dreq_gss);
	DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESPONSE;
	DCCP_SKB_CB(skb)->dccpd_seq  = dreq->dreq_gss;

	/* Resolve feature dependencies resulting from choice of CCID */
	if (dccp_feat_server_ccid_dependencies(dreq))
		goto response_failed;

	if (dccp_insert_options_rsk(dreq, skb))
		goto response_failed;

	/* Build and checksum header */
	dh = dccp_zeroed_hdr(skb, dccp_header_size);

	dh->dccph_sport	= htons(inet_rsk(req)->ir_num);
	dh->dccph_dport	= inet_rsk(req)->ir_rmt_port;
	dh->dccph_doff	= (dccp_header_size +
			   DCCP_SKB_CB(skb)->dccpd_opt_len) / 4;
	dh->dccph_type	= DCCP_PKT_RESPONSE;
	dh->dccph_x	= 1;
	dccp_hdr_set_seq(dh, dreq->dreq_gss);
	dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dreq->dreq_gsr);
	dccp_hdr_response(skb)->dccph_resp_service = dreq->dreq_service;

	dccp_csum_outgoing(skb);

	/* We use `acked' to remember that a Response was already sent. */
	inet_rsk(req)->acked = 1;
	DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
	return skb;
response_failed:
	kfree_skb(skb);
	return NULL;
}

EXPORT_SYMBOL_GPL(dccp_make_response);

/* answer offending packet in @rcv_skb with Reset from control socket @ctl */
struct sk_buff *dccp_ctl_make_reset(struct sock *sk, struct sk_buff *rcv_skb)
{
	struct dccp_hdr *rxdh = dccp_hdr(rcv_skb), *dh;
	struct dccp_skb_cb *dcb = DCCP_SKB_CB(rcv_skb);
	const u32 dccp_hdr_reset_len = sizeof(struct dccp_hdr) +
				       sizeof(struct dccp_hdr_ext) +
				       sizeof(struct dccp_hdr_reset);
	struct dccp_hdr_reset *dhr;
	struct sk_buff *skb;

	skb = alloc_skb(sk->sk_prot->max_header, GFP_ATOMIC);
	if (skb == NULL)
		return NULL;

	skb_reserve(skb, sk->sk_prot->max_header);

	/* Swap the send and the receive. */
	dh = dccp_zeroed_hdr(skb, dccp_hdr_reset_len);
	dh->dccph_type	= DCCP_PKT_RESET;
	dh->dccph_sport	= rxdh->dccph_dport;
	dh->dccph_dport	= rxdh->dccph_sport;
	dh->dccph_doff	= dccp_hdr_reset_len / 4;
	dh->dccph_x	= 1;

	dhr = dccp_hdr_reset(skb);
	dhr->dccph_reset_code = dcb->dccpd_reset_code;

	switch (dcb->dccpd_reset_code) {
	case DCCP_RESET_CODE_PACKET_ERROR:
		dhr->dccph_reset_data[0] = rxdh->dccph_type;
		break;
	case DCCP_RESET_CODE_OPTION_ERROR:
	case DCCP_RESET_CODE_MANDATORY_ERROR:
		memcpy(dhr->dccph_reset_data, dcb->dccpd_reset_data, 3);
		break;
	}
	/*
	 * From RFC 4340, 8.3.1:
	 *   If P.ackno exists, set R.seqno := P.ackno + 1.
	 *   Else set R.seqno := 0.
	 */
	if (dcb->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
		dccp_hdr_set_seq(dh, ADD48(dcb->dccpd_ack_seq, 1));
	dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dcb->dccpd_seq);

	dccp_csum_outgoing(skb);
	return skb;
}

EXPORT_SYMBOL_GPL(dccp_ctl_make_reset);

/* send Reset on established socket, to close or abort the connection */
int dccp_send_reset(struct sock *sk, enum dccp_reset_codes code)
{
	struct sk_buff *skb;
	/*
	 * FIXME: what if rebuild_header fails?
	 * Should we be doing a rebuild_header here?
	 */
	int err = inet_csk(sk)->icsk_af_ops->rebuild_header(sk);

	if (err != 0)
		return err;

	skb = sock_wmalloc(sk, sk->sk_prot->max_header, 1, GFP_ATOMIC);
	if (skb == NULL)
		return -ENOBUFS;

	/* Reserve space for headers and prepare control bits. */
	skb_reserve(skb, sk->sk_prot->max_header);
	DCCP_SKB_CB(skb)->dccpd_type	   = DCCP_PKT_RESET;
	DCCP_SKB_CB(skb)->dccpd_reset_code = code;

	return dccp_transmit_skb(sk, skb);
}

/*
 * Do all connect socket setups that can be done AF independent.
 */
int dccp_connect(struct sock *sk)
{
	struct sk_buff *skb;
	struct dccp_sock *dp = dccp_sk(sk);
	struct dst_entry *dst = __sk_dst_get(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);

	sk->sk_err = 0;
	sock_reset_flag(sk, SOCK_DONE);

	dccp_sync_mss(sk, dst_mtu(dst));

	/* do not connect if feature negotiation setup fails */
	if (dccp_feat_finalise_settings(dccp_sk(sk)))
		return -EPROTO;

	/* Initialise GAR as per 8.5; AWL/AWH are set in dccp_transmit_skb() */
	dp->dccps_gar = dp->dccps_iss;

	skb = alloc_skb(sk->sk_prot->max_header, sk->sk_allocation);
	if (unlikely(skb == NULL))
		return -ENOBUFS;

	/* Reserve space for headers. */
	skb_reserve(skb, sk->sk_prot->max_header);

	DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_REQUEST;

	dccp_transmit_skb(sk, dccp_skb_entail(sk, skb));
	DCCP_INC_STATS(DCCP_MIB_ACTIVEOPENS);

	/* Timer for repeating the REQUEST until an answer. */
	icsk->icsk_retransmits = 0;
	inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
				  icsk->icsk_rto, DCCP_RTO_MAX);
	return 0;
}

EXPORT_SYMBOL_GPL(dccp_connect);

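/*
 * Send a pure Ack immediately. If no skb can be allocated, fall back to the
 * delayed-Ack timer so that the Ack is retried later.
 */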
void dccp_send_ack(struct sock *sk)
{
	/* If we have been reset, we may not send again. */
	if (sk->sk_state != DCCP_CLOSED) {
		struct sk_buff *skb = alloc_skb(sk->sk_prot->max_header,
						GFP_ATOMIC);

		if (skb == NULL) {
			inet_csk_schedule_ack(sk);
			inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN;
			inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
						  TCP_DELACK_MAX,
						  DCCP_RTO_MAX);
			return;
		}

		/* Reserve space for headers */
		skb_reserve(skb, sk->sk_prot->max_header);
		DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_ACK;
		dccp_transmit_skb(sk, skb);
	}
}

EXPORT_SYMBOL_GPL(dccp_send_ack);

#if 0
/* FIXME: Is this still necessary (11.3) - currently nowhere used by DCCP. */
void dccp_send_delayed_ack(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	/*
	 * FIXME: tune this timer. elapsed time fixes the skew, so no problem
	 * with using 2s, and active senders also piggyback the ACK into a
	 * DATAACK packet, so this is really for quiescent senders.
	 */
	unsigned long timeout = jiffies + 2 * HZ;

	/* Use new timeout only if there wasn't an older one earlier. */
	if (icsk->icsk_ack.pending & ICSK_ACK_TIMER) {
		/* If delack timer was blocked or is about to expire,
		 * send ACK now.
		 *
		 * FIXME: check the "about to expire" part
		 */
		if (icsk->icsk_ack.blocked) {
			dccp_send_ack(sk);
			return;
		}

		if (!time_before(timeout, icsk->icsk_ack.timeout))
			timeout = icsk->icsk_ack.timeout;
	}
	icsk->icsk_ack.pending |= ICSK_ACK_SCHED | ICSK_ACK_TIMER;
	icsk->icsk_ack.timeout = timeout;
	sk_reset_timer(sk, &icsk->icsk_delack_timer, timeout);
}
#endif

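/*
 * Send a Sync or SyncAck (as selected by @pkt_type) acknowledging @ackno.
 * Also used to flush header options that a CCID scheduled out-of-band, see
 * dccps_sync_scheduled in dccp_xmit_packet().
 */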
void dccp_send_sync(struct sock *sk, const u64 ackno,
		    const enum dccp_pkt_type pkt_type)
{
	/*
	 * We are not putting this on the write queue, so
	 * dccp_transmit_skb() will set the ownership to this
	 * sock.
	 */
	struct sk_buff *skb = alloc_skb(sk->sk_prot->max_header, GFP_ATOMIC);

	if (skb == NULL) {
		/* FIXME: how to make sure the sync is sent? */
		DCCP_CRIT("could not send %s", dccp_packet_name(pkt_type));
		return;
	}

	/* Reserve space for headers and prepare control bits. */
	skb_reserve(skb, sk->sk_prot->max_header);
	DCCP_SKB_CB(skb)->dccpd_type = pkt_type;
	DCCP_SKB_CB(skb)->dccpd_ack_seq = ackno;

	/*
	 * Clear the flag in case the Sync was scheduled for out-of-band data,
	 * such as carrying a long Ack Vector.
	 */
	dccp_sk(sk)->dccps_sync_scheduled = 0;

	dccp_transmit_skb(sk, skb);
}

EXPORT_SYMBOL_GPL(dccp_send_sync);

/*
 * Send a DCCP_PKT_CLOSE/CLOSEREQ. The caller locks the socket for us. This
 * cannot be allowed to fail queueing a DCCP_PKT_CLOSE/CLOSEREQ frame under
 * any circumstances.
 */
void dccp_send_close(struct sock *sk, const int active)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct sk_buff *skb;
	const gfp_t prio = active ? GFP_KERNEL : GFP_ATOMIC;

	skb = alloc_skb(sk->sk_prot->max_header, prio);
	if (skb == NULL)
		return;

	/* Reserve space for headers and prepare control bits. */
	skb_reserve(skb, sk->sk_prot->max_header);
	if (dp->dccps_role == DCCP_ROLE_SERVER && !dp->dccps_server_timewait)
		DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_CLOSEREQ;
	else
		DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_CLOSE;

	if (active) {
		skb = dccp_skb_entail(sk, skb);
		/*
		 * Retransmission timer for active-close: RFC 4340, 8.3 requires
		 * the Close/CloseReq to be retransmitted until the CLOSING/
		 * CLOSEREQ state can be left. The initial timeout is 2 RTTs.
		 * Since RTT measurement is done by the CCIDs, there is no easy
		 * way to get an RTT sample. The fallback RTT from RFC 4340, 3.4
		 * is too low (200ms); we use a high value to avoid unnecessary
		 * retransmissions when the link RTT is > 0.2 seconds.
		 * FIXME: Let main module sample RTTs and use that instead.
		 */
		inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
					  DCCP_TIMEOUT_INIT, DCCP_RTO_MAX);
	}
	dccp_transmit_skb(sk, skb);
}