1  /*
2   * Copyright (c) 2004 Mellanox Technologies Ltd.  All rights reserved.
3   * Copyright (c) 2004 Infinicon Corporation.  All rights reserved.
4   * Copyright (c) 2004 Intel Corporation.  All rights reserved.
5   * Copyright (c) 2004 Topspin Corporation.  All rights reserved.
6   * Copyright (c) 2004 Voltaire Corporation.  All rights reserved.
7   * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
8   * Copyright (c) 2005, 2006 Cisco Systems.  All rights reserved.
9   *
10   * This software is available to you under a choice of one of two
11   * licenses.  You may choose to be licensed under the terms of the GNU
12   * General Public License (GPL) Version 2, available from the file
13   * COPYING in the main directory of this source tree, or the
14   * OpenIB.org BSD license below:
15   *
16   *     Redistribution and use in source and binary forms, with or
17   *     without modification, are permitted provided that the following
18   *     conditions are met:
19   *
20   *      - Redistributions of source code must retain the above
21   *        copyright notice, this list of conditions and the following
22   *        disclaimer.
23   *
24   *      - Redistributions in binary form must reproduce the above
25   *        copyright notice, this list of conditions and the following
26   *        disclaimer in the documentation and/or other materials
27   *        provided with the distribution.
28   *
29   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
30   * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
31   * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
32   * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
33   * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
34   * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
35   * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
36   * SOFTWARE.
37   */
38  
39  #include <linux/errno.h>
40  #include <linux/err.h>
41  #include <linux/export.h>
42  #include <linux/string.h>
43  #include <linux/slab.h>
44  #include <linux/in.h>
45  #include <linux/in6.h>
46  #include <net/addrconf.h>
47  #include <linux/security.h>
48  
49  #include <rdma/ib_verbs.h>
50  #include <rdma/ib_cache.h>
51  #include <rdma/ib_addr.h>
52  #include <rdma/rw.h>
53  #include <rdma/lag.h>
54  
55  #include "core_priv.h"
56  #include <trace/events/rdma_core.h>
57  
58  static int ib_resolve_eth_dmac(struct ib_device *device,
59  			       struct rdma_ah_attr *ah_attr);
60  
61  static const char * const ib_events[] = {
62  	[IB_EVENT_CQ_ERR]		= "CQ error",
63  	[IB_EVENT_QP_FATAL]		= "QP fatal error",
64  	[IB_EVENT_QP_REQ_ERR]		= "QP request error",
65  	[IB_EVENT_QP_ACCESS_ERR]	= "QP access error",
66  	[IB_EVENT_COMM_EST]		= "communication established",
67  	[IB_EVENT_SQ_DRAINED]		= "send queue drained",
68  	[IB_EVENT_PATH_MIG]		= "path migration successful",
69  	[IB_EVENT_PATH_MIG_ERR]		= "path migration error",
70  	[IB_EVENT_DEVICE_FATAL]		= "device fatal error",
71  	[IB_EVENT_PORT_ACTIVE]		= "port active",
72  	[IB_EVENT_PORT_ERR]		= "port error",
73  	[IB_EVENT_LID_CHANGE]		= "LID change",
74  	[IB_EVENT_PKEY_CHANGE]		= "P_key change",
75  	[IB_EVENT_SM_CHANGE]		= "SM change",
76  	[IB_EVENT_SRQ_ERR]		= "SRQ error",
77  	[IB_EVENT_SRQ_LIMIT_REACHED]	= "SRQ limit reached",
78  	[IB_EVENT_QP_LAST_WQE_REACHED]	= "last WQE reached",
79  	[IB_EVENT_CLIENT_REREGISTER]	= "client reregister",
80  	[IB_EVENT_GID_CHANGE]		= "GID changed",
81  };
82  
const char *__attribute_const__ ib_event_msg(enum ib_event_type event)
84  {
85  	size_t index = event;
86  
87  	return (index < ARRAY_SIZE(ib_events) && ib_events[index]) ?
88  			ib_events[index] : "unrecognized event";
89  }
90  EXPORT_SYMBOL(ib_event_msg);
91  
92  static const char * const wc_statuses[] = {
93  	[IB_WC_SUCCESS]			= "success",
94  	[IB_WC_LOC_LEN_ERR]		= "local length error",
95  	[IB_WC_LOC_QP_OP_ERR]		= "local QP operation error",
96  	[IB_WC_LOC_EEC_OP_ERR]		= "local EE context operation error",
97  	[IB_WC_LOC_PROT_ERR]		= "local protection error",
98  	[IB_WC_WR_FLUSH_ERR]		= "WR flushed",
99  	[IB_WC_MW_BIND_ERR]		= "memory bind operation error",
100  	[IB_WC_BAD_RESP_ERR]		= "bad response error",
101  	[IB_WC_LOC_ACCESS_ERR]		= "local access error",
102  	[IB_WC_REM_INV_REQ_ERR]		= "remote invalid request error",
103  	[IB_WC_REM_ACCESS_ERR]		= "remote access error",
104  	[IB_WC_REM_OP_ERR]		= "remote operation error",
105  	[IB_WC_RETRY_EXC_ERR]		= "transport retry counter exceeded",
106  	[IB_WC_RNR_RETRY_EXC_ERR]	= "RNR retry counter exceeded",
107  	[IB_WC_LOC_RDD_VIOL_ERR]	= "local RDD violation error",
108  	[IB_WC_REM_INV_RD_REQ_ERR]	= "remote invalid RD request",
109  	[IB_WC_REM_ABORT_ERR]		= "operation aborted",
110  	[IB_WC_INV_EECN_ERR]		= "invalid EE context number",
111  	[IB_WC_INV_EEC_STATE_ERR]	= "invalid EE context state",
112  	[IB_WC_FATAL_ERR]		= "fatal error",
113  	[IB_WC_RESP_TIMEOUT_ERR]	= "response timeout error",
114  	[IB_WC_GENERAL_ERR]		= "general error",
115  };
116  
const char *__attribute_const__ ib_wc_status_msg(enum ib_wc_status status)
118  {
119  	size_t index = status;
120  
121  	return (index < ARRAY_SIZE(wc_statuses) && wc_statuses[index]) ?
122  			wc_statuses[index] : "unrecognized status";
123  }
124  EXPORT_SYMBOL(ib_wc_status_msg);
125  
__attribute_const__ int ib_rate_to_mult(enum ib_rate rate)
127  {
128  	switch (rate) {
129  	case IB_RATE_2_5_GBPS: return   1;
130  	case IB_RATE_5_GBPS:   return   2;
131  	case IB_RATE_10_GBPS:  return   4;
132  	case IB_RATE_20_GBPS:  return   8;
133  	case IB_RATE_30_GBPS:  return  12;
134  	case IB_RATE_40_GBPS:  return  16;
135  	case IB_RATE_60_GBPS:  return  24;
136  	case IB_RATE_80_GBPS:  return  32;
137  	case IB_RATE_120_GBPS: return  48;
138  	case IB_RATE_14_GBPS:  return   6;
139  	case IB_RATE_56_GBPS:  return  22;
140  	case IB_RATE_112_GBPS: return  45;
141  	case IB_RATE_168_GBPS: return  67;
142  	case IB_RATE_25_GBPS:  return  10;
143  	case IB_RATE_100_GBPS: return  40;
144  	case IB_RATE_200_GBPS: return  80;
145  	case IB_RATE_300_GBPS: return 120;
146  	case IB_RATE_28_GBPS:  return  11;
147  	case IB_RATE_50_GBPS:  return  20;
148  	case IB_RATE_400_GBPS: return 160;
149  	case IB_RATE_600_GBPS: return 240;
150  	case IB_RATE_800_GBPS: return 320;
151  	default:	       return  -1;
152  	}
153  }
154  EXPORT_SYMBOL(ib_rate_to_mult);
155  
__attribute_const__ enum ib_rate mult_to_ib_rate(int mult)
157  {
158  	switch (mult) {
159  	case 1:   return IB_RATE_2_5_GBPS;
160  	case 2:   return IB_RATE_5_GBPS;
161  	case 4:   return IB_RATE_10_GBPS;
162  	case 8:   return IB_RATE_20_GBPS;
163  	case 12:  return IB_RATE_30_GBPS;
164  	case 16:  return IB_RATE_40_GBPS;
165  	case 24:  return IB_RATE_60_GBPS;
166  	case 32:  return IB_RATE_80_GBPS;
167  	case 48:  return IB_RATE_120_GBPS;
168  	case 6:   return IB_RATE_14_GBPS;
169  	case 22:  return IB_RATE_56_GBPS;
170  	case 45:  return IB_RATE_112_GBPS;
171  	case 67:  return IB_RATE_168_GBPS;
172  	case 10:  return IB_RATE_25_GBPS;
173  	case 40:  return IB_RATE_100_GBPS;
174  	case 80:  return IB_RATE_200_GBPS;
175  	case 120: return IB_RATE_300_GBPS;
176  	case 11:  return IB_RATE_28_GBPS;
177  	case 20:  return IB_RATE_50_GBPS;
178  	case 160: return IB_RATE_400_GBPS;
179  	case 240: return IB_RATE_600_GBPS;
180  	case 320: return IB_RATE_800_GBPS;
181  	default:  return IB_RATE_PORT_CURRENT;
182  	}
183  }
184  EXPORT_SYMBOL(mult_to_ib_rate);
185  
__attribute_const__ int ib_rate_to_mbps(enum ib_rate rate)
187  {
188  	switch (rate) {
189  	case IB_RATE_2_5_GBPS: return 2500;
190  	case IB_RATE_5_GBPS:   return 5000;
191  	case IB_RATE_10_GBPS:  return 10000;
192  	case IB_RATE_20_GBPS:  return 20000;
193  	case IB_RATE_30_GBPS:  return 30000;
194  	case IB_RATE_40_GBPS:  return 40000;
195  	case IB_RATE_60_GBPS:  return 60000;
196  	case IB_RATE_80_GBPS:  return 80000;
197  	case IB_RATE_120_GBPS: return 120000;
198  	case IB_RATE_14_GBPS:  return 14062;
199  	case IB_RATE_56_GBPS:  return 56250;
200  	case IB_RATE_112_GBPS: return 112500;
201  	case IB_RATE_168_GBPS: return 168750;
202  	case IB_RATE_25_GBPS:  return 25781;
203  	case IB_RATE_100_GBPS: return 103125;
204  	case IB_RATE_200_GBPS: return 206250;
205  	case IB_RATE_300_GBPS: return 309375;
206  	case IB_RATE_28_GBPS:  return 28125;
207  	case IB_RATE_50_GBPS:  return 53125;
208  	case IB_RATE_400_GBPS: return 425000;
209  	case IB_RATE_600_GBPS: return 637500;
210  	case IB_RATE_800_GBPS: return 850000;
211  	default:	       return -1;
212  	}
213  }
214  EXPORT_SYMBOL(ib_rate_to_mbps);
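
/*
 * Example (illustrative sketch, not part of the original file): converting
 * between the IB static rate enum, the 2.5 Gb/s multiplier and Mb/s.
 *
 *	int mult = ib_rate_to_mult(IB_RATE_100_GBPS);	returns 40
 *	int mbps = ib_rate_to_mbps(IB_RATE_100_GBPS);	returns 103125
 *	enum ib_rate rate = mult_to_ib_rate(mult);	returns IB_RATE_100_GBPS
 *
 * Both ib_rate_to_mult() and ib_rate_to_mbps() return -1 for an unknown
 * rate, while mult_to_ib_rate() falls back to IB_RATE_PORT_CURRENT.
 */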
215  
216  __attribute_const__ enum rdma_transport_type
rdma_node_get_transport(unsigned int node_type)
218  {
219  
220  	if (node_type == RDMA_NODE_USNIC)
221  		return RDMA_TRANSPORT_USNIC;
222  	if (node_type == RDMA_NODE_USNIC_UDP)
223  		return RDMA_TRANSPORT_USNIC_UDP;
224  	if (node_type == RDMA_NODE_RNIC)
225  		return RDMA_TRANSPORT_IWARP;
226  	if (node_type == RDMA_NODE_UNSPECIFIED)
227  		return RDMA_TRANSPORT_UNSPECIFIED;
228  
229  	return RDMA_TRANSPORT_IB;
230  }
231  EXPORT_SYMBOL(rdma_node_get_transport);
232  
enum rdma_link_layer rdma_port_get_link_layer(struct ib_device *device,
234  					      u32 port_num)
235  {
236  	enum rdma_transport_type lt;
237  	if (device->ops.get_link_layer)
238  		return device->ops.get_link_layer(device, port_num);
239  
240  	lt = rdma_node_get_transport(device->node_type);
241  	if (lt == RDMA_TRANSPORT_IB)
242  		return IB_LINK_LAYER_INFINIBAND;
243  
244  	return IB_LINK_LAYER_ETHERNET;
245  }
246  EXPORT_SYMBOL(rdma_port_get_link_layer);
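
/*
 * Example (illustrative sketch, not part of the original file): a ULP
 * choosing its addressing setup based on the port's link layer. "ibdev" and
 * "port" are placeholders for the caller's device and port number, and the
 * two helpers are hypothetical.
 *
 *	if (rdma_port_get_link_layer(ibdev, port) == IB_LINK_LAYER_ETHERNET)
 *		setup_roce_addressing();
 *	else
 *		setup_ib_lid_addressing();
 */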
247  
248  /* Protection domains */
249  
250  /**
251   * __ib_alloc_pd - Allocates an unused protection domain.
252   * @device: The device on which to allocate the protection domain.
253   * @flags: protection domain flags
254   * @caller: caller's build-time module name
255   *
256   * A protection domain object provides an association between QPs, shared
257   * receive queues, address handles, memory regions, and memory windows.
258   *
259   * Every PD has a local_dma_lkey which can be used as the lkey value for local
260   * memory operations.
261   */
struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags,
263  		const char *caller)
264  {
265  	struct ib_pd *pd;
266  	int mr_access_flags = 0;
267  	int ret;
268  
269  	pd = rdma_zalloc_drv_obj(device, ib_pd);
270  	if (!pd)
271  		return ERR_PTR(-ENOMEM);
272  
273  	pd->device = device;
274  	pd->flags = flags;
275  
276  	rdma_restrack_new(&pd->res, RDMA_RESTRACK_PD);
277  	rdma_restrack_set_name(&pd->res, caller);
278  
279  	ret = device->ops.alloc_pd(pd, NULL);
280  	if (ret) {
281  		rdma_restrack_put(&pd->res);
282  		kfree(pd);
283  		return ERR_PTR(ret);
284  	}
285  	rdma_restrack_add(&pd->res);
286  
287  	if (device->attrs.kernel_cap_flags & IBK_LOCAL_DMA_LKEY)
288  		pd->local_dma_lkey = device->local_dma_lkey;
289  	else
290  		mr_access_flags |= IB_ACCESS_LOCAL_WRITE;
291  
292  	if (flags & IB_PD_UNSAFE_GLOBAL_RKEY) {
293  		pr_warn("%s: enabling unsafe global rkey\n", caller);
294  		mr_access_flags |= IB_ACCESS_REMOTE_READ | IB_ACCESS_REMOTE_WRITE;
295  	}
296  
297  	if (mr_access_flags) {
298  		struct ib_mr *mr;
299  
300  		mr = pd->device->ops.get_dma_mr(pd, mr_access_flags);
301  		if (IS_ERR(mr)) {
302  			ib_dealloc_pd(pd);
303  			return ERR_CAST(mr);
304  		}
305  
306  		mr->device	= pd->device;
307  		mr->pd		= pd;
308  		mr->type        = IB_MR_TYPE_DMA;
309  		mr->uobject	= NULL;
310  		mr->need_inval	= false;
311  
312  		pd->__internal_mr = mr;
313  
314  		if (!(device->attrs.kernel_cap_flags & IBK_LOCAL_DMA_LKEY))
315  			pd->local_dma_lkey = pd->__internal_mr->lkey;
316  
317  		if (flags & IB_PD_UNSAFE_GLOBAL_RKEY)
318  			pd->unsafe_global_rkey = pd->__internal_mr->rkey;
319  	}
320  
321  	return pd;
322  }
323  EXPORT_SYMBOL(__ib_alloc_pd);
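
/*
 * Example (illustrative sketch, not part of the original file): kernel ULPs
 * normally allocate a PD through the ib_alloc_pd() wrapper, which supplies
 * KBUILD_MODNAME as @caller. "ibdev" is a placeholder ib_device pointer.
 *
 *	struct ib_pd *pd;
 *
 *	pd = ib_alloc_pd(ibdev, 0);
 *	if (IS_ERR(pd))
 *		return PTR_ERR(pd);
 *
 *	... create CQs/QPs/MRs against pd, use pd->local_dma_lkey for
 *	    local SGEs ...
 *
 *	ib_dealloc_pd(pd);
 */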
324  
325  /**
326   * ib_dealloc_pd_user - Deallocates a protection domain.
327   * @pd: The protection domain to deallocate.
328   * @udata: Valid user data or NULL for kernel object
329   *
330   * It is an error to call this function while any resources in the pd still
331   * exist.  The caller is responsible to synchronously destroy them and
332   * guarantee no new allocations will happen.
333   */
int ib_dealloc_pd_user(struct ib_pd *pd, struct ib_udata *udata)
335  {
336  	int ret;
337  
338  	if (pd->__internal_mr) {
339  		ret = pd->device->ops.dereg_mr(pd->__internal_mr, NULL);
340  		WARN_ON(ret);
341  		pd->__internal_mr = NULL;
342  	}
343  
344  	ret = pd->device->ops.dealloc_pd(pd, udata);
345  	if (ret)
346  		return ret;
347  
348  	rdma_restrack_del(&pd->res);
349  	kfree(pd);
350  	return ret;
351  }
352  EXPORT_SYMBOL(ib_dealloc_pd_user);
353  
354  /* Address handles */
355  
356  /**
357   * rdma_copy_ah_attr - Copy rdma ah attribute from source to destination.
 * @dest:       Pointer to destination ah_attr. The contents of the destination
 *              are assumed to be invalid and are overwritten.
360   * @src:        Pointer to source ah_attr.
361   */
void rdma_copy_ah_attr(struct rdma_ah_attr *dest,
363  		       const struct rdma_ah_attr *src)
364  {
365  	*dest = *src;
366  	if (dest->grh.sgid_attr)
367  		rdma_hold_gid_attr(dest->grh.sgid_attr);
368  }
369  EXPORT_SYMBOL(rdma_copy_ah_attr);
370  
371  /**
372   * rdma_replace_ah_attr - Replace valid ah_attr with new one.
373   * @old:        Pointer to existing ah_attr which needs to be replaced.
374   *              old is assumed to be valid or zero'd
375   * @new:        Pointer to the new ah_attr.
376   *
 * rdma_replace_ah_attr() first releases any reference held by the old ah_attr
 * if the old ah_attr is valid; after that it copies the new attribute and
 * takes a reference on the new attribute's sgid_attr.
380   */
void rdma_replace_ah_attr(struct rdma_ah_attr *old,
382  			  const struct rdma_ah_attr *new)
383  {
384  	rdma_destroy_ah_attr(old);
385  	*old = *new;
386  	if (old->grh.sgid_attr)
387  		rdma_hold_gid_attr(old->grh.sgid_attr);
388  }
389  EXPORT_SYMBOL(rdma_replace_ah_attr);
390  
391  /**
392   * rdma_move_ah_attr - Move ah_attr pointed by source to destination.
393   * @dest:       Pointer to destination ah_attr to copy to.
394   *              dest is assumed to be valid or zero'd
395   * @src:        Pointer to the new ah_attr.
396   *
397   * rdma_move_ah_attr() first releases any reference in the destination ah_attr
398   * if it is valid. This also transfers ownership of internal references from
399   * src to dest, making src invalid in the process. No new reference of the src
400   * ah_attr is taken.
401   */
void rdma_move_ah_attr(struct rdma_ah_attr *dest, struct rdma_ah_attr *src)
403  {
404  	rdma_destroy_ah_attr(dest);
405  	*dest = *src;
406  	src->grh.sgid_attr = NULL;
407  }
408  EXPORT_SYMBOL(rdma_move_ah_attr);
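
/*
 * Example (illustrative sketch, not part of the original file): the copy and
 * move helpers differ in reference handling. A copy takes an extra reference
 * on the SGID attribute, so both attributes must eventually be destroyed; a
 * move leaves the source without a reference, so only the destination is
 * destroyed. "attr" is a placeholder for a valid rdma_ah_attr.
 *
 *	struct rdma_ah_attr dup = {};
 *	struct rdma_ah_attr dest = {};
 *
 *	rdma_copy_ah_attr(&dup, &attr);		attr remains valid
 *	rdma_destroy_ah_attr(&dup);
 *
 *	rdma_move_ah_attr(&dest, &attr);	attr is now invalid
 *	rdma_destroy_ah_attr(&dest);
 */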
409  
410  /*
411   * Validate that the rdma_ah_attr is valid for the device before passing it
412   * off to the driver.
413   */
static int rdma_check_ah_attr(struct ib_device *device,
415  			      struct rdma_ah_attr *ah_attr)
416  {
417  	if (!rdma_is_port_valid(device, ah_attr->port_num))
418  		return -EINVAL;
419  
420  	if ((rdma_is_grh_required(device, ah_attr->port_num) ||
421  	     ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) &&
422  	    !(ah_attr->ah_flags & IB_AH_GRH))
423  		return -EINVAL;
424  
425  	if (ah_attr->grh.sgid_attr) {
426  		/*
427  		 * Make sure the passed sgid_attr is consistent with the
428  		 * parameters
429  		 */
430  		if (ah_attr->grh.sgid_attr->index != ah_attr->grh.sgid_index ||
431  		    ah_attr->grh.sgid_attr->port_num != ah_attr->port_num)
432  			return -EINVAL;
433  	}
434  	return 0;
435  }
436  
437  /*
438   * If the ah requires a GRH then ensure that sgid_attr pointer is filled in.
439   * On success the caller is responsible to call rdma_unfill_sgid_attr().
440   */
static int rdma_fill_sgid_attr(struct ib_device *device,
442  			       struct rdma_ah_attr *ah_attr,
443  			       const struct ib_gid_attr **old_sgid_attr)
444  {
445  	const struct ib_gid_attr *sgid_attr;
446  	struct ib_global_route *grh;
447  	int ret;
448  
449  	*old_sgid_attr = ah_attr->grh.sgid_attr;
450  
451  	ret = rdma_check_ah_attr(device, ah_attr);
452  	if (ret)
453  		return ret;
454  
455  	if (!(ah_attr->ah_flags & IB_AH_GRH))
456  		return 0;
457  
458  	grh = rdma_ah_retrieve_grh(ah_attr);
459  	if (grh->sgid_attr)
460  		return 0;
461  
462  	sgid_attr =
463  		rdma_get_gid_attr(device, ah_attr->port_num, grh->sgid_index);
464  	if (IS_ERR(sgid_attr))
465  		return PTR_ERR(sgid_attr);
466  
	/* Move ownership of the kref into the ah_attr */
468  	grh->sgid_attr = sgid_attr;
469  	return 0;
470  }
471  
static void rdma_unfill_sgid_attr(struct rdma_ah_attr *ah_attr,
473  				  const struct ib_gid_attr *old_sgid_attr)
474  {
475  	/*
476  	 * Fill didn't change anything, the caller retains ownership of
477  	 * whatever it passed
478  	 */
479  	if (ah_attr->grh.sgid_attr == old_sgid_attr)
480  		return;
481  
482  	/*
	 * Otherwise, we need to undo what rdma_fill_sgid_attr() did so the
	 * caller doesn't see any change in the rdma_ah_attr. If we get here
485  	 * old_sgid_attr is NULL.
486  	 */
487  	rdma_destroy_ah_attr(ah_attr);
488  }
489  
490  static const struct ib_gid_attr *
rdma_update_sgid_attr(struct rdma_ah_attr *ah_attr,
492  		      const struct ib_gid_attr *old_attr)
493  {
494  	if (old_attr)
495  		rdma_put_gid_attr(old_attr);
496  	if (ah_attr->ah_flags & IB_AH_GRH) {
497  		rdma_hold_gid_attr(ah_attr->grh.sgid_attr);
498  		return ah_attr->grh.sgid_attr;
499  	}
500  	return NULL;
501  }
502  
static struct ib_ah *_rdma_create_ah(struct ib_pd *pd,
504  				     struct rdma_ah_attr *ah_attr,
505  				     u32 flags,
506  				     struct ib_udata *udata,
507  				     struct net_device *xmit_slave)
508  {
509  	struct rdma_ah_init_attr init_attr = {};
510  	struct ib_device *device = pd->device;
511  	struct ib_ah *ah;
512  	int ret;
513  
514  	might_sleep_if(flags & RDMA_CREATE_AH_SLEEPABLE);
515  
516  	if (!udata && !device->ops.create_ah)
517  		return ERR_PTR(-EOPNOTSUPP);
518  
519  	ah = rdma_zalloc_drv_obj_gfp(
520  		device, ib_ah,
521  		(flags & RDMA_CREATE_AH_SLEEPABLE) ? GFP_KERNEL : GFP_ATOMIC);
522  	if (!ah)
523  		return ERR_PTR(-ENOMEM);
524  
525  	ah->device = device;
526  	ah->pd = pd;
527  	ah->type = ah_attr->type;
528  	ah->sgid_attr = rdma_update_sgid_attr(ah_attr, NULL);
529  	init_attr.ah_attr = ah_attr;
530  	init_attr.flags = flags;
531  	init_attr.xmit_slave = xmit_slave;
532  
533  	if (udata)
534  		ret = device->ops.create_user_ah(ah, &init_attr, udata);
535  	else
536  		ret = device->ops.create_ah(ah, &init_attr, NULL);
537  	if (ret) {
538  		if (ah->sgid_attr)
539  			rdma_put_gid_attr(ah->sgid_attr);
540  		kfree(ah);
541  		return ERR_PTR(ret);
542  	}
543  
544  	atomic_inc(&pd->usecnt);
545  	return ah;
546  }
547  
548  /**
549   * rdma_create_ah - Creates an address handle for the
550   * given address vector.
551   * @pd: The protection domain associated with the address handle.
552   * @ah_attr: The attributes of the address vector.
553   * @flags: Create address handle flags (see enum rdma_create_ah_flags).
554   *
 * Returns a newly allocated address handle on success, or an ERR_PTR on error.
556   * The address handle is used to reference a local or global destination
557   * in all UD QP post sends.
558   */
struct ib_ah *rdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr,
560  			     u32 flags)
561  {
562  	const struct ib_gid_attr *old_sgid_attr;
563  	struct net_device *slave;
564  	struct ib_ah *ah;
565  	int ret;
566  
567  	ret = rdma_fill_sgid_attr(pd->device, ah_attr, &old_sgid_attr);
568  	if (ret)
569  		return ERR_PTR(ret);
570  	slave = rdma_lag_get_ah_roce_slave(pd->device, ah_attr,
571  					   (flags & RDMA_CREATE_AH_SLEEPABLE) ?
572  					   GFP_KERNEL : GFP_ATOMIC);
573  	if (IS_ERR(slave)) {
574  		rdma_unfill_sgid_attr(ah_attr, old_sgid_attr);
575  		return (void *)slave;
576  	}
577  	ah = _rdma_create_ah(pd, ah_attr, flags, NULL, slave);
578  	rdma_lag_put_ah_roce_slave(slave);
579  	rdma_unfill_sgid_attr(ah_attr, old_sgid_attr);
580  	return ah;
581  }
582  EXPORT_SYMBOL(rdma_create_ah);
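
/*
 * Example (illustrative sketch, not part of the original file): building a
 * LID-addressed (IB link layer) address handle for UD sends. "pd", "port"
 * and "remote_lid" are placeholders supplied by the caller.
 *
 *	struct rdma_ah_attr ah_attr = {};
 *	struct ib_ah *ah;
 *
 *	ah_attr.type = RDMA_AH_ATTR_TYPE_IB;
 *	rdma_ah_set_port_num(&ah_attr, port);
 *	rdma_ah_set_sl(&ah_attr, 0);
 *	rdma_ah_set_dlid(&ah_attr, remote_lid);
 *
 *	ah = rdma_create_ah(pd, &ah_attr, RDMA_CREATE_AH_SLEEPABLE);
 *	if (IS_ERR(ah))
 *		return PTR_ERR(ah);
 *	...
 *	rdma_destroy_ah(ah, RDMA_DESTROY_AH_SLEEPABLE);
 */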
583  
584  /**
585   * rdma_create_user_ah - Creates an address handle for the
586   * given address vector.
 * It resolves the destination MAC address for RoCE-type ah attributes.
588   * @pd: The protection domain associated with the address handle.
589   * @ah_attr: The attributes of the address vector.
 * @udata: pointer to user's input/output buffer information needed by the
 *         provider driver.
592   *
 * Returns a newly allocated address handle on success, or an ERR_PTR on error.
594   * The address handle is used to reference a local or global destination
595   * in all UD QP post sends.
596   */
struct ib_ah *rdma_create_user_ah(struct ib_pd *pd,
598  				  struct rdma_ah_attr *ah_attr,
599  				  struct ib_udata *udata)
600  {
601  	const struct ib_gid_attr *old_sgid_attr;
602  	struct ib_ah *ah;
603  	int err;
604  
605  	err = rdma_fill_sgid_attr(pd->device, ah_attr, &old_sgid_attr);
606  	if (err)
607  		return ERR_PTR(err);
608  
609  	if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) {
610  		err = ib_resolve_eth_dmac(pd->device, ah_attr);
611  		if (err) {
612  			ah = ERR_PTR(err);
613  			goto out;
614  		}
615  	}
616  
617  	ah = _rdma_create_ah(pd, ah_attr, RDMA_CREATE_AH_SLEEPABLE,
618  			     udata, NULL);
619  
620  out:
621  	rdma_unfill_sgid_attr(ah_attr, old_sgid_attr);
622  	return ah;
623  }
624  EXPORT_SYMBOL(rdma_create_user_ah);
625  
int ib_get_rdma_header_version(const union rdma_network_hdr *hdr)
627  {
628  	const struct iphdr *ip4h = (struct iphdr *)&hdr->roce4grh;
629  	struct iphdr ip4h_checked;
630  	const struct ipv6hdr *ip6h = (struct ipv6hdr *)&hdr->ibgrh;
631  
	/* If it's IPv6, the version must be 6. Otherwise the first
633  	 * 20 bytes (before the IPv4 header) are garbled.
634  	 */
635  	if (ip6h->version != 6)
636  		return (ip4h->version == 4) ? 4 : 0;
637  	/* version may be 6 or 4 because the first 20 bytes could be garbled */
638  
639  	/* RoCE v2 requires no options, thus header length
640  	 * must be 5 words
641  	 */
642  	if (ip4h->ihl != 5)
643  		return 6;
644  
645  	/* Verify checksum.
646  	 * We can't write on scattered buffers so we need to copy to
647  	 * temp buffer.
648  	 */
649  	memcpy(&ip4h_checked, ip4h, sizeof(ip4h_checked));
650  	ip4h_checked.check = 0;
651  	ip4h_checked.check = ip_fast_csum((u8 *)&ip4h_checked, 5);
652  	/* if IPv4 header checksum is OK, believe it */
653  	if (ip4h->check == ip4h_checked.check)
654  		return 4;
655  	return 6;
656  }
657  EXPORT_SYMBOL(ib_get_rdma_header_version);
658  
static enum rdma_network_type ib_get_net_type_by_grh(struct ib_device *device,
660  						     u32 port_num,
661  						     const struct ib_grh *grh)
662  {
663  	int grh_version;
664  
665  	if (rdma_protocol_ib(device, port_num))
666  		return RDMA_NETWORK_IB;
667  
668  	grh_version = ib_get_rdma_header_version((union rdma_network_hdr *)grh);
669  
670  	if (grh_version == 4)
671  		return RDMA_NETWORK_IPV4;
672  
673  	if (grh->next_hdr == IPPROTO_UDP)
674  		return RDMA_NETWORK_IPV6;
675  
676  	return RDMA_NETWORK_ROCE_V1;
677  }
678  
679  struct find_gid_index_context {
680  	u16 vlan_id;
681  	enum ib_gid_type gid_type;
682  };
683  
static bool find_gid_index(const union ib_gid *gid,
685  			   const struct ib_gid_attr *gid_attr,
686  			   void *context)
687  {
688  	struct find_gid_index_context *ctx = context;
689  	u16 vlan_id = 0xffff;
690  	int ret;
691  
692  	if (ctx->gid_type != gid_attr->gid_type)
693  		return false;
694  
695  	ret = rdma_read_gid_l2_fields(gid_attr, &vlan_id, NULL);
696  	if (ret)
697  		return false;
698  
699  	return ctx->vlan_id == vlan_id;
700  }
701  
702  static const struct ib_gid_attr *
get_sgid_attr_from_eth(struct ib_device *device, u32 port_num,
704  		       u16 vlan_id, const union ib_gid *sgid,
705  		       enum ib_gid_type gid_type)
706  {
707  	struct find_gid_index_context context = {.vlan_id = vlan_id,
708  						 .gid_type = gid_type};
709  
710  	return rdma_find_gid_by_filter(device, sgid, port_num, find_gid_index,
711  				       &context);
712  }
713  
int ib_get_gids_from_rdma_hdr(const union rdma_network_hdr *hdr,
715  			      enum rdma_network_type net_type,
716  			      union ib_gid *sgid, union ib_gid *dgid)
717  {
718  	struct sockaddr_in  src_in;
719  	struct sockaddr_in  dst_in;
720  	__be32 src_saddr, dst_saddr;
721  
722  	if (!sgid || !dgid)
723  		return -EINVAL;
724  
725  	if (net_type == RDMA_NETWORK_IPV4) {
726  		memcpy(&src_in.sin_addr.s_addr,
727  		       &hdr->roce4grh.saddr, 4);
728  		memcpy(&dst_in.sin_addr.s_addr,
729  		       &hdr->roce4grh.daddr, 4);
730  		src_saddr = src_in.sin_addr.s_addr;
731  		dst_saddr = dst_in.sin_addr.s_addr;
732  		ipv6_addr_set_v4mapped(src_saddr,
733  				       (struct in6_addr *)sgid);
734  		ipv6_addr_set_v4mapped(dst_saddr,
735  				       (struct in6_addr *)dgid);
736  		return 0;
737  	} else if (net_type == RDMA_NETWORK_IPV6 ||
		   net_type == RDMA_NETWORK_IB ||
		   net_type == RDMA_NETWORK_ROCE_V1) {
739  		*dgid = hdr->ibgrh.dgid;
740  		*sgid = hdr->ibgrh.sgid;
741  		return 0;
742  	} else {
743  		return -EINVAL;
744  	}
745  }
746  EXPORT_SYMBOL(ib_get_gids_from_rdma_hdr);
747  
748  /* Resolve destination mac address and hop limit for unicast destination
749   * GID entry, considering the source GID entry as well.
750   * ah_attribute must have valid port_num, sgid_index.
751   */
static int ib_resolve_unicast_gid_dmac(struct ib_device *device,
753  				       struct rdma_ah_attr *ah_attr)
754  {
755  	struct ib_global_route *grh = rdma_ah_retrieve_grh(ah_attr);
756  	const struct ib_gid_attr *sgid_attr = grh->sgid_attr;
757  	int hop_limit = 0xff;
758  	int ret = 0;
759  
760  	/* If destination is link local and source GID is RoCEv1,
761  	 * IP stack is not used.
762  	 */
763  	if (rdma_link_local_addr((struct in6_addr *)grh->dgid.raw) &&
764  	    sgid_attr->gid_type == IB_GID_TYPE_ROCE) {
765  		rdma_get_ll_mac((struct in6_addr *)grh->dgid.raw,
766  				ah_attr->roce.dmac);
767  		return ret;
768  	}
769  
770  	ret = rdma_addr_find_l2_eth_by_grh(&sgid_attr->gid, &grh->dgid,
771  					   ah_attr->roce.dmac,
772  					   sgid_attr, &hop_limit);
773  
774  	grh->hop_limit = hop_limit;
775  	return ret;
776  }
777  
778  /*
779   * This function initializes address handle attributes from the incoming packet.
 * The incoming packet carries the dgid of the receiving node, on which this
 * code is executing, while its sgid holds the GID of the sender.
 *
 * When resolving the destination MAC address, the received dgid is used as
 * the sgid and the received sgid is used as the dgid, because the received
 * sgid is the GID of the destination we respond to.
786   *
787   * On success the caller is responsible to call rdma_destroy_ah_attr on the
788   * attr.
789   */
int ib_init_ah_attr_from_wc(struct ib_device *device, u32 port_num,
791  			    const struct ib_wc *wc, const struct ib_grh *grh,
792  			    struct rdma_ah_attr *ah_attr)
793  {
794  	u32 flow_class;
795  	int ret;
796  	enum rdma_network_type net_type = RDMA_NETWORK_IB;
797  	enum ib_gid_type gid_type = IB_GID_TYPE_IB;
798  	const struct ib_gid_attr *sgid_attr;
799  	int hoplimit = 0xff;
800  	union ib_gid dgid;
801  	union ib_gid sgid;
802  
803  	might_sleep();
804  
805  	memset(ah_attr, 0, sizeof *ah_attr);
806  	ah_attr->type = rdma_ah_find_type(device, port_num);
807  	if (rdma_cap_eth_ah(device, port_num)) {
808  		if (wc->wc_flags & IB_WC_WITH_NETWORK_HDR_TYPE)
809  			net_type = wc->network_hdr_type;
810  		else
811  			net_type = ib_get_net_type_by_grh(device, port_num, grh);
812  		gid_type = ib_network_to_gid_type(net_type);
813  	}
814  	ret = ib_get_gids_from_rdma_hdr((union rdma_network_hdr *)grh, net_type,
815  					&sgid, &dgid);
816  	if (ret)
817  		return ret;
818  
819  	rdma_ah_set_sl(ah_attr, wc->sl);
820  	rdma_ah_set_port_num(ah_attr, port_num);
821  
822  	if (rdma_protocol_roce(device, port_num)) {
823  		u16 vlan_id = wc->wc_flags & IB_WC_WITH_VLAN ?
824  				wc->vlan_id : 0xffff;
825  
826  		if (!(wc->wc_flags & IB_WC_GRH))
827  			return -EPROTOTYPE;
828  
829  		sgid_attr = get_sgid_attr_from_eth(device, port_num,
830  						   vlan_id, &dgid,
831  						   gid_type);
832  		if (IS_ERR(sgid_attr))
833  			return PTR_ERR(sgid_attr);
834  
835  		flow_class = be32_to_cpu(grh->version_tclass_flow);
836  		rdma_move_grh_sgid_attr(ah_attr,
837  					&sgid,
838  					flow_class & 0xFFFFF,
839  					hoplimit,
840  					(flow_class >> 20) & 0xFF,
841  					sgid_attr);
842  
843  		ret = ib_resolve_unicast_gid_dmac(device, ah_attr);
844  		if (ret)
845  			rdma_destroy_ah_attr(ah_attr);
846  
847  		return ret;
848  	} else {
849  		rdma_ah_set_dlid(ah_attr, wc->slid);
850  		rdma_ah_set_path_bits(ah_attr, wc->dlid_path_bits);
851  
852  		if ((wc->wc_flags & IB_WC_GRH) == 0)
853  			return 0;
854  
855  		if (dgid.global.interface_id !=
856  					cpu_to_be64(IB_SA_WELL_KNOWN_GUID)) {
857  			sgid_attr = rdma_find_gid_by_port(
858  				device, &dgid, IB_GID_TYPE_IB, port_num, NULL);
859  		} else
860  			sgid_attr = rdma_get_gid_attr(device, port_num, 0);
861  
862  		if (IS_ERR(sgid_attr))
863  			return PTR_ERR(sgid_attr);
864  		flow_class = be32_to_cpu(grh->version_tclass_flow);
865  		rdma_move_grh_sgid_attr(ah_attr,
866  					&sgid,
867  					flow_class & 0xFFFFF,
868  					hoplimit,
869  					(flow_class >> 20) & 0xFF,
870  					sgid_attr);
871  
872  		return 0;
873  	}
874  }
875  EXPORT_SYMBOL(ib_init_ah_attr_from_wc);
876  
877  /**
878   * rdma_move_grh_sgid_attr - Sets the sgid attribute of GRH, taking ownership
879   * of the reference
880   *
881   * @attr:	Pointer to AH attribute structure
882   * @dgid:	Destination GID
883   * @flow_label:	Flow label
884   * @hop_limit:	Hop limit
885   * @traffic_class: traffic class
886   * @sgid_attr:	Pointer to SGID attribute
887   *
888   * This takes ownership of the sgid_attr reference. The caller must ensure
889   * rdma_destroy_ah_attr() is called before destroying the rdma_ah_attr after
890   * calling this function.
891   */
void rdma_move_grh_sgid_attr(struct rdma_ah_attr *attr, union ib_gid *dgid,
893  			     u32 flow_label, u8 hop_limit, u8 traffic_class,
894  			     const struct ib_gid_attr *sgid_attr)
895  {
896  	rdma_ah_set_grh(attr, dgid, flow_label, sgid_attr->index, hop_limit,
897  			traffic_class);
898  	attr->grh.sgid_attr = sgid_attr;
899  }
900  EXPORT_SYMBOL(rdma_move_grh_sgid_attr);
901  
902  /**
903   * rdma_destroy_ah_attr - Release reference to SGID attribute of
904   * ah attribute.
905   * @ah_attr: Pointer to ah attribute
906   *
907   * Release reference to the SGID attribute of the ah attribute if it is
908   * non NULL. It is safe to call this multiple times, and safe to call it on
909   * a zero initialized ah_attr.
910   */
void rdma_destroy_ah_attr(struct rdma_ah_attr *ah_attr)
912  {
913  	if (ah_attr->grh.sgid_attr) {
914  		rdma_put_gid_attr(ah_attr->grh.sgid_attr);
915  		ah_attr->grh.sgid_attr = NULL;
916  	}
917  }
918  EXPORT_SYMBOL(rdma_destroy_ah_attr);
919  
struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, const struct ib_wc *wc,
921  				   const struct ib_grh *grh, u32 port_num)
922  {
923  	struct rdma_ah_attr ah_attr;
924  	struct ib_ah *ah;
925  	int ret;
926  
927  	ret = ib_init_ah_attr_from_wc(pd->device, port_num, wc, grh, &ah_attr);
928  	if (ret)
929  		return ERR_PTR(ret);
930  
931  	ah = rdma_create_ah(pd, &ah_attr, RDMA_CREATE_AH_SLEEPABLE);
932  
933  	rdma_destroy_ah_attr(&ah_attr);
934  	return ah;
935  }
936  EXPORT_SYMBOL(ib_create_ah_from_wc);
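
/*
 * Example (illustrative sketch, not part of the original file): the typical
 * UD responder pattern, replying to a datagram received on a UD QP. "pd",
 * "port" and "recv_grh" are placeholders; recv_grh points at the GRH scatter
 * buffer of the receive and is only meaningful when IB_WC_GRH is set in
 * wc->wc_flags.
 *
 *	struct ib_ah *ah;
 *
 *	ah = ib_create_ah_from_wc(pd, wc, recv_grh, port);
 *	if (IS_ERR(ah))
 *		return PTR_ERR(ah);
 *
 *	... post a UD send that references ah, wc->src_qp and the
 *	    remote Q_Key ...
 *
 *	rdma_destroy_ah(ah, RDMA_DESTROY_AH_SLEEPABLE);
 */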
937  
int rdma_modify_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr)
939  {
940  	const struct ib_gid_attr *old_sgid_attr;
941  	int ret;
942  
943  	if (ah->type != ah_attr->type)
944  		return -EINVAL;
945  
946  	ret = rdma_fill_sgid_attr(ah->device, ah_attr, &old_sgid_attr);
947  	if (ret)
948  		return ret;
949  
950  	ret = ah->device->ops.modify_ah ?
951  		ah->device->ops.modify_ah(ah, ah_attr) :
952  		-EOPNOTSUPP;
953  
954  	ah->sgid_attr = rdma_update_sgid_attr(ah_attr, ah->sgid_attr);
955  	rdma_unfill_sgid_attr(ah_attr, old_sgid_attr);
956  	return ret;
957  }
958  EXPORT_SYMBOL(rdma_modify_ah);
959  
int rdma_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr)
961  {
962  	ah_attr->grh.sgid_attr = NULL;
963  
964  	return ah->device->ops.query_ah ?
965  		ah->device->ops.query_ah(ah, ah_attr) :
966  		-EOPNOTSUPP;
967  }
968  EXPORT_SYMBOL(rdma_query_ah);
969  
int rdma_destroy_ah_user(struct ib_ah *ah, u32 flags, struct ib_udata *udata)
971  {
972  	const struct ib_gid_attr *sgid_attr = ah->sgid_attr;
973  	struct ib_pd *pd;
974  	int ret;
975  
976  	might_sleep_if(flags & RDMA_DESTROY_AH_SLEEPABLE);
977  
978  	pd = ah->pd;
979  
980  	ret = ah->device->ops.destroy_ah(ah, flags);
981  	if (ret)
982  		return ret;
983  
984  	atomic_dec(&pd->usecnt);
985  	if (sgid_attr)
986  		rdma_put_gid_attr(sgid_attr);
987  
988  	kfree(ah);
989  	return ret;
990  }
991  EXPORT_SYMBOL(rdma_destroy_ah_user);
992  
993  /* Shared receive queues */
994  
995  /**
996   * ib_create_srq_user - Creates a SRQ associated with the specified protection
997   *   domain.
998   * @pd: The protection domain associated with the SRQ.
999   * @srq_init_attr: A list of initial attributes required to create the
1000   *   SRQ.  If SRQ creation succeeds, then the attributes are updated to
1001   *   the actual capabilities of the created SRQ.
1002   * @uobject: uobject pointer if this is not a kernel SRQ
1003   * @udata: udata pointer if this is not a kernel SRQ
1004   *
 * srq_attr->max_wr and srq_attr->max_sge are read to determine the
1006   * requested size of the SRQ, and set to the actual values allocated
1007   * on return.  If ib_create_srq() succeeds, then max_wr and max_sge
1008   * will always be at least as large as the requested values.
1009   */
struct ib_srq *ib_create_srq_user(struct ib_pd *pd,
1011  				  struct ib_srq_init_attr *srq_init_attr,
1012  				  struct ib_usrq_object *uobject,
1013  				  struct ib_udata *udata)
1014  {
1015  	struct ib_srq *srq;
1016  	int ret;
1017  
1018  	srq = rdma_zalloc_drv_obj(pd->device, ib_srq);
1019  	if (!srq)
1020  		return ERR_PTR(-ENOMEM);
1021  
1022  	srq->device = pd->device;
1023  	srq->pd = pd;
1024  	srq->event_handler = srq_init_attr->event_handler;
1025  	srq->srq_context = srq_init_attr->srq_context;
1026  	srq->srq_type = srq_init_attr->srq_type;
1027  	srq->uobject = uobject;
1028  
1029  	if (ib_srq_has_cq(srq->srq_type)) {
1030  		srq->ext.cq = srq_init_attr->ext.cq;
1031  		atomic_inc(&srq->ext.cq->usecnt);
1032  	}
1033  	if (srq->srq_type == IB_SRQT_XRC) {
1034  		srq->ext.xrc.xrcd = srq_init_attr->ext.xrc.xrcd;
1035  		if (srq->ext.xrc.xrcd)
1036  			atomic_inc(&srq->ext.xrc.xrcd->usecnt);
1037  	}
1038  	atomic_inc(&pd->usecnt);
1039  
1040  	rdma_restrack_new(&srq->res, RDMA_RESTRACK_SRQ);
1041  	rdma_restrack_parent_name(&srq->res, &pd->res);
1042  
1043  	ret = pd->device->ops.create_srq(srq, srq_init_attr, udata);
1044  	if (ret) {
1045  		rdma_restrack_put(&srq->res);
1046  		atomic_dec(&pd->usecnt);
1047  		if (srq->srq_type == IB_SRQT_XRC && srq->ext.xrc.xrcd)
1048  			atomic_dec(&srq->ext.xrc.xrcd->usecnt);
1049  		if (ib_srq_has_cq(srq->srq_type))
1050  			atomic_dec(&srq->ext.cq->usecnt);
1051  		kfree(srq);
1052  		return ERR_PTR(ret);
1053  	}
1054  
1055  	rdma_restrack_add(&srq->res);
1056  
1057  	return srq;
1058  }
1059  EXPORT_SYMBOL(ib_create_srq_user);
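
/*
 * Example (illustrative sketch, not part of the original file): kernel users
 * go through the ib_create_srq() wrapper, which passes NULL for uobject and
 * udata. "pd" is a placeholder protection domain.
 *
 *	struct ib_srq_init_attr srq_attr = {
 *		.srq_type	= IB_SRQT_BASIC,
 *		.attr		= {
 *			.max_wr		= 256,
 *			.max_sge	= 1,
 *		},
 *	};
 *	struct ib_srq *srq;
 *
 *	srq = ib_create_srq(pd, &srq_attr);
 *	if (IS_ERR(srq))
 *		return PTR_ERR(srq);
 *	...
 *	ib_destroy_srq(srq);
 */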
1060  
int ib_modify_srq(struct ib_srq *srq,
1062  		  struct ib_srq_attr *srq_attr,
1063  		  enum ib_srq_attr_mask srq_attr_mask)
1064  {
1065  	return srq->device->ops.modify_srq ?
1066  		srq->device->ops.modify_srq(srq, srq_attr, srq_attr_mask,
1067  					    NULL) : -EOPNOTSUPP;
1068  }
1069  EXPORT_SYMBOL(ib_modify_srq);
1070  
int ib_query_srq(struct ib_srq *srq,
1072  		 struct ib_srq_attr *srq_attr)
1073  {
1074  	return srq->device->ops.query_srq ?
1075  		srq->device->ops.query_srq(srq, srq_attr) : -EOPNOTSUPP;
1076  }
1077  EXPORT_SYMBOL(ib_query_srq);
1078  
int ib_destroy_srq_user(struct ib_srq *srq, struct ib_udata *udata)
1080  {
1081  	int ret;
1082  
1083  	if (atomic_read(&srq->usecnt))
1084  		return -EBUSY;
1085  
1086  	ret = srq->device->ops.destroy_srq(srq, udata);
1087  	if (ret)
1088  		return ret;
1089  
1090  	atomic_dec(&srq->pd->usecnt);
1091  	if (srq->srq_type == IB_SRQT_XRC && srq->ext.xrc.xrcd)
1092  		atomic_dec(&srq->ext.xrc.xrcd->usecnt);
1093  	if (ib_srq_has_cq(srq->srq_type))
1094  		atomic_dec(&srq->ext.cq->usecnt);
1095  	rdma_restrack_del(&srq->res);
1096  	kfree(srq);
1097  
1098  	return ret;
1099  }
1100  EXPORT_SYMBOL(ib_destroy_srq_user);
1101  
1102  /* Queue pairs */
1103  
static void __ib_qp_event_handler(struct ib_event *event, void *context)
1105  {
1106  	struct ib_qp *qp = event->element.qp;
1107  
1108  	if (event->event == IB_EVENT_QP_LAST_WQE_REACHED)
1109  		complete(&qp->srq_completion);
1110  	if (qp->registered_event_handler)
1111  		qp->registered_event_handler(event, qp->qp_context);
1112  }
1113  
static void __ib_shared_qp_event_handler(struct ib_event *event, void *context)
1115  {
1116  	struct ib_qp *qp = context;
1117  	unsigned long flags;
1118  
1119  	spin_lock_irqsave(&qp->device->qp_open_list_lock, flags);
1120  	list_for_each_entry(event->element.qp, &qp->open_list, open_list)
1121  		if (event->element.qp->event_handler)
1122  			event->element.qp->event_handler(event, event->element.qp->qp_context);
1123  	spin_unlock_irqrestore(&qp->device->qp_open_list_lock, flags);
1124  }
1125  
static struct ib_qp *__ib_open_qp(struct ib_qp *real_qp,
1127  				  void (*event_handler)(struct ib_event *, void *),
1128  				  void *qp_context)
1129  {
1130  	struct ib_qp *qp;
1131  	unsigned long flags;
1132  	int err;
1133  
1134  	qp = kzalloc(sizeof *qp, GFP_KERNEL);
1135  	if (!qp)
1136  		return ERR_PTR(-ENOMEM);
1137  
1138  	qp->real_qp = real_qp;
1139  	err = ib_open_shared_qp_security(qp, real_qp->device);
1140  	if (err) {
1141  		kfree(qp);
1142  		return ERR_PTR(err);
1143  	}
1144  
1145  	qp->real_qp = real_qp;
1146  	atomic_inc(&real_qp->usecnt);
1147  	qp->device = real_qp->device;
1148  	qp->event_handler = event_handler;
1149  	qp->qp_context = qp_context;
1150  	qp->qp_num = real_qp->qp_num;
1151  	qp->qp_type = real_qp->qp_type;
1152  
1153  	spin_lock_irqsave(&real_qp->device->qp_open_list_lock, flags);
1154  	list_add(&qp->open_list, &real_qp->open_list);
1155  	spin_unlock_irqrestore(&real_qp->device->qp_open_list_lock, flags);
1156  
1157  	return qp;
1158  }
1159  
struct ib_qp *ib_open_qp(struct ib_xrcd *xrcd,
1161  			 struct ib_qp_open_attr *qp_open_attr)
1162  {
1163  	struct ib_qp *qp, *real_qp;
1164  
1165  	if (qp_open_attr->qp_type != IB_QPT_XRC_TGT)
1166  		return ERR_PTR(-EINVAL);
1167  
1168  	down_read(&xrcd->tgt_qps_rwsem);
1169  	real_qp = xa_load(&xrcd->tgt_qps, qp_open_attr->qp_num);
1170  	if (!real_qp) {
1171  		up_read(&xrcd->tgt_qps_rwsem);
1172  		return ERR_PTR(-EINVAL);
1173  	}
1174  	qp = __ib_open_qp(real_qp, qp_open_attr->event_handler,
1175  			  qp_open_attr->qp_context);
1176  	up_read(&xrcd->tgt_qps_rwsem);
1177  	return qp;
1178  }
1179  EXPORT_SYMBOL(ib_open_qp);
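
/*
 * Example (illustrative sketch, not part of the original file): opening an
 * existing XRC target QP by number on an XRC domain. "xrcd", "tgt_qp_num",
 * "my_qp_event_handler" and "my_ctx" are placeholders; the target QP must
 * already have been created on this xrcd.
 *
 *	struct ib_qp_open_attr open_attr = {
 *		.event_handler	= my_qp_event_handler,
 *		.qp_context	= my_ctx,
 *		.qp_num		= tgt_qp_num,
 *		.qp_type	= IB_QPT_XRC_TGT,
 *	};
 *	struct ib_qp *qp;
 *
 *	qp = ib_open_qp(xrcd, &open_attr);
 *	if (IS_ERR(qp))
 *		return PTR_ERR(qp);
 *	...
 *	ib_close_qp(qp);
 */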
1180  
static struct ib_qp *create_xrc_qp_user(struct ib_qp *qp,
1182  					struct ib_qp_init_attr *qp_init_attr)
1183  {
1184  	struct ib_qp *real_qp = qp;
1185  	int err;
1186  
1187  	qp->event_handler = __ib_shared_qp_event_handler;
1188  	qp->qp_context = qp;
1189  	qp->pd = NULL;
1190  	qp->send_cq = qp->recv_cq = NULL;
1191  	qp->srq = NULL;
1192  	qp->xrcd = qp_init_attr->xrcd;
1193  	atomic_inc(&qp_init_attr->xrcd->usecnt);
1194  	INIT_LIST_HEAD(&qp->open_list);
1195  
1196  	qp = __ib_open_qp(real_qp, qp_init_attr->event_handler,
1197  			  qp_init_attr->qp_context);
1198  	if (IS_ERR(qp))
1199  		return qp;
1200  
1201  	err = xa_err(xa_store(&qp_init_attr->xrcd->tgt_qps, real_qp->qp_num,
1202  			      real_qp, GFP_KERNEL));
1203  	if (err) {
1204  		ib_close_qp(qp);
1205  		return ERR_PTR(err);
1206  	}
1207  	return qp;
1208  }
1209  
static struct ib_qp *create_qp(struct ib_device *dev, struct ib_pd *pd,
1211  			       struct ib_qp_init_attr *attr,
1212  			       struct ib_udata *udata,
1213  			       struct ib_uqp_object *uobj, const char *caller)
1214  {
1215  	struct ib_udata dummy = {};
1216  	struct ib_qp *qp;
1217  	int ret;
1218  
1219  	if (!dev->ops.create_qp)
1220  		return ERR_PTR(-EOPNOTSUPP);
1221  
1222  	qp = rdma_zalloc_drv_obj_numa(dev, ib_qp);
1223  	if (!qp)
1224  		return ERR_PTR(-ENOMEM);
1225  
1226  	qp->device = dev;
1227  	qp->pd = pd;
1228  	qp->uobject = uobj;
1229  	qp->real_qp = qp;
1230  
1231  	qp->qp_type = attr->qp_type;
1232  	qp->rwq_ind_tbl = attr->rwq_ind_tbl;
1233  	qp->srq = attr->srq;
1234  	qp->event_handler = __ib_qp_event_handler;
1235  	qp->registered_event_handler = attr->event_handler;
1236  	qp->port = attr->port_num;
1237  	qp->qp_context = attr->qp_context;
1238  
1239  	spin_lock_init(&qp->mr_lock);
1240  	INIT_LIST_HEAD(&qp->rdma_mrs);
1241  	INIT_LIST_HEAD(&qp->sig_mrs);
1242  	init_completion(&qp->srq_completion);
1243  
1244  	qp->send_cq = attr->send_cq;
1245  	qp->recv_cq = attr->recv_cq;
1246  
1247  	rdma_restrack_new(&qp->res, RDMA_RESTRACK_QP);
1248  	WARN_ONCE(!udata && !caller, "Missing kernel QP owner");
1249  	rdma_restrack_set_name(&qp->res, udata ? NULL : caller);
1250  	ret = dev->ops.create_qp(qp, attr, udata);
1251  	if (ret)
1252  		goto err_create;
1253  
1254  	/*
1255  	 * TODO: The mlx4 internally overwrites send_cq and recv_cq.
1256  	 * Unfortunately, it is not an easy task to fix that driver.
1257  	 */
1258  	qp->send_cq = attr->send_cq;
1259  	qp->recv_cq = attr->recv_cq;
1260  
1261  	ret = ib_create_qp_security(qp, dev);
1262  	if (ret)
1263  		goto err_security;
1264  
1265  	rdma_restrack_add(&qp->res);
1266  	return qp;
1267  
1268  err_security:
1269  	qp->device->ops.destroy_qp(qp, udata ? &dummy : NULL);
1270  err_create:
1271  	rdma_restrack_put(&qp->res);
1272  	kfree(qp);
1273  	return ERR_PTR(ret);
1274  
1275  }
1276  
1277  /**
1278   * ib_create_qp_user - Creates a QP associated with the specified protection
1279   *   domain.
1280   * @dev: IB device
1281   * @pd: The protection domain associated with the QP.
1282   * @attr: A list of initial attributes required to create the
1283   *   QP.  If QP creation succeeds, then the attributes are updated to
1284   *   the actual capabilities of the created QP.
1285   * @udata: User data
 * @uobj: uverbs object
1287   * @caller: caller's build-time module name
1288   */
struct ib_qp *ib_create_qp_user(struct ib_device *dev, struct ib_pd *pd,
1290  				struct ib_qp_init_attr *attr,
1291  				struct ib_udata *udata,
1292  				struct ib_uqp_object *uobj, const char *caller)
1293  {
1294  	struct ib_qp *qp, *xrc_qp;
1295  
1296  	if (attr->qp_type == IB_QPT_XRC_TGT)
1297  		qp = create_qp(dev, pd, attr, NULL, NULL, caller);
1298  	else
1299  		qp = create_qp(dev, pd, attr, udata, uobj, NULL);
1300  	if (attr->qp_type != IB_QPT_XRC_TGT || IS_ERR(qp))
1301  		return qp;
1302  
1303  	xrc_qp = create_xrc_qp_user(qp, attr);
1304  	if (IS_ERR(xrc_qp)) {
1305  		ib_destroy_qp(qp);
1306  		return xrc_qp;
1307  	}
1308  
1309  	xrc_qp->uobject = uobj;
1310  	return xrc_qp;
1311  }
1312  EXPORT_SYMBOL(ib_create_qp_user);
1313  
void ib_qp_usecnt_inc(struct ib_qp *qp)
1315  {
1316  	if (qp->pd)
1317  		atomic_inc(&qp->pd->usecnt);
1318  	if (qp->send_cq)
1319  		atomic_inc(&qp->send_cq->usecnt);
1320  	if (qp->recv_cq)
1321  		atomic_inc(&qp->recv_cq->usecnt);
1322  	if (qp->srq)
1323  		atomic_inc(&qp->srq->usecnt);
1324  	if (qp->rwq_ind_tbl)
1325  		atomic_inc(&qp->rwq_ind_tbl->usecnt);
1326  }
1327  EXPORT_SYMBOL(ib_qp_usecnt_inc);
1328  
void ib_qp_usecnt_dec(struct ib_qp *qp)
1330  {
1331  	if (qp->rwq_ind_tbl)
1332  		atomic_dec(&qp->rwq_ind_tbl->usecnt);
1333  	if (qp->srq)
1334  		atomic_dec(&qp->srq->usecnt);
1335  	if (qp->recv_cq)
1336  		atomic_dec(&qp->recv_cq->usecnt);
1337  	if (qp->send_cq)
1338  		atomic_dec(&qp->send_cq->usecnt);
1339  	if (qp->pd)
1340  		atomic_dec(&qp->pd->usecnt);
1341  }
1342  EXPORT_SYMBOL(ib_qp_usecnt_dec);
1343  
struct ib_qp *ib_create_qp_kernel(struct ib_pd *pd,
1345  				  struct ib_qp_init_attr *qp_init_attr,
1346  				  const char *caller)
1347  {
1348  	struct ib_device *device = pd->device;
1349  	struct ib_qp *qp;
1350  	int ret;
1351  
1352  	/*
	 * If the caller is using the RDMA R/W API, calculate the resources
1354  	 * needed for the RDMA READ/WRITE operations.
1355  	 *
1356  	 * Note that these callers need to pass in a port number.
1357  	 */
1358  	if (qp_init_attr->cap.max_rdma_ctxs)
1359  		rdma_rw_init_qp(device, qp_init_attr);
1360  
1361  	qp = create_qp(device, pd, qp_init_attr, NULL, NULL, caller);
1362  	if (IS_ERR(qp))
1363  		return qp;
1364  
1365  	ib_qp_usecnt_inc(qp);
1366  
1367  	if (qp_init_attr->cap.max_rdma_ctxs) {
1368  		ret = rdma_rw_init_mrs(qp, qp_init_attr);
1369  		if (ret)
1370  			goto err;
1371  	}
1372  
1373  	/*
1374  	 * Note: all hw drivers guarantee that max_send_sge is lower than
1375  	 * the device RDMA WRITE SGE limit but not all hw drivers ensure that
1376  	 * max_send_sge <= max_sge_rd.
1377  	 */
1378  	qp->max_write_sge = qp_init_attr->cap.max_send_sge;
1379  	qp->max_read_sge = min_t(u32, qp_init_attr->cap.max_send_sge,
1380  				 device->attrs.max_sge_rd);
1381  	if (qp_init_attr->create_flags & IB_QP_CREATE_INTEGRITY_EN)
1382  		qp->integrity_en = true;
1383  
1384  	return qp;
1385  
1386  err:
1387  	ib_destroy_qp(qp);
1388  	return ERR_PTR(ret);
1389  
1390  }
1391  EXPORT_SYMBOL(ib_create_qp_kernel);
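
/*
 * Example (illustrative sketch, not part of the original file): kernel ULPs
 * use the ib_create_qp() wrapper, which forwards KBUILD_MODNAME as @caller.
 * "pd", "cq" and "my_qp_event_handler" are placeholders from the caller.
 *
 *	struct ib_qp_init_attr init_attr = {
 *		.event_handler	= my_qp_event_handler,
 *		.send_cq	= cq,
 *		.recv_cq	= cq,
 *		.cap		= {
 *			.max_send_wr	= 128,
 *			.max_recv_wr	= 128,
 *			.max_send_sge	= 1,
 *			.max_recv_sge	= 1,
 *		},
 *		.sq_sig_type	= IB_SIGNAL_REQ_WR,
 *		.qp_type	= IB_QPT_RC,
 *	};
 *	struct ib_qp *qp;
 *
 *	qp = ib_create_qp(pd, &init_attr);
 *	if (IS_ERR(qp))
 *		return PTR_ERR(qp);
 *	...
 *	ib_destroy_qp(qp);
 */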
1392  
1393  static const struct {
1394  	int			valid;
1395  	enum ib_qp_attr_mask	req_param[IB_QPT_MAX];
1396  	enum ib_qp_attr_mask	opt_param[IB_QPT_MAX];
1397  } qp_state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = {
1398  	[IB_QPS_RESET] = {
1399  		[IB_QPS_RESET] = { .valid = 1 },
1400  		[IB_QPS_INIT]  = {
1401  			.valid = 1,
1402  			.req_param = {
1403  				[IB_QPT_UD]  = (IB_QP_PKEY_INDEX		|
1404  						IB_QP_PORT			|
1405  						IB_QP_QKEY),
1406  				[IB_QPT_RAW_PACKET] = IB_QP_PORT,
1407  				[IB_QPT_UC]  = (IB_QP_PKEY_INDEX		|
1408  						IB_QP_PORT			|
1409  						IB_QP_ACCESS_FLAGS),
1410  				[IB_QPT_RC]  = (IB_QP_PKEY_INDEX		|
1411  						IB_QP_PORT			|
1412  						IB_QP_ACCESS_FLAGS),
1413  				[IB_QPT_XRC_INI] = (IB_QP_PKEY_INDEX		|
1414  						IB_QP_PORT			|
1415  						IB_QP_ACCESS_FLAGS),
1416  				[IB_QPT_XRC_TGT] = (IB_QP_PKEY_INDEX		|
1417  						IB_QP_PORT			|
1418  						IB_QP_ACCESS_FLAGS),
1419  				[IB_QPT_SMI] = (IB_QP_PKEY_INDEX		|
1420  						IB_QP_QKEY),
1421  				[IB_QPT_GSI] = (IB_QP_PKEY_INDEX		|
1422  						IB_QP_QKEY),
1423  			}
1424  		},
1425  	},
1426  	[IB_QPS_INIT]  = {
1427  		[IB_QPS_RESET] = { .valid = 1 },
1428  		[IB_QPS_ERR] =   { .valid = 1 },
1429  		[IB_QPS_INIT]  = {
1430  			.valid = 1,
1431  			.opt_param = {
1432  				[IB_QPT_UD]  = (IB_QP_PKEY_INDEX		|
1433  						IB_QP_PORT			|
1434  						IB_QP_QKEY),
1435  				[IB_QPT_UC]  = (IB_QP_PKEY_INDEX		|
1436  						IB_QP_PORT			|
1437  						IB_QP_ACCESS_FLAGS),
1438  				[IB_QPT_RC]  = (IB_QP_PKEY_INDEX		|
1439  						IB_QP_PORT			|
1440  						IB_QP_ACCESS_FLAGS),
1441  				[IB_QPT_XRC_INI] = (IB_QP_PKEY_INDEX		|
1442  						IB_QP_PORT			|
1443  						IB_QP_ACCESS_FLAGS),
1444  				[IB_QPT_XRC_TGT] = (IB_QP_PKEY_INDEX		|
1445  						IB_QP_PORT			|
1446  						IB_QP_ACCESS_FLAGS),
1447  				[IB_QPT_SMI] = (IB_QP_PKEY_INDEX		|
1448  						IB_QP_QKEY),
1449  				[IB_QPT_GSI] = (IB_QP_PKEY_INDEX		|
1450  						IB_QP_QKEY),
1451  			}
1452  		},
1453  		[IB_QPS_RTR]   = {
1454  			.valid = 1,
1455  			.req_param = {
1456  				[IB_QPT_UC]  = (IB_QP_AV			|
1457  						IB_QP_PATH_MTU			|
1458  						IB_QP_DEST_QPN			|
1459  						IB_QP_RQ_PSN),
1460  				[IB_QPT_RC]  = (IB_QP_AV			|
1461  						IB_QP_PATH_MTU			|
1462  						IB_QP_DEST_QPN			|
1463  						IB_QP_RQ_PSN			|
1464  						IB_QP_MAX_DEST_RD_ATOMIC	|
1465  						IB_QP_MIN_RNR_TIMER),
1466  				[IB_QPT_XRC_INI] = (IB_QP_AV			|
1467  						IB_QP_PATH_MTU			|
1468  						IB_QP_DEST_QPN			|
1469  						IB_QP_RQ_PSN),
1470  				[IB_QPT_XRC_TGT] = (IB_QP_AV			|
1471  						IB_QP_PATH_MTU			|
1472  						IB_QP_DEST_QPN			|
1473  						IB_QP_RQ_PSN			|
1474  						IB_QP_MAX_DEST_RD_ATOMIC	|
1475  						IB_QP_MIN_RNR_TIMER),
1476  			},
1477  			.opt_param = {
1478  				 [IB_QPT_UD]  = (IB_QP_PKEY_INDEX		|
1479  						 IB_QP_QKEY),
1480  				 [IB_QPT_UC]  = (IB_QP_ALT_PATH			|
1481  						 IB_QP_ACCESS_FLAGS		|
1482  						 IB_QP_PKEY_INDEX),
1483  				 [IB_QPT_RC]  = (IB_QP_ALT_PATH			|
1484  						 IB_QP_ACCESS_FLAGS		|
1485  						 IB_QP_PKEY_INDEX),
1486  				 [IB_QPT_XRC_INI] = (IB_QP_ALT_PATH		|
1487  						 IB_QP_ACCESS_FLAGS		|
1488  						 IB_QP_PKEY_INDEX),
1489  				 [IB_QPT_XRC_TGT] = (IB_QP_ALT_PATH		|
1490  						 IB_QP_ACCESS_FLAGS		|
1491  						 IB_QP_PKEY_INDEX),
1492  				 [IB_QPT_SMI] = (IB_QP_PKEY_INDEX		|
1493  						 IB_QP_QKEY),
1494  				 [IB_QPT_GSI] = (IB_QP_PKEY_INDEX		|
1495  						 IB_QP_QKEY),
1496  			 },
1497  		},
1498  	},
1499  	[IB_QPS_RTR]   = {
1500  		[IB_QPS_RESET] = { .valid = 1 },
1501  		[IB_QPS_ERR] =   { .valid = 1 },
1502  		[IB_QPS_RTS]   = {
1503  			.valid = 1,
1504  			.req_param = {
1505  				[IB_QPT_UD]  = IB_QP_SQ_PSN,
1506  				[IB_QPT_UC]  = IB_QP_SQ_PSN,
1507  				[IB_QPT_RC]  = (IB_QP_TIMEOUT			|
1508  						IB_QP_RETRY_CNT			|
1509  						IB_QP_RNR_RETRY			|
1510  						IB_QP_SQ_PSN			|
1511  						IB_QP_MAX_QP_RD_ATOMIC),
1512  				[IB_QPT_XRC_INI] = (IB_QP_TIMEOUT		|
1513  						IB_QP_RETRY_CNT			|
1514  						IB_QP_RNR_RETRY			|
1515  						IB_QP_SQ_PSN			|
1516  						IB_QP_MAX_QP_RD_ATOMIC),
1517  				[IB_QPT_XRC_TGT] = (IB_QP_TIMEOUT		|
1518  						IB_QP_SQ_PSN),
1519  				[IB_QPT_SMI] = IB_QP_SQ_PSN,
1520  				[IB_QPT_GSI] = IB_QP_SQ_PSN,
1521  			},
1522  			.opt_param = {
1523  				 [IB_QPT_UD]  = (IB_QP_CUR_STATE		|
1524  						 IB_QP_QKEY),
1525  				 [IB_QPT_UC]  = (IB_QP_CUR_STATE		|
1526  						 IB_QP_ALT_PATH			|
1527  						 IB_QP_ACCESS_FLAGS		|
1528  						 IB_QP_PATH_MIG_STATE),
1529  				 [IB_QPT_RC]  = (IB_QP_CUR_STATE		|
1530  						 IB_QP_ALT_PATH			|
1531  						 IB_QP_ACCESS_FLAGS		|
1532  						 IB_QP_MIN_RNR_TIMER		|
1533  						 IB_QP_PATH_MIG_STATE),
1534  				 [IB_QPT_XRC_INI] = (IB_QP_CUR_STATE		|
1535  						 IB_QP_ALT_PATH			|
1536  						 IB_QP_ACCESS_FLAGS		|
1537  						 IB_QP_PATH_MIG_STATE),
1538  				 [IB_QPT_XRC_TGT] = (IB_QP_CUR_STATE		|
1539  						 IB_QP_ALT_PATH			|
1540  						 IB_QP_ACCESS_FLAGS		|
1541  						 IB_QP_MIN_RNR_TIMER		|
1542  						 IB_QP_PATH_MIG_STATE),
1543  				 [IB_QPT_SMI] = (IB_QP_CUR_STATE		|
1544  						 IB_QP_QKEY),
1545  				 [IB_QPT_GSI] = (IB_QP_CUR_STATE		|
1546  						 IB_QP_QKEY),
1547  				 [IB_QPT_RAW_PACKET] = IB_QP_RATE_LIMIT,
1548  			 }
1549  		}
1550  	},
1551  	[IB_QPS_RTS]   = {
1552  		[IB_QPS_RESET] = { .valid = 1 },
1553  		[IB_QPS_ERR] =   { .valid = 1 },
1554  		[IB_QPS_RTS]   = {
1555  			.valid = 1,
1556  			.opt_param = {
1557  				[IB_QPT_UD]  = (IB_QP_CUR_STATE			|
1558  						IB_QP_QKEY),
1559  				[IB_QPT_UC]  = (IB_QP_CUR_STATE			|
1560  						IB_QP_ACCESS_FLAGS		|
1561  						IB_QP_ALT_PATH			|
1562  						IB_QP_PATH_MIG_STATE),
1563  				[IB_QPT_RC]  = (IB_QP_CUR_STATE			|
1564  						IB_QP_ACCESS_FLAGS		|
1565  						IB_QP_ALT_PATH			|
1566  						IB_QP_PATH_MIG_STATE		|
1567  						IB_QP_MIN_RNR_TIMER),
1568  				[IB_QPT_XRC_INI] = (IB_QP_CUR_STATE		|
1569  						IB_QP_ACCESS_FLAGS		|
1570  						IB_QP_ALT_PATH			|
1571  						IB_QP_PATH_MIG_STATE),
1572  				[IB_QPT_XRC_TGT] = (IB_QP_CUR_STATE		|
1573  						IB_QP_ACCESS_FLAGS		|
1574  						IB_QP_ALT_PATH			|
1575  						IB_QP_PATH_MIG_STATE		|
1576  						IB_QP_MIN_RNR_TIMER),
1577  				[IB_QPT_SMI] = (IB_QP_CUR_STATE			|
1578  						IB_QP_QKEY),
1579  				[IB_QPT_GSI] = (IB_QP_CUR_STATE			|
1580  						IB_QP_QKEY),
1581  				[IB_QPT_RAW_PACKET] = IB_QP_RATE_LIMIT,
1582  			}
1583  		},
1584  		[IB_QPS_SQD]   = {
1585  			.valid = 1,
1586  			.opt_param = {
1587  				[IB_QPT_UD]  = IB_QP_EN_SQD_ASYNC_NOTIFY,
1588  				[IB_QPT_UC]  = IB_QP_EN_SQD_ASYNC_NOTIFY,
1589  				[IB_QPT_RC]  = IB_QP_EN_SQD_ASYNC_NOTIFY,
1590  				[IB_QPT_XRC_INI] = IB_QP_EN_SQD_ASYNC_NOTIFY,
1591  				[IB_QPT_XRC_TGT] = IB_QP_EN_SQD_ASYNC_NOTIFY, /* ??? */
1592  				[IB_QPT_SMI] = IB_QP_EN_SQD_ASYNC_NOTIFY,
1593  				[IB_QPT_GSI] = IB_QP_EN_SQD_ASYNC_NOTIFY
1594  			}
1595  		},
1596  	},
1597  	[IB_QPS_SQD]   = {
1598  		[IB_QPS_RESET] = { .valid = 1 },
1599  		[IB_QPS_ERR] =   { .valid = 1 },
1600  		[IB_QPS_RTS]   = {
1601  			.valid = 1,
1602  			.opt_param = {
1603  				[IB_QPT_UD]  = (IB_QP_CUR_STATE			|
1604  						IB_QP_QKEY),
1605  				[IB_QPT_UC]  = (IB_QP_CUR_STATE			|
1606  						IB_QP_ALT_PATH			|
1607  						IB_QP_ACCESS_FLAGS		|
1608  						IB_QP_PATH_MIG_STATE),
1609  				[IB_QPT_RC]  = (IB_QP_CUR_STATE			|
1610  						IB_QP_ALT_PATH			|
1611  						IB_QP_ACCESS_FLAGS		|
1612  						IB_QP_MIN_RNR_TIMER		|
1613  						IB_QP_PATH_MIG_STATE),
1614  				[IB_QPT_XRC_INI] = (IB_QP_CUR_STATE		|
1615  						IB_QP_ALT_PATH			|
1616  						IB_QP_ACCESS_FLAGS		|
1617  						IB_QP_PATH_MIG_STATE),
1618  				[IB_QPT_XRC_TGT] = (IB_QP_CUR_STATE		|
1619  						IB_QP_ALT_PATH			|
1620  						IB_QP_ACCESS_FLAGS		|
1621  						IB_QP_MIN_RNR_TIMER		|
1622  						IB_QP_PATH_MIG_STATE),
1623  				[IB_QPT_SMI] = (IB_QP_CUR_STATE			|
1624  						IB_QP_QKEY),
1625  				[IB_QPT_GSI] = (IB_QP_CUR_STATE			|
1626  						IB_QP_QKEY),
1627  			}
1628  		},
1629  		[IB_QPS_SQD]   = {
1630  			.valid = 1,
1631  			.opt_param = {
1632  				[IB_QPT_UD]  = (IB_QP_PKEY_INDEX		|
1633  						IB_QP_QKEY),
1634  				[IB_QPT_UC]  = (IB_QP_AV			|
1635  						IB_QP_ALT_PATH			|
1636  						IB_QP_ACCESS_FLAGS		|
1637  						IB_QP_PKEY_INDEX		|
1638  						IB_QP_PATH_MIG_STATE),
1639  				[IB_QPT_RC]  = (IB_QP_PORT			|
1640  						IB_QP_AV			|
1641  						IB_QP_TIMEOUT			|
1642  						IB_QP_RETRY_CNT			|
1643  						IB_QP_RNR_RETRY			|
1644  						IB_QP_MAX_QP_RD_ATOMIC		|
1645  						IB_QP_MAX_DEST_RD_ATOMIC	|
1646  						IB_QP_ALT_PATH			|
1647  						IB_QP_ACCESS_FLAGS		|
1648  						IB_QP_PKEY_INDEX		|
1649  						IB_QP_MIN_RNR_TIMER		|
1650  						IB_QP_PATH_MIG_STATE),
1651  				[IB_QPT_XRC_INI] = (IB_QP_PORT			|
1652  						IB_QP_AV			|
1653  						IB_QP_TIMEOUT			|
1654  						IB_QP_RETRY_CNT			|
1655  						IB_QP_RNR_RETRY			|
1656  						IB_QP_MAX_QP_RD_ATOMIC		|
1657  						IB_QP_ALT_PATH			|
1658  						IB_QP_ACCESS_FLAGS		|
1659  						IB_QP_PKEY_INDEX		|
1660  						IB_QP_PATH_MIG_STATE),
1661  				[IB_QPT_XRC_TGT] = (IB_QP_PORT			|
1662  						IB_QP_AV			|
1663  						IB_QP_TIMEOUT			|
1664  						IB_QP_MAX_DEST_RD_ATOMIC	|
1665  						IB_QP_ALT_PATH			|
1666  						IB_QP_ACCESS_FLAGS		|
1667  						IB_QP_PKEY_INDEX		|
1668  						IB_QP_MIN_RNR_TIMER		|
1669  						IB_QP_PATH_MIG_STATE),
1670  				[IB_QPT_SMI] = (IB_QP_PKEY_INDEX		|
1671  						IB_QP_QKEY),
1672  				[IB_QPT_GSI] = (IB_QP_PKEY_INDEX		|
1673  						IB_QP_QKEY),
1674  			}
1675  		}
1676  	},
1677  	[IB_QPS_SQE]   = {
1678  		[IB_QPS_RESET] = { .valid = 1 },
1679  		[IB_QPS_ERR] =   { .valid = 1 },
1680  		[IB_QPS_RTS]   = {
1681  			.valid = 1,
1682  			.opt_param = {
1683  				[IB_QPT_UD]  = (IB_QP_CUR_STATE			|
1684  						IB_QP_QKEY),
1685  				[IB_QPT_UC]  = (IB_QP_CUR_STATE			|
1686  						IB_QP_ACCESS_FLAGS),
1687  				[IB_QPT_SMI] = (IB_QP_CUR_STATE			|
1688  						IB_QP_QKEY),
1689  				[IB_QPT_GSI] = (IB_QP_CUR_STATE			|
1690  						IB_QP_QKEY),
1691  			}
1692  		}
1693  	},
1694  	[IB_QPS_ERR] = {
1695  		[IB_QPS_RESET] = { .valid = 1 },
1696  		[IB_QPS_ERR] =   { .valid = 1 }
1697  	}
1698  };
1699  
1700  bool ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
1701  			enum ib_qp_type type, enum ib_qp_attr_mask mask)
1702  {
1703  	enum ib_qp_attr_mask req_param, opt_param;
1704  
1705  	if (mask & IB_QP_CUR_STATE  &&
1706  	    cur_state != IB_QPS_RTR && cur_state != IB_QPS_RTS &&
1707  	    cur_state != IB_QPS_SQD && cur_state != IB_QPS_SQE)
1708  		return false;
1709  
1710  	if (!qp_state_table[cur_state][next_state].valid)
1711  		return false;
1712  
1713  	req_param = qp_state_table[cur_state][next_state].req_param[type];
1714  	opt_param = qp_state_table[cur_state][next_state].opt_param[type];
1715  
1716  	if ((mask & req_param) != req_param)
1717  		return false;
1718  
1719  	if (mask & ~(req_param | opt_param | IB_QP_STATE))
1720  		return false;
1721  
1722  	return true;
1723  }
1724  EXPORT_SYMBOL(ib_modify_qp_is_ok);
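
/*
 * Example (illustrative sketch, not taken from any in-tree caller): validating
 * the attribute mask for an INIT->RTR transition on an RC QP before issuing
 * the modify.  The mask below is only an example.
 *
 *	if (!ib_modify_qp_is_ok(IB_QPS_INIT, IB_QPS_RTR, IB_QPT_RC,
 *				IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU |
 *				IB_QP_DEST_QPN | IB_QP_RQ_PSN |
 *				IB_QP_MAX_DEST_RD_ATOMIC |
 *				IB_QP_MIN_RNR_TIMER))
 *		return -EINVAL;
 */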
1725  
1726  /**
1727   * ib_resolve_eth_dmac - Resolve destination mac address
1728   * @device:		Device to consider
1729   * @ah_attr:		address handle attribute which describes the
1730   *			source and destination parameters
1731   * ib_resolve_eth_dmac() resolves the destination MAC address and L3 hop limit.
1732   * It returns 0 on success or an appropriate error code, and initializes the
1733   * necessary ah_attr fields when the call is successful.
1734   */
1735  static int ib_resolve_eth_dmac(struct ib_device *device,
1736  			       struct rdma_ah_attr *ah_attr)
1737  {
1738  	int ret = 0;
1739  
1740  	if (rdma_is_multicast_addr((struct in6_addr *)ah_attr->grh.dgid.raw)) {
1741  		if (ipv6_addr_v4mapped((struct in6_addr *)ah_attr->grh.dgid.raw)) {
1742  			__be32 addr = 0;
1743  
1744  			memcpy(&addr, ah_attr->grh.dgid.raw + 12, 4);
1745  			ip_eth_mc_map(addr, (char *)ah_attr->roce.dmac);
1746  		} else {
1747  			ipv6_eth_mc_map((struct in6_addr *)ah_attr->grh.dgid.raw,
1748  					(char *)ah_attr->roce.dmac);
1749  		}
1750  	} else {
1751  		ret = ib_resolve_unicast_gid_dmac(device, ah_attr);
1752  	}
1753  	return ret;
1754  }
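
/*
 * Worked example of the multicast branch above (for illustration only): for
 * an IPv4-mapped multicast GID carrying 224.1.1.1, ip_eth_mc_map() places the
 * low 23 bits of the group address under the 01:00:5e prefix, so the
 * resulting dmac is 01:00:5e:01:01:01.  IPv6 multicast GIDs are mapped by
 * ipv6_eth_mc_map() under the 33:33 prefix instead.
 */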
1755  
1756  static bool is_qp_type_connected(const struct ib_qp *qp)
1757  {
1758  	return (qp->qp_type == IB_QPT_UC ||
1759  		qp->qp_type == IB_QPT_RC ||
1760  		qp->qp_type == IB_QPT_XRC_INI ||
1761  		qp->qp_type == IB_QPT_XRC_TGT);
1762  }
1763  
1764  /*
1765   * IB core internal function to perform QP attributes modification.
1766   */
1767  static int _ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr,
1768  			 int attr_mask, struct ib_udata *udata)
1769  {
1770  	u32 port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
1771  	const struct ib_gid_attr *old_sgid_attr_av;
1772  	const struct ib_gid_attr *old_sgid_attr_alt_av;
1773  	int ret;
1774  
1775  	attr->xmit_slave = NULL;
1776  	if (attr_mask & IB_QP_AV) {
1777  		ret = rdma_fill_sgid_attr(qp->device, &attr->ah_attr,
1778  					  &old_sgid_attr_av);
1779  		if (ret)
1780  			return ret;
1781  
1782  		if (attr->ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE &&
1783  		    is_qp_type_connected(qp)) {
1784  			struct net_device *slave;
1785  
1786  			/*
1787  			 * If the user provided the qp_attr then we have to
1788  			 * resolve it. Kernel users have to provide already
1789  			 * resolved rdma_ah_attr's.
1790  			 */
1791  			if (udata) {
1792  				ret = ib_resolve_eth_dmac(qp->device,
1793  							  &attr->ah_attr);
1794  				if (ret)
1795  					goto out_av;
1796  			}
1797  			slave = rdma_lag_get_ah_roce_slave(qp->device,
1798  							   &attr->ah_attr,
1799  							   GFP_KERNEL);
1800  			if (IS_ERR(slave)) {
1801  				ret = PTR_ERR(slave);
1802  				goto out_av;
1803  			}
1804  			attr->xmit_slave = slave;
1805  		}
1806  	}
1807  	if (attr_mask & IB_QP_ALT_PATH) {
1808  		/*
1809  		 * FIXME: This does not track the migration state, so if the
1810  		 * user loads a new alternate path after the HW has migrated
1811  		 * from primary->alternate we will keep the wrong
1812  		 * references. This is OK for IB because the reference
1813  		 * counting does not serve any functional purpose.
1814  		 */
1815  		ret = rdma_fill_sgid_attr(qp->device, &attr->alt_ah_attr,
1816  					  &old_sgid_attr_alt_av);
1817  		if (ret)
1818  			goto out_av;
1819  
1820  		/*
1821  		 * Today the core code can only handle alternate paths and APM
1822  		 * for IB. Ban them in roce mode.
1823  		 */
1824  		if (!(rdma_protocol_ib(qp->device,
1825  				       attr->alt_ah_attr.port_num) &&
1826  		      rdma_protocol_ib(qp->device, port))) {
1827  			ret = -EINVAL;
1828  			goto out;
1829  		}
1830  	}
1831  
1832  	if (rdma_ib_or_roce(qp->device, port)) {
1833  		if (attr_mask & IB_QP_RQ_PSN && attr->rq_psn & ~0xffffff) {
1834  			dev_warn(&qp->device->dev,
1835  				 "%s rq_psn overflow, masking to 24 bits\n",
1836  				 __func__);
1837  			attr->rq_psn &= 0xffffff;
1838  		}
1839  
1840  		if (attr_mask & IB_QP_SQ_PSN && attr->sq_psn & ~0xffffff) {
1841  			dev_warn(&qp->device->dev,
1842  				 "%s sq_psn overflow, masking to 24 bits\n",
1843  				 __func__);
1844  			attr->sq_psn &= 0xffffff;
1845  		}
1846  	}
1847  
1848  	/*
1849  	 * Bind this qp to a counter automatically based on the rdma counter
1850  	 * rules. This is only done in RST2INIT with the port specified.
1851  	 */
1852  	if (!qp->counter && (attr_mask & IB_QP_PORT) &&
1853  	    ((attr_mask & IB_QP_STATE) && attr->qp_state == IB_QPS_INIT))
1854  		rdma_counter_bind_qp_auto(qp, attr->port_num);
1855  
1856  	ret = ib_security_modify_qp(qp, attr, attr_mask, udata);
1857  	if (ret)
1858  		goto out;
1859  
1860  	if (attr_mask & IB_QP_PORT)
1861  		qp->port = attr->port_num;
1862  	if (attr_mask & IB_QP_AV)
1863  		qp->av_sgid_attr =
1864  			rdma_update_sgid_attr(&attr->ah_attr, qp->av_sgid_attr);
1865  	if (attr_mask & IB_QP_ALT_PATH)
1866  		qp->alt_path_sgid_attr = rdma_update_sgid_attr(
1867  			&attr->alt_ah_attr, qp->alt_path_sgid_attr);
1868  
1869  out:
1870  	if (attr_mask & IB_QP_ALT_PATH)
1871  		rdma_unfill_sgid_attr(&attr->alt_ah_attr, old_sgid_attr_alt_av);
1872  out_av:
1873  	if (attr_mask & IB_QP_AV) {
1874  		rdma_lag_put_ah_roce_slave(attr->xmit_slave);
1875  		rdma_unfill_sgid_attr(&attr->ah_attr, old_sgid_attr_av);
1876  	}
1877  	return ret;
1878  }
1879  
1880  /**
1881   * ib_modify_qp_with_udata - Modifies the attributes for the specified QP.
1882   * @ib_qp: The QP to modify.
1883   * @attr: On input, specifies the QP attributes to modify.  On output,
1884   *   the current values of selected QP attributes are returned.
1885   * @attr_mask: A bit-mask used to specify which attributes of the QP
1886   *   are being modified.
1887   * @udata: pointer to user's input/output buffer information
1888   *
1889   * It returns 0 on success or an appropriate error code on error.
1890   */
1891  int ib_modify_qp_with_udata(struct ib_qp *ib_qp, struct ib_qp_attr *attr,
1892  			    int attr_mask, struct ib_udata *udata)
1893  {
1894  	return _ib_modify_qp(ib_qp->real_qp, attr, attr_mask, udata);
1895  }
1896  EXPORT_SYMBOL(ib_modify_qp_with_udata);
1897  
1898  static void ib_get_width_and_speed(u32 netdev_speed, u32 lanes,
1899  				   u16 *speed, u8 *width)
1900  {
1901  	if (!lanes) {
1902  		if (netdev_speed <= SPEED_1000) {
1903  			*width = IB_WIDTH_1X;
1904  			*speed = IB_SPEED_SDR;
1905  		} else if (netdev_speed <= SPEED_10000) {
1906  			*width = IB_WIDTH_1X;
1907  			*speed = IB_SPEED_FDR10;
1908  		} else if (netdev_speed <= SPEED_20000) {
1909  			*width = IB_WIDTH_4X;
1910  			*speed = IB_SPEED_DDR;
1911  		} else if (netdev_speed <= SPEED_25000) {
1912  			*width = IB_WIDTH_1X;
1913  			*speed = IB_SPEED_EDR;
1914  		} else if (netdev_speed <= SPEED_40000) {
1915  			*width = IB_WIDTH_4X;
1916  			*speed = IB_SPEED_FDR10;
1917  		} else if (netdev_speed <= SPEED_50000) {
1918  			*width = IB_WIDTH_2X;
1919  			*speed = IB_SPEED_EDR;
1920  		} else if (netdev_speed <= SPEED_100000) {
1921  			*width = IB_WIDTH_4X;
1922  			*speed = IB_SPEED_EDR;
1923  		} else if (netdev_speed <= SPEED_200000) {
1924  			*width = IB_WIDTH_4X;
1925  			*speed = IB_SPEED_HDR;
1926  		} else {
1927  			*width = IB_WIDTH_4X;
1928  			*speed = IB_SPEED_NDR;
1929  		}
1930  
1931  		return;
1932  	}
1933  
1934  	switch (lanes) {
1935  	case 1:
1936  		*width = IB_WIDTH_1X;
1937  		break;
1938  	case 2:
1939  		*width = IB_WIDTH_2X;
1940  		break;
1941  	case 4:
1942  		*width = IB_WIDTH_4X;
1943  		break;
1944  	case 8:
1945  		*width = IB_WIDTH_8X;
1946  		break;
1947  	case 12:
1948  		*width = IB_WIDTH_12X;
1949  		break;
1950  	default:
1951  		*width = IB_WIDTH_1X;
1952  	}
1953  
1954  	switch (netdev_speed / lanes) {
1955  	case SPEED_2500:
1956  		*speed = IB_SPEED_SDR;
1957  		break;
1958  	case SPEED_5000:
1959  		*speed = IB_SPEED_DDR;
1960  		break;
1961  	case SPEED_10000:
1962  		*speed = IB_SPEED_FDR10;
1963  		break;
1964  	case SPEED_14000:
1965  		*speed = IB_SPEED_FDR;
1966  		break;
1967  	case SPEED_25000:
1968  		*speed = IB_SPEED_EDR;
1969  		break;
1970  	case SPEED_50000:
1971  		*speed = IB_SPEED_HDR;
1972  		break;
1973  	case SPEED_100000:
1974  		*speed = IB_SPEED_NDR;
1975  		break;
1976  	default:
1977  		*speed = IB_SPEED_SDR;
1978  	}
1979  }
1980  
1981  int ib_get_eth_speed(struct ib_device *dev, u32 port_num, u16 *speed, u8 *width)
1982  {
1983  	int rc;
1984  	u32 netdev_speed;
1985  	struct net_device *netdev;
1986  	struct ethtool_link_ksettings lksettings = {};
1987  
1988  	if (rdma_port_get_link_layer(dev, port_num) != IB_LINK_LAYER_ETHERNET)
1989  		return -EINVAL;
1990  
1991  	netdev = ib_device_get_netdev(dev, port_num);
1992  	if (!netdev)
1993  		return -ENODEV;
1994  
1995  	rtnl_lock();
1996  	rc = __ethtool_get_link_ksettings(netdev, &lksettings);
1997  	rtnl_unlock();
1998  
1999  	dev_put(netdev);
2000  
2001  	if (!rc && lksettings.base.speed != (u32)SPEED_UNKNOWN) {
2002  		netdev_speed = lksettings.base.speed;
2003  	} else {
2004  		netdev_speed = SPEED_1000;
2005  		if (rc)
2006  			pr_warn("%s speed is unknown, defaulting to %u\n",
2007  				netdev->name, netdev_speed);
2008  	}
2009  
2010  	ib_get_width_and_speed(netdev_speed, lksettings.lanes,
2011  			       speed, width);
2012  
2013  	return 0;
2014  }
2015  EXPORT_SYMBOL(ib_get_eth_speed);
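
/*
 * Example mapping performed by the helpers above (illustrative): a RoCE port
 * whose netdev reports 100000 Mb/s with no lane information is advertised as
 * IB_WIDTH_4X / IB_SPEED_EDR (4 x 25 Gb/s), while 100000 Mb/s over 2 lanes
 * maps to IB_WIDTH_2X / IB_SPEED_HDR (2 x 50 Gb/s).
 */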
2016  
2017  int ib_modify_qp(struct ib_qp *qp,
2018  		 struct ib_qp_attr *qp_attr,
2019  		 int qp_attr_mask)
2020  {
2021  	return _ib_modify_qp(qp->real_qp, qp_attr, qp_attr_mask, NULL);
2022  }
2023  EXPORT_SYMBOL(ib_modify_qp);
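
/*
 * Example (sketch, assuming a kernel consumer holding an RC QP): driving the
 * RESET->INIT transition with ib_modify_qp().  Values such as port 1 and
 * pkey_index 0 are placeholders.
 *
 *	struct ib_qp_attr attr = {
 *		.qp_state	 = IB_QPS_INIT,
 *		.pkey_index	 = 0,
 *		.port_num	 = 1,
 *		.qp_access_flags = IB_ACCESS_REMOTE_WRITE,
 *	};
 *
 *	ret = ib_modify_qp(qp, &attr, IB_QP_STATE | IB_QP_PKEY_INDEX |
 *			   IB_QP_PORT | IB_QP_ACCESS_FLAGS);
 */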
2024  
2025  int ib_query_qp(struct ib_qp *qp,
2026  		struct ib_qp_attr *qp_attr,
2027  		int qp_attr_mask,
2028  		struct ib_qp_init_attr *qp_init_attr)
2029  {
2030  	qp_attr->ah_attr.grh.sgid_attr = NULL;
2031  	qp_attr->alt_ah_attr.grh.sgid_attr = NULL;
2032  
2033  	return qp->device->ops.query_qp ?
2034  		qp->device->ops.query_qp(qp->real_qp, qp_attr, qp_attr_mask,
2035  					 qp_init_attr) : -EOPNOTSUPP;
2036  }
2037  EXPORT_SYMBOL(ib_query_qp);
2038  
2039  int ib_close_qp(struct ib_qp *qp)
2040  {
2041  	struct ib_qp *real_qp;
2042  	unsigned long flags;
2043  
2044  	real_qp = qp->real_qp;
2045  	if (real_qp == qp)
2046  		return -EINVAL;
2047  
2048  	spin_lock_irqsave(&real_qp->device->qp_open_list_lock, flags);
2049  	list_del(&qp->open_list);
2050  	spin_unlock_irqrestore(&real_qp->device->qp_open_list_lock, flags);
2051  
2052  	atomic_dec(&real_qp->usecnt);
2053  	if (qp->qp_sec)
2054  		ib_close_shared_qp_security(qp->qp_sec);
2055  	kfree(qp);
2056  
2057  	return 0;
2058  }
2059  EXPORT_SYMBOL(ib_close_qp);
2060  
2061  static int __ib_destroy_shared_qp(struct ib_qp *qp)
2062  {
2063  	struct ib_xrcd *xrcd;
2064  	struct ib_qp *real_qp;
2065  	int ret;
2066  
2067  	real_qp = qp->real_qp;
2068  	xrcd = real_qp->xrcd;
2069  	down_write(&xrcd->tgt_qps_rwsem);
2070  	ib_close_qp(qp);
2071  	if (atomic_read(&real_qp->usecnt) == 0)
2072  		xa_erase(&xrcd->tgt_qps, real_qp->qp_num);
2073  	else
2074  		real_qp = NULL;
2075  	up_write(&xrcd->tgt_qps_rwsem);
2076  
2077  	if (real_qp) {
2078  		ret = ib_destroy_qp(real_qp);
2079  		if (!ret)
2080  			atomic_dec(&xrcd->usecnt);
2081  	}
2082  
2083  	return 0;
2084  }
2085  
2086  int ib_destroy_qp_user(struct ib_qp *qp, struct ib_udata *udata)
2087  {
2088  	const struct ib_gid_attr *alt_path_sgid_attr = qp->alt_path_sgid_attr;
2089  	const struct ib_gid_attr *av_sgid_attr = qp->av_sgid_attr;
2090  	struct ib_qp_security *sec;
2091  	int ret;
2092  
2093  	WARN_ON_ONCE(qp->mrs_used > 0);
2094  
2095  	if (atomic_read(&qp->usecnt))
2096  		return -EBUSY;
2097  
2098  	if (qp->real_qp != qp)
2099  		return __ib_destroy_shared_qp(qp);
2100  
2101  	sec  = qp->qp_sec;
2102  	if (sec)
2103  		ib_destroy_qp_security_begin(sec);
2104  
2105  	if (!qp->uobject)
2106  		rdma_rw_cleanup_mrs(qp);
2107  
2108  	rdma_counter_unbind_qp(qp, true);
2109  	ret = qp->device->ops.destroy_qp(qp, udata);
2110  	if (ret) {
2111  		if (sec)
2112  			ib_destroy_qp_security_abort(sec);
2113  		return ret;
2114  	}
2115  
2116  	if (alt_path_sgid_attr)
2117  		rdma_put_gid_attr(alt_path_sgid_attr);
2118  	if (av_sgid_attr)
2119  		rdma_put_gid_attr(av_sgid_attr);
2120  
2121  	ib_qp_usecnt_dec(qp);
2122  	if (sec)
2123  		ib_destroy_qp_security_end(sec);
2124  
2125  	rdma_restrack_del(&qp->res);
2126  	kfree(qp);
2127  	return ret;
2128  }
2129  EXPORT_SYMBOL(ib_destroy_qp_user);
2130  
2131  /* Completion queues */
2132  
2133  struct ib_cq *__ib_create_cq(struct ib_device *device,
2134  			     ib_comp_handler comp_handler,
2135  			     void (*event_handler)(struct ib_event *, void *),
2136  			     void *cq_context,
2137  			     const struct ib_cq_init_attr *cq_attr,
2138  			     const char *caller)
2139  {
2140  	struct ib_cq *cq;
2141  	int ret;
2142  
2143  	cq = rdma_zalloc_drv_obj(device, ib_cq);
2144  	if (!cq)
2145  		return ERR_PTR(-ENOMEM);
2146  
2147  	cq->device = device;
2148  	cq->uobject = NULL;
2149  	cq->comp_handler = comp_handler;
2150  	cq->event_handler = event_handler;
2151  	cq->cq_context = cq_context;
2152  	atomic_set(&cq->usecnt, 0);
2153  
2154  	rdma_restrack_new(&cq->res, RDMA_RESTRACK_CQ);
2155  	rdma_restrack_set_name(&cq->res, caller);
2156  
2157  	ret = device->ops.create_cq(cq, cq_attr, NULL);
2158  	if (ret) {
2159  		rdma_restrack_put(&cq->res);
2160  		kfree(cq);
2161  		return ERR_PTR(ret);
2162  	}
2163  
2164  	rdma_restrack_add(&cq->res);
2165  	return cq;
2166  }
2167  EXPORT_SYMBOL(__ib_create_cq);
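
/*
 * Example (sketch): kernel users normally reach __ib_create_cq() through the
 * ib_create_cq() wrapper macro.  The handler and context names below are
 * hypothetical.
 *
 *	struct ib_cq_init_attr cq_attr = { .cqe = 256, .comp_vector = 0 };
 *	struct ib_cq *cq;
 *
 *	cq = ib_create_cq(device, my_comp_handler, my_event_handler,
 *			  my_ctx, &cq_attr);
 *	if (IS_ERR(cq))
 *		return PTR_ERR(cq);
 */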
2168  
2169  int rdma_set_cq_moderation(struct ib_cq *cq, u16 cq_count, u16 cq_period)
2170  {
2171  	if (cq->shared)
2172  		return -EOPNOTSUPP;
2173  
2174  	return cq->device->ops.modify_cq ?
2175  		cq->device->ops.modify_cq(cq, cq_count,
2176  					  cq_period) : -EOPNOTSUPP;
2177  }
2178  EXPORT_SYMBOL(rdma_set_cq_moderation);
2179  
2180  int ib_destroy_cq_user(struct ib_cq *cq, struct ib_udata *udata)
2181  {
2182  	int ret;
2183  
2184  	if (WARN_ON_ONCE(cq->shared))
2185  		return -EOPNOTSUPP;
2186  
2187  	if (atomic_read(&cq->usecnt))
2188  		return -EBUSY;
2189  
2190  	ret = cq->device->ops.destroy_cq(cq, udata);
2191  	if (ret)
2192  		return ret;
2193  
2194  	rdma_restrack_del(&cq->res);
2195  	kfree(cq);
2196  	return ret;
2197  }
2198  EXPORT_SYMBOL(ib_destroy_cq_user);
2199  
2200  int ib_resize_cq(struct ib_cq *cq, int cqe)
2201  {
2202  	if (cq->shared)
2203  		return -EOPNOTSUPP;
2204  
2205  	return cq->device->ops.resize_cq ?
2206  		cq->device->ops.resize_cq(cq, cqe, NULL) : -EOPNOTSUPP;
2207  }
2208  EXPORT_SYMBOL(ib_resize_cq);
2209  
2210  /* Memory regions */
2211  
2212  struct ib_mr *ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
2213  			     u64 virt_addr, int access_flags)
2214  {
2215  	struct ib_mr *mr;
2216  
2217  	if (access_flags & IB_ACCESS_ON_DEMAND) {
2218  		if (!(pd->device->attrs.kernel_cap_flags &
2219  		      IBK_ON_DEMAND_PAGING)) {
2220  			pr_debug("ODP support not available\n");
2221  			return ERR_PTR(-EINVAL);
2222  		}
2223  	}
2224  
2225  	mr = pd->device->ops.reg_user_mr(pd, start, length, virt_addr,
2226  					 access_flags, NULL);
2227  
2228  	if (IS_ERR(mr))
2229  		return mr;
2230  
2231  	mr->device = pd->device;
2232  	mr->type = IB_MR_TYPE_USER;
2233  	mr->pd = pd;
2234  	mr->dm = NULL;
2235  	atomic_inc(&pd->usecnt);
2236  	mr->iova =  virt_addr;
2237  	mr->length = length;
2238  
2239  	rdma_restrack_new(&mr->res, RDMA_RESTRACK_MR);
2240  	rdma_restrack_parent_name(&mr->res, &pd->res);
2241  	rdma_restrack_add(&mr->res);
2242  
2243  	return mr;
2244  }
2245  EXPORT_SYMBOL(ib_reg_user_mr);
2246  
2247  int ib_advise_mr(struct ib_pd *pd, enum ib_uverbs_advise_mr_advice advice,
2248  		 u32 flags, struct ib_sge *sg_list, u32 num_sge)
2249  {
2250  	if (!pd->device->ops.advise_mr)
2251  		return -EOPNOTSUPP;
2252  
2253  	if (!num_sge)
2254  		return 0;
2255  
2256  	return pd->device->ops.advise_mr(pd, advice, flags, sg_list, num_sge,
2257  					 NULL);
2258  }
2259  EXPORT_SYMBOL(ib_advise_mr);
2260  
2261  int ib_dereg_mr_user(struct ib_mr *mr, struct ib_udata *udata)
2262  {
2263  	struct ib_pd *pd = mr->pd;
2264  	struct ib_dm *dm = mr->dm;
2265  	struct ib_sig_attrs *sig_attrs = mr->sig_attrs;
2266  	int ret;
2267  
2268  	trace_mr_dereg(mr);
2269  	rdma_restrack_del(&mr->res);
2270  	ret = mr->device->ops.dereg_mr(mr, udata);
2271  	if (!ret) {
2272  		atomic_dec(&pd->usecnt);
2273  		if (dm)
2274  			atomic_dec(&dm->usecnt);
2275  		kfree(sig_attrs);
2276  	}
2277  
2278  	return ret;
2279  }
2280  EXPORT_SYMBOL(ib_dereg_mr_user);
2281  
2282  /**
2283   * ib_alloc_mr() - Allocates a memory region
2284   * @pd:            protection domain associated with the region
2285   * @mr_type:       memory region type
2286   * @max_num_sg:    maximum sg entries available for registration.
2287   *
2288   * Notes:
2289   * Memory registration page/sg lists must not exceed max_num_sg.
2290   * For mr_type IB_MR_TYPE_MEM_REG, the total length cannot exceed
2291   * max_num_sg * used_page_size.
2292   *
2293   */
2294  struct ib_mr *ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
2295  			  u32 max_num_sg)
2296  {
2297  	struct ib_mr *mr;
2298  
2299  	if (!pd->device->ops.alloc_mr) {
2300  		mr = ERR_PTR(-EOPNOTSUPP);
2301  		goto out;
2302  	}
2303  
2304  	if (mr_type == IB_MR_TYPE_INTEGRITY) {
2305  		WARN_ON_ONCE(1);
2306  		mr = ERR_PTR(-EINVAL);
2307  		goto out;
2308  	}
2309  
2310  	mr = pd->device->ops.alloc_mr(pd, mr_type, max_num_sg);
2311  	if (IS_ERR(mr))
2312  		goto out;
2313  
2314  	mr->device = pd->device;
2315  	mr->pd = pd;
2316  	mr->dm = NULL;
2317  	mr->uobject = NULL;
2318  	atomic_inc(&pd->usecnt);
2319  	mr->need_inval = false;
2320  	mr->type = mr_type;
2321  	mr->sig_attrs = NULL;
2322  
2323  	rdma_restrack_new(&mr->res, RDMA_RESTRACK_MR);
2324  	rdma_restrack_parent_name(&mr->res, &pd->res);
2325  	rdma_restrack_add(&mr->res);
2326  out:
2327  	trace_mr_alloc(pd, mr_type, max_num_sg, mr);
2328  	return mr;
2329  }
2330  EXPORT_SYMBOL(ib_alloc_mr);
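
/*
 * Example (sketch): allocating a fast-registration MR for up to 16 pages and
 * releasing it again.  Error handling is abbreviated.
 *
 *	struct ib_mr *mr;
 *
 *	mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, 16);
 *	if (IS_ERR(mr))
 *		return PTR_ERR(mr);
 *	...
 *	ib_dereg_mr(mr);
 */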
2331  
2332  /**
2333   * ib_alloc_mr_integrity() - Allocates an integrity memory region
2334   * @pd:                      protection domain associated with the region
2335   * @max_num_data_sg:         maximum data sg entries available for registration
2336   * @max_num_meta_sg:         maximum metadata sg entries available for
2337   *                           registration
2338   *
2339   * Notes:
2340   * Memory registration page/sg lists must not exceed max_num_sg,
2341   * also the integrity page/sg lists must not exceed max_num_meta_sg.
2342   *
2343   */
2344  struct ib_mr *ib_alloc_mr_integrity(struct ib_pd *pd,
2345  				    u32 max_num_data_sg,
2346  				    u32 max_num_meta_sg)
2347  {
2348  	struct ib_mr *mr;
2349  	struct ib_sig_attrs *sig_attrs;
2350  
2351  	if (!pd->device->ops.alloc_mr_integrity ||
2352  	    !pd->device->ops.map_mr_sg_pi) {
2353  		mr = ERR_PTR(-EOPNOTSUPP);
2354  		goto out;
2355  	}
2356  
2357  	if (!max_num_meta_sg) {
2358  		mr = ERR_PTR(-EINVAL);
2359  		goto out;
2360  	}
2361  
2362  	sig_attrs = kzalloc(sizeof(struct ib_sig_attrs), GFP_KERNEL);
2363  	if (!sig_attrs) {
2364  		mr = ERR_PTR(-ENOMEM);
2365  		goto out;
2366  	}
2367  
2368  	mr = pd->device->ops.alloc_mr_integrity(pd, max_num_data_sg,
2369  						max_num_meta_sg);
2370  	if (IS_ERR(mr)) {
2371  		kfree(sig_attrs);
2372  		goto out;
2373  	}
2374  
2375  	mr->device = pd->device;
2376  	mr->pd = pd;
2377  	mr->dm = NULL;
2378  	mr->uobject = NULL;
2379  	atomic_inc(&pd->usecnt);
2380  	mr->need_inval = false;
2381  	mr->type = IB_MR_TYPE_INTEGRITY;
2382  	mr->sig_attrs = sig_attrs;
2383  
2384  	rdma_restrack_new(&mr->res, RDMA_RESTRACK_MR);
2385  	rdma_restrack_parent_name(&mr->res, &pd->res);
2386  	rdma_restrack_add(&mr->res);
2387  out:
2388  	trace_mr_integ_alloc(pd, max_num_data_sg, max_num_meta_sg, mr);
2389  	return mr;
2390  }
2391  EXPORT_SYMBOL(ib_alloc_mr_integrity);
2392  
2393  /* Multicast groups */
2394  
2395  static bool is_valid_mcast_lid(struct ib_qp *qp, u16 lid)
2396  {
2397  	struct ib_qp_init_attr init_attr = {};
2398  	struct ib_qp_attr attr = {};
2399  	int num_eth_ports = 0;
2400  	unsigned int port;
2401  
2402  	/* If QP state >= init, it is assigned to a port and we can check this
2403  	 * port only.
2404  	 */
2405  	if (!ib_query_qp(qp, &attr, IB_QP_STATE | IB_QP_PORT, &init_attr)) {
2406  		if (attr.qp_state >= IB_QPS_INIT) {
2407  			if (rdma_port_get_link_layer(qp->device, attr.port_num) !=
2408  			    IB_LINK_LAYER_INFINIBAND)
2409  				return true;
2410  			goto lid_check;
2411  		}
2412  	}
2413  
2414  	/* Can't get a quick answer, iterate over all ports */
2415  	rdma_for_each_port(qp->device, port)
2416  		if (rdma_port_get_link_layer(qp->device, port) !=
2417  		    IB_LINK_LAYER_INFINIBAND)
2418  			num_eth_ports++;
2419  
2420  	/* If we have at least one Ethernet port, RoCE annex declares that
2421  	 * multicast LID should be ignored. We can't tell at this step if the
2422  	 * QP belongs to an IB or Ethernet port.
2423  	 */
2424  	if (num_eth_ports)
2425  		return true;
2426  
2427  	/* If all the ports are IB, we can check according to IB spec. */
2428  lid_check:
2429  	return !(lid < be16_to_cpu(IB_MULTICAST_LID_BASE) ||
2430  		 lid == be16_to_cpu(IB_LID_PERMISSIVE));
2431  }
2432  
2433  int ib_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
2434  {
2435  	int ret;
2436  
2437  	if (!qp->device->ops.attach_mcast)
2438  		return -EOPNOTSUPP;
2439  
2440  	if (!rdma_is_multicast_addr((struct in6_addr *)gid->raw) ||
2441  	    qp->qp_type != IB_QPT_UD || !is_valid_mcast_lid(qp, lid))
2442  		return -EINVAL;
2443  
2444  	ret = qp->device->ops.attach_mcast(qp, gid, lid);
2445  	if (!ret)
2446  		atomic_inc(&qp->usecnt);
2447  	return ret;
2448  }
2449  EXPORT_SYMBOL(ib_attach_mcast);
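
/*
 * Example (sketch): attaching a UD QP to a multicast group whose GID and LID
 * were obtained elsewhere (for instance from an rdma_cm join); the names
 * mc_gid and mc_lid are placeholders.
 *
 *	ret = ib_attach_mcast(qp, &mc_gid, mc_lid);
 *	if (ret)
 *		return ret;
 *	...
 *	ib_detach_mcast(qp, &mc_gid, mc_lid);
 */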
2450  
2451  int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
2452  {
2453  	int ret;
2454  
2455  	if (!qp->device->ops.detach_mcast)
2456  		return -EOPNOTSUPP;
2457  
2458  	if (!rdma_is_multicast_addr((struct in6_addr *)gid->raw) ||
2459  	    qp->qp_type != IB_QPT_UD || !is_valid_mcast_lid(qp, lid))
2460  		return -EINVAL;
2461  
2462  	ret = qp->device->ops.detach_mcast(qp, gid, lid);
2463  	if (!ret)
2464  		atomic_dec(&qp->usecnt);
2465  	return ret;
2466  }
2467  EXPORT_SYMBOL(ib_detach_mcast);
2468  
2469  /**
2470   * ib_alloc_xrcd_user - Allocates an XRC domain.
2471   * @device: The device on which to allocate the XRC domain.
2472   * @inode: inode to connect XRCD
2473   * @udata: Valid user data or NULL for kernel object
2474   */
2475  struct ib_xrcd *ib_alloc_xrcd_user(struct ib_device *device,
2476  				   struct inode *inode, struct ib_udata *udata)
2477  {
2478  	struct ib_xrcd *xrcd;
2479  	int ret;
2480  
2481  	if (!device->ops.alloc_xrcd)
2482  		return ERR_PTR(-EOPNOTSUPP);
2483  
2484  	xrcd = rdma_zalloc_drv_obj(device, ib_xrcd);
2485  	if (!xrcd)
2486  		return ERR_PTR(-ENOMEM);
2487  
2488  	xrcd->device = device;
2489  	xrcd->inode = inode;
2490  	atomic_set(&xrcd->usecnt, 0);
2491  	init_rwsem(&xrcd->tgt_qps_rwsem);
2492  	xa_init(&xrcd->tgt_qps);
2493  
2494  	ret = device->ops.alloc_xrcd(xrcd, udata);
2495  	if (ret)
2496  		goto err;
2497  	return xrcd;
2498  err:
2499  	kfree(xrcd);
2500  	return ERR_PTR(ret);
2501  }
2502  EXPORT_SYMBOL(ib_alloc_xrcd_user);
2503  
2504  /**
2505   * ib_dealloc_xrcd_user - Deallocates an XRC domain.
2506   * @xrcd: The XRC domain to deallocate.
2507   * @udata: Valid user data or NULL for kernel object
2508   */
2509  int ib_dealloc_xrcd_user(struct ib_xrcd *xrcd, struct ib_udata *udata)
2510  {
2511  	int ret;
2512  
2513  	if (atomic_read(&xrcd->usecnt))
2514  		return -EBUSY;
2515  
2516  	WARN_ON(!xa_empty(&xrcd->tgt_qps));
2517  	ret = xrcd->device->ops.dealloc_xrcd(xrcd, udata);
2518  	if (ret)
2519  		return ret;
2520  	kfree(xrcd);
2521  	return ret;
2522  }
2523  EXPORT_SYMBOL(ib_dealloc_xrcd_user);
2524  
2525  /**
2526   * ib_create_wq - Creates a WQ associated with the specified protection
2527   * domain.
2528   * @pd: The protection domain associated with the WQ.
2529   * @wq_attr: A list of initial attributes required to create the
2530   * WQ. If WQ creation succeeds, then the attributes are updated to
2531   * the actual capabilities of the created WQ.
2532   *
2533   * wq_attr->max_wr and wq_attr->max_sge determine
2534   * the requested size of the WQ, and are set to the actual values allocated
2535   * on return.
2536   * If ib_create_wq() succeeds, then max_wr and max_sge will always be
2537   * at least as large as the requested values.
2538   */
2539  struct ib_wq *ib_create_wq(struct ib_pd *pd,
2540  			   struct ib_wq_init_attr *wq_attr)
2541  {
2542  	struct ib_wq *wq;
2543  
2544  	if (!pd->device->ops.create_wq)
2545  		return ERR_PTR(-EOPNOTSUPP);
2546  
2547  	wq = pd->device->ops.create_wq(pd, wq_attr, NULL);
2548  	if (!IS_ERR(wq)) {
2549  		wq->event_handler = wq_attr->event_handler;
2550  		wq->wq_context = wq_attr->wq_context;
2551  		wq->wq_type = wq_attr->wq_type;
2552  		wq->cq = wq_attr->cq;
2553  		wq->device = pd->device;
2554  		wq->pd = pd;
2555  		wq->uobject = NULL;
2556  		atomic_inc(&pd->usecnt);
2557  		atomic_inc(&wq_attr->cq->usecnt);
2558  		atomic_set(&wq->usecnt, 0);
2559  	}
2560  	return wq;
2561  }
2562  EXPORT_SYMBOL(ib_create_wq);
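
/*
 * Example (sketch): creating a receive WQ over an existing CQ.  The sizes are
 * placeholders; on success max_wr/max_sge reflect what was actually allocated.
 *
 *	struct ib_wq_init_attr wq_attr = {
 *		.wq_type = IB_WQT_RQ,
 *		.max_wr	 = 128,
 *		.max_sge = 1,
 *		.cq	 = cq,
 *	};
 *	struct ib_wq *wq = ib_create_wq(pd, &wq_attr);
 *
 *	if (IS_ERR(wq))
 *		return PTR_ERR(wq);
 */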
2563  
2564  /**
2565   * ib_destroy_wq_user - Destroys the specified user WQ.
2566   * @wq: The WQ to destroy.
2567   * @udata: Valid user data
2568   */
2569  int ib_destroy_wq_user(struct ib_wq *wq, struct ib_udata *udata)
2570  {
2571  	struct ib_cq *cq = wq->cq;
2572  	struct ib_pd *pd = wq->pd;
2573  	int ret;
2574  
2575  	if (atomic_read(&wq->usecnt))
2576  		return -EBUSY;
2577  
2578  	ret = wq->device->ops.destroy_wq(wq, udata);
2579  	if (ret)
2580  		return ret;
2581  
2582  	atomic_dec(&pd->usecnt);
2583  	atomic_dec(&cq->usecnt);
2584  	return ret;
2585  }
2586  EXPORT_SYMBOL(ib_destroy_wq_user);
2587  
2588  int ib_check_mr_status(struct ib_mr *mr, u32 check_mask,
2589  		       struct ib_mr_status *mr_status)
2590  {
2591  	if (!mr->device->ops.check_mr_status)
2592  		return -EOPNOTSUPP;
2593  
2594  	return mr->device->ops.check_mr_status(mr, check_mask, mr_status);
2595  }
2596  EXPORT_SYMBOL(ib_check_mr_status);
2597  
2598  int ib_set_vf_link_state(struct ib_device *device, int vf, u32 port,
2599  			 int state)
2600  {
2601  	if (!device->ops.set_vf_link_state)
2602  		return -EOPNOTSUPP;
2603  
2604  	return device->ops.set_vf_link_state(device, vf, port, state);
2605  }
2606  EXPORT_SYMBOL(ib_set_vf_link_state);
2607  
2608  int ib_get_vf_config(struct ib_device *device, int vf, u32 port,
2609  		     struct ifla_vf_info *info)
2610  {
2611  	if (!device->ops.get_vf_config)
2612  		return -EOPNOTSUPP;
2613  
2614  	return device->ops.get_vf_config(device, vf, port, info);
2615  }
2616  EXPORT_SYMBOL(ib_get_vf_config);
2617  
2618  int ib_get_vf_stats(struct ib_device *device, int vf, u32 port,
2619  		    struct ifla_vf_stats *stats)
2620  {
2621  	if (!device->ops.get_vf_stats)
2622  		return -EOPNOTSUPP;
2623  
2624  	return device->ops.get_vf_stats(device, vf, port, stats);
2625  }
2626  EXPORT_SYMBOL(ib_get_vf_stats);
2627  
2628  int ib_set_vf_guid(struct ib_device *device, int vf, u32 port, u64 guid,
2629  		   int type)
2630  {
2631  	if (!device->ops.set_vf_guid)
2632  		return -EOPNOTSUPP;
2633  
2634  	return device->ops.set_vf_guid(device, vf, port, guid, type);
2635  }
2636  EXPORT_SYMBOL(ib_set_vf_guid);
2637  
2638  int ib_get_vf_guid(struct ib_device *device, int vf, u32 port,
2639  		   struct ifla_vf_guid *node_guid,
2640  		   struct ifla_vf_guid *port_guid)
2641  {
2642  	if (!device->ops.get_vf_guid)
2643  		return -EOPNOTSUPP;
2644  
2645  	return device->ops.get_vf_guid(device, vf, port, node_guid, port_guid);
2646  }
2647  EXPORT_SYMBOL(ib_get_vf_guid);
2648  /**
2649   * ib_map_mr_sg_pi() - Map the dma mapped SG lists for PI (protection
2650   *     information) and set an appropriate memory region for registration.
2651   * @mr:             memory region
2652   * @data_sg:        dma mapped scatterlist for data
2653   * @data_sg_nents:  number of entries in data_sg
2654   * @data_sg_offset: offset in bytes into data_sg
2655   * @meta_sg:        dma mapped scatterlist for metadata
2656   * @meta_sg_nents:  number of entries in meta_sg
2657   * @meta_sg_offset: offset in bytes into meta_sg
2658   * @page_size:      page vector desired page size
2659   *
2660   * Constraints:
2661   * - The MR must be allocated with type IB_MR_TYPE_INTEGRITY.
2662   *
2663   * Return: 0 on success.
2664   *
2665   * After this completes successfully, the memory region
2666   * is ready for registration.
2667   */
2668  int ib_map_mr_sg_pi(struct ib_mr *mr, struct scatterlist *data_sg,
2669  		    int data_sg_nents, unsigned int *data_sg_offset,
2670  		    struct scatterlist *meta_sg, int meta_sg_nents,
2671  		    unsigned int *meta_sg_offset, unsigned int page_size)
2672  {
2673  	if (unlikely(!mr->device->ops.map_mr_sg_pi ||
2674  		     WARN_ON_ONCE(mr->type != IB_MR_TYPE_INTEGRITY)))
2675  		return -EOPNOTSUPP;
2676  
2677  	mr->page_size = page_size;
2678  
2679  	return mr->device->ops.map_mr_sg_pi(mr, data_sg, data_sg_nents,
2680  					    data_sg_offset, meta_sg,
2681  					    meta_sg_nents, meta_sg_offset);
2682  }
2683  EXPORT_SYMBOL(ib_map_mr_sg_pi);
2684  
2685  /**
2686   * ib_map_mr_sg() - Map the largest prefix of a dma mapped SG list
2687   *     and set it as the memory region.
2688   * @mr:            memory region
2689   * @sg:            dma mapped scatterlist
2690   * @sg_nents:      number of entries in sg
2691   * @sg_offset:     offset in bytes into sg
2692   * @page_size:     page vector desired page size
2693   *
2694   * Constraints:
2695   *
2696   * - The first sg element is allowed to have an offset.
2697   * - Each sg element must either be aligned to page_size or virtually
2698   *   contiguous to the previous element. In case an sg element has a
2699   *   non-contiguous offset, the mapping prefix will not include it.
2700   * - The last sg element is allowed to have length less than page_size.
2701   * - If the total byte length of sg_nents exceeds the MR's max_num_sg * page_size,
2702   *   then only max_num_sg entries will be mapped.
2703   * - If the MR was allocated with type IB_MR_TYPE_SG_GAPS, none of these
2704   *   constraints holds and the page_size argument is ignored.
2705   *
2706   * Returns the number of sg elements that were mapped to the memory region.
2707   *
2708   * After this completes successfully, the memory region
2709   * is ready for registration.
2710   */
2711  int ib_map_mr_sg(struct ib_mr *mr, struct scatterlist *sg, int sg_nents,
2712  		 unsigned int *sg_offset, unsigned int page_size)
2713  {
2714  	if (unlikely(!mr->device->ops.map_mr_sg))
2715  		return -EOPNOTSUPP;
2716  
2717  	mr->page_size = page_size;
2718  
2719  	return mr->device->ops.map_mr_sg(mr, sg, sg_nents, sg_offset);
2720  }
2721  EXPORT_SYMBOL(ib_map_mr_sg);
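
/*
 * Example (sketch): mapping a DMA-mapped scatterlist onto an allocated MR.
 * The sg/sg_len names are placeholders; the MR is subsequently made live with
 * an IB_WR_REG_MR work request.
 *
 *	nents = ib_dma_map_sg(dev, sg, sg_len, DMA_TO_DEVICE);
 *	n = ib_map_mr_sg(mr, sg, nents, NULL, PAGE_SIZE);
 *	if (n != nents)
 *		goto err;
 */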
2722  
2723  /**
2724   * ib_sg_to_pages() - Convert the largest prefix of a sg list
2725   *     to a page vector
2726   * @mr:            memory region
2727   * @sgl:           dma mapped scatterlist
2728   * @sg_nents:      number of entries in sg
2729   * @sg_offset_p:   ==== =======================================================
2730   *                 IN   start offset in bytes into sg
2731   *                 OUT  offset in bytes for element n of the sg of the first
2732   *                      byte that has not been processed where n is the return
2733   *                      value of this function.
2734   *                 ==== =======================================================
2735   * @set_page:      driver page assignment function pointer
2736   *
2737   * Core service helper for drivers to convert the largest
2738   * prefix of given sg list to a page vector. The sg list
2739   * prefix converted is the prefix that meet the requirements
2740   * of ib_map_mr_sg.
2741   *
2742   * Returns the number of sg elements that were assigned to
2743   * a page vector.
2744   */
2745  int ib_sg_to_pages(struct ib_mr *mr, struct scatterlist *sgl, int sg_nents,
2746  		unsigned int *sg_offset_p, int (*set_page)(struct ib_mr *, u64))
2747  {
2748  	struct scatterlist *sg;
2749  	u64 last_end_dma_addr = 0;
2750  	unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0;
2751  	unsigned int last_page_off = 0;
2752  	u64 page_mask = ~((u64)mr->page_size - 1);
2753  	int i, ret;
2754  
2755  	if (unlikely(sg_nents <= 0 || sg_offset > sg_dma_len(&sgl[0])))
2756  		return -EINVAL;
2757  
2758  	mr->iova = sg_dma_address(&sgl[0]) + sg_offset;
2759  	mr->length = 0;
2760  
2761  	for_each_sg(sgl, sg, sg_nents, i) {
2762  		u64 dma_addr = sg_dma_address(sg) + sg_offset;
2763  		u64 prev_addr = dma_addr;
2764  		unsigned int dma_len = sg_dma_len(sg) - sg_offset;
2765  		u64 end_dma_addr = dma_addr + dma_len;
2766  		u64 page_addr = dma_addr & page_mask;
2767  
2768  		/*
2769  		 * For the second and later elements, check whether either the
2770  		 * end of element i-1 or the start of element i is not aligned
2771  		 * on a page boundary.
2772  		 */
2773  		if (i && (last_page_off != 0 || page_addr != dma_addr)) {
2774  			/* Stop mapping if there is a gap. */
2775  			if (last_end_dma_addr != dma_addr)
2776  				break;
2777  
2778  			/*
2779  			 * Coalesce this element with the last. If it is small
2780  			 * enough just update mr->length. Otherwise start
2781  			 * mapping from the next page.
2782  			 */
2783  			goto next_page;
2784  		}
2785  
2786  		do {
2787  			ret = set_page(mr, page_addr);
2788  			if (unlikely(ret < 0)) {
2789  				sg_offset = prev_addr - sg_dma_address(sg);
2790  				mr->length += prev_addr - dma_addr;
2791  				if (sg_offset_p)
2792  					*sg_offset_p = sg_offset;
2793  				return i || sg_offset ? i : ret;
2794  			}
2795  			prev_addr = page_addr;
2796  next_page:
2797  			page_addr += mr->page_size;
2798  		} while (page_addr < end_dma_addr);
2799  
2800  		mr->length += dma_len;
2801  		last_end_dma_addr = end_dma_addr;
2802  		last_page_off = end_dma_addr & ~page_mask;
2803  
2804  		sg_offset = 0;
2805  	}
2806  
2807  	if (sg_offset_p)
2808  		*sg_offset_p = 0;
2809  	return i;
2810  }
2811  EXPORT_SYMBOL(ib_sg_to_pages);
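
/*
 * Example (sketch of a hypothetical driver): a set_page() callback that
 * collects page addresses into a driver-private array, which is how most
 * map_mr_sg implementations use this helper.  Names such as my_mr and
 * to_my_mr() are illustrative.
 *
 *	static int my_set_page(struct ib_mr *ibmr, u64 addr)
 *	{
 *		struct my_mr *mr = to_my_mr(ibmr);
 *
 *		if (unlikely(mr->npages == mr->max_pages))
 *			return -ENOMEM;
 *
 *		mr->pages[mr->npages++] = addr;
 *		return 0;
 *	}
 *
 *	n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, my_set_page);
 */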
2812  
2813  struct ib_drain_cqe {
2814  	struct ib_cqe cqe;
2815  	struct completion done;
2816  };
2817  
2818  static void ib_drain_qp_done(struct ib_cq *cq, struct ib_wc *wc)
2819  {
2820  	struct ib_drain_cqe *cqe = container_of(wc->wr_cqe, struct ib_drain_cqe,
2821  						cqe);
2822  
2823  	complete(&cqe->done);
2824  }
2825  
2826  /*
2827   * Post a WR and block until its completion is reaped for the SQ.
2828   */
2829  static void __ib_drain_sq(struct ib_qp *qp)
2830  {
2831  	struct ib_cq *cq = qp->send_cq;
2832  	struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
2833  	struct ib_drain_cqe sdrain;
2834  	struct ib_rdma_wr swr = {
2835  		.wr = {
2836  			.next = NULL,
2837  			{ .wr_cqe	= &sdrain.cqe, },
2838  			.opcode	= IB_WR_RDMA_WRITE,
2839  		},
2840  	};
2841  	int ret;
2842  
2843  	ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
2844  	if (ret) {
2845  		WARN_ONCE(ret, "failed to drain send queue: %d\n", ret);
2846  		return;
2847  	}
2848  
2849  	sdrain.cqe.done = ib_drain_qp_done;
2850  	init_completion(&sdrain.done);
2851  
2852  	ret = ib_post_send(qp, &swr.wr, NULL);
2853  	if (ret) {
2854  		WARN_ONCE(ret, "failed to drain send queue: %d\n", ret);
2855  		return;
2856  	}
2857  
2858  	if (cq->poll_ctx == IB_POLL_DIRECT)
2859  		while (wait_for_completion_timeout(&sdrain.done, HZ / 10) <= 0)
2860  			ib_process_cq_direct(cq, -1);
2861  	else
2862  		wait_for_completion(&sdrain.done);
2863  }
2864  
2865  /*
2866   * Post a WR and block until its completion is reaped for the RQ.
2867   */
2868  static void __ib_drain_rq(struct ib_qp *qp)
2869  {
2870  	struct ib_cq *cq = qp->recv_cq;
2871  	struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
2872  	struct ib_drain_cqe rdrain;
2873  	struct ib_recv_wr rwr = {};
2874  	int ret;
2875  
2876  	ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
2877  	if (ret) {
2878  		WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret);
2879  		return;
2880  	}
2881  
2882  	rwr.wr_cqe = &rdrain.cqe;
2883  	rdrain.cqe.done = ib_drain_qp_done;
2884  	init_completion(&rdrain.done);
2885  
2886  	ret = ib_post_recv(qp, &rwr, NULL);
2887  	if (ret) {
2888  		WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret);
2889  		return;
2890  	}
2891  
2892  	if (cq->poll_ctx == IB_POLL_DIRECT)
2893  		while (wait_for_completion_timeout(&rdrain.done, HZ / 10) <= 0)
2894  			ib_process_cq_direct(cq, -1);
2895  	else
2896  		wait_for_completion(&rdrain.done);
2897  }
2898  
2899  /*
2900   * __ib_drain_srq() - Block until Last WQE Reached event arrives, or timeout
2901   *                    expires.
2902   * @qp:               queue pair associated with SRQ to drain
2903   *
2904   * Quoting 10.3.1 Queue Pair and EE Context States:
2905   *
2906   * Note, for QPs that are associated with an SRQ, the Consumer should take the
2907   * QP through the Error State before invoking a Destroy QP or a Modify QP to the
2908   * Reset State.  The Consumer may invoke the Destroy QP without first performing
2909   * a Modify QP to the Error State and waiting for the Affiliated Asynchronous
2910   * Last WQE Reached Event. However, if the Consumer does not wait for the
2911   * Affiliated Asynchronous Last WQE Reached Event, then WQE and Data Segment
2912   * leakage may occur. Therefore, it is good programming practice to tear down a
2913   * QP that is associated with an SRQ by using the following process:
2914   *
2915   * - Put the QP in the Error State
2916   * - Wait for the Affiliated Asynchronous Last WQE Reached Event;
2917   * - either:
2918   *       drain the CQ by invoking the Poll CQ verb and either wait for CQ
2919   *       to be empty or the number of Poll CQ operations has exceeded
2920   *       CQ capacity size;
2921   * - or
2922   *       post another WR that completes on the same CQ and wait for this
2923   *       WR to return as a WC;
2924   * - and then invoke a Destroy QP or Reset QP.
2925   *
2926   * We use the first option.
2927   */
2928  static void __ib_drain_srq(struct ib_qp *qp)
2929  {
2930  	struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
2931  	struct ib_cq *cq;
2932  	int n, polled = 0;
2933  	int ret;
2934  
2935  	if (!qp->srq) {
2936  		WARN_ONCE(1, "QP 0x%p is not associated with SRQ\n", qp);
2937  		return;
2938  	}
2939  
2940  	ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
2941  	if (ret) {
2942  		WARN_ONCE(ret, "failed to drain shared recv queue: %d\n", ret);
2943  		return;
2944  	}
2945  
2946  	if (ib_srq_has_cq(qp->srq->srq_type)) {
2947  		cq = qp->srq->ext.cq;
2948  	} else if (qp->recv_cq) {
2949  		cq = qp->recv_cq;
2950  	} else {
2951  		WARN_ONCE(1, "QP 0x%p has no CQ associated with SRQ\n", qp);
2952  		return;
2953  	}
2954  
2955  	if (wait_for_completion_timeout(&qp->srq_completion, 60 * HZ) > 0) {
2956  		while (polled != cq->cqe) {
2957  			n = ib_process_cq_direct(cq, cq->cqe - polled);
2958  			if (!n)
2959  				return;
2960  			polled += n;
2961  		}
2962  	}
2963  }
2964  
2965  /**
2966   * ib_drain_sq() - Block until all SQ CQEs have been consumed by the
2967   *		   application.
2968   * @qp:            queue pair to drain
2969   *
2970   * If the device has a provider-specific drain function, then
2971   * call that.  Otherwise call the generic drain function
2972   * __ib_drain_sq().
2973   *
2974   * The caller must:
2975   *
2976   * ensure there is room in the CQ and SQ for the drain work request and
2977   * completion.
2978   *
2979   * allocate the CQ using ib_alloc_cq().
2980   *
2981   * ensure that there are no other contexts that are posting WRs concurrently.
2982   * Otherwise the drain is not guaranteed.
2983   */
2984  void ib_drain_sq(struct ib_qp *qp)
2985  {
2986  	if (qp->device->ops.drain_sq)
2987  		qp->device->ops.drain_sq(qp);
2988  	else
2989  		__ib_drain_sq(qp);
2990  	trace_cq_drain_complete(qp->send_cq);
2991  }
2992  EXPORT_SYMBOL(ib_drain_sq);
2993  
2994  /**
2995   * ib_drain_rq() - Block until all RQ CQEs have been consumed by the
2996   *		   application.
2997   * @qp:            queue pair to drain
2998   *
2999   * If the device has a provider-specific drain function, then
3000   * call that.  Otherwise call the generic drain function
3001   * __ib_drain_rq().
3002   *
3003   * The caller must:
3004   *
3005   * ensure there is room in the CQ and RQ for the drain work request and
3006   * completion.
3007   *
3008   * allocate the CQ using ib_alloc_cq().
3009   *
3010   * ensure that there are no other contexts that are posting WRs concurrently.
3011   * Otherwise the drain is not guaranteed.
3012   */
3013  void ib_drain_rq(struct ib_qp *qp)
3014  {
3015  	if (qp->device->ops.drain_rq)
3016  		qp->device->ops.drain_rq(qp);
3017  	else
3018  		__ib_drain_rq(qp);
3019  	trace_cq_drain_complete(qp->recv_cq);
3020  }
3021  EXPORT_SYMBOL(ib_drain_rq);
3022  
3023  /**
3024   * ib_drain_qp() - Block until all CQEs have been consumed by the
3025   *		   application on both the RQ and SQ.
3026   * @qp:            queue pair to drain
3027   *
3028   * The caller must:
3029   *
3030   * ensure there is room in the CQ(s), SQ, and RQ for drain work requests
3031   * and completions.
3032   *
3033   * allocate the CQs using ib_alloc_cq().
3034   *
3035   * ensure that there are no other contexts that are posting WRs concurrently.
3036   * Otherwise the drain is not guaranteed.
3037   */
3038  void ib_drain_qp(struct ib_qp *qp)
3039  {
3040  	ib_drain_sq(qp);
3041  	if (!qp->srq)
3042  		ib_drain_rq(qp);
3043  	else
3044  		__ib_drain_srq(qp);
3045  }
3046  EXPORT_SYMBOL(ib_drain_qp);
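
/*
 * Example (sketch): the typical teardown sequence for a kernel consumer,
 * assuming the CQs were allocated with ib_alloc_cq() as required above.
 *
 *	ib_drain_qp(qp);
 *	ib_destroy_qp(qp);
 *	ib_free_cq(cq);
 */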
3047  
3048  struct net_device *rdma_alloc_netdev(struct ib_device *device, u32 port_num,
3049  				     enum rdma_netdev_t type, const char *name,
3050  				     unsigned char name_assign_type,
3051  				     void (*setup)(struct net_device *))
3052  {
3053  	struct rdma_netdev_alloc_params params;
3054  	struct net_device *netdev;
3055  	int rc;
3056  
3057  	if (!device->ops.rdma_netdev_get_params)
3058  		return ERR_PTR(-EOPNOTSUPP);
3059  
3060  	rc = device->ops.rdma_netdev_get_params(device, port_num, type,
3061  						&params);
3062  	if (rc)
3063  		return ERR_PTR(rc);
3064  
3065  	netdev = alloc_netdev_mqs(params.sizeof_priv, name, name_assign_type,
3066  				  setup, params.txqs, params.rxqs);
3067  	if (!netdev)
3068  		return ERR_PTR(-ENOMEM);
3069  
3070  	return netdev;
3071  }
3072  EXPORT_SYMBOL(rdma_alloc_netdev);
3073  
3074  int rdma_init_netdev(struct ib_device *device, u32 port_num,
3075  		     enum rdma_netdev_t type, const char *name,
3076  		     unsigned char name_assign_type,
3077  		     void (*setup)(struct net_device *),
3078  		     struct net_device *netdev)
3079  {
3080  	struct rdma_netdev_alloc_params params;
3081  	int rc;
3082  
3083  	if (!device->ops.rdma_netdev_get_params)
3084  		return -EOPNOTSUPP;
3085  
3086  	rc = device->ops.rdma_netdev_get_params(device, port_num, type,
3087  						&params);
3088  	if (rc)
3089  		return rc;
3090  
3091  	return params.initialize_rdma_netdev(device, port_num,
3092  					     netdev, params.param);
3093  }
3094  EXPORT_SYMBOL(rdma_init_netdev);
3095  
3096  void __rdma_block_iter_start(struct ib_block_iter *biter,
3097  			     struct scatterlist *sglist, unsigned int nents,
3098  			     unsigned long pgsz)
3099  {
3100  	memset(biter, 0, sizeof(struct ib_block_iter));
3101  	biter->__sg = sglist;
3102  	biter->__sg_nents = nents;
3103  
3104  	/* Driver provides best block size to use */
3105  	biter->__pg_bit = __fls(pgsz);
3106  }
3107  EXPORT_SYMBOL(__rdma_block_iter_start);
3108  
3109  bool __rdma_block_iter_next(struct ib_block_iter *biter)
3110  {
3111  	unsigned int block_offset;
3112  	unsigned int sg_delta;
3113  
3114  	if (!biter->__sg_nents || !biter->__sg)
3115  		return false;
3116  
3117  	biter->__dma_addr = sg_dma_address(biter->__sg) + biter->__sg_advance;
3118  	block_offset = biter->__dma_addr & (BIT_ULL(biter->__pg_bit) - 1);
3119  	sg_delta = BIT_ULL(biter->__pg_bit) - block_offset;
3120  
3121  	if (sg_dma_len(biter->__sg) - biter->__sg_advance > sg_delta) {
3122  		biter->__sg_advance += sg_delta;
3123  	} else {
3124  		biter->__sg_advance = 0;
3125  		biter->__sg = sg_next(biter->__sg);
3126  		biter->__sg_nents--;
3127  	}
3128  
3129  	return true;
3130  }
3131  EXPORT_SYMBOL(__rdma_block_iter_next);
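
/*
 * Example (sketch): drivers normally consume the block iterator through the
 * rdma_for_each_block() macro rather than calling the helpers above directly.
 * The set_page()/mr names below are placeholders for driver-specific page
 * handling.
 *
 *	struct ib_block_iter biter;
 *
 *	rdma_for_each_block(sglist, &biter, nents, PAGE_SIZE)
 *		set_page(mr, rdma_block_iter_dma_address(&biter));
 */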
3132  
3133  /**
3134   * rdma_alloc_hw_stats_struct - Helper function to allocate dynamic struct
3135   *   for the drivers.
3136   * @descs: array of static descriptors
3137   * @num_counters: number of elements in array
3138   * @lifespan: milliseconds between updates
3139   */
3140  struct rdma_hw_stats *rdma_alloc_hw_stats_struct(
3141  	const struct rdma_stat_desc *descs, int num_counters,
3142  	unsigned long lifespan)
3143  {
3144  	struct rdma_hw_stats *stats;
3145  
3146  	stats = kzalloc(struct_size(stats, value, num_counters), GFP_KERNEL);
3147  	if (!stats)
3148  		return NULL;
3149  
3150  	stats->is_disabled = kcalloc(BITS_TO_LONGS(num_counters),
3151  				     sizeof(*stats->is_disabled), GFP_KERNEL);
3152  	if (!stats->is_disabled)
3153  		goto err;
3154  
3155  	stats->descs = descs;
3156  	stats->num_counters = num_counters;
3157  	stats->lifespan = msecs_to_jiffies(lifespan);
3158  	mutex_init(&stats->lock);
3159  
3160  	return stats;
3161  
3162  err:
3163  	kfree(stats);
3164  	return NULL;
3165  }
3166  EXPORT_SYMBOL(rdma_alloc_hw_stats_struct);
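
/*
 * Example (sketch): how a driver's alloc_hw_port_stats() callback might build
 * its stats structure.  The descriptor names are hypothetical;
 * RDMA_HW_STATS_DEFAULT_LIFESPAN is the usual lifespan value.
 *
 *	static const struct rdma_stat_desc my_descs[] = {
 *		{ .name = "rx_pkts" },
 *		{ .name = "tx_pkts" },
 *	};
 *
 *	return rdma_alloc_hw_stats_struct(my_descs, ARRAY_SIZE(my_descs),
 *					  RDMA_HW_STATS_DEFAULT_LIFESPAN);
 */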
3167  
3168  /**
3169   * rdma_free_hw_stats_struct - Helper function to release rdma_hw_stats
3170   * @stats: statistics to release
3171   */
3172  void rdma_free_hw_stats_struct(struct rdma_hw_stats *stats)
3173  {
3174  	if (!stats)
3175  		return;
3176  
3177  	kfree(stats->is_disabled);
3178  	kfree(stats);
3179  }
3180  EXPORT_SYMBOL(rdma_free_hw_stats_struct);
3181