1  // SPDX-License-Identifier: GPL-2.0
2  /*
3   * Common code for the NVMe target.
4   * Copyright (c) 2015-2016 HGST, a Western Digital Company.
5   */
6  #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
7  #include <linux/module.h>
8  #include <linux/random.h>
9  #include <linux/rculist.h>
10  #include <linux/pci-p2pdma.h>
11  #include <linux/scatterlist.h>
12  
13  #include <generated/utsrelease.h>
14  
15  #define CREATE_TRACE_POINTS
16  #include "trace.h"
17  
18  #include "nvmet.h"
19  #include "debugfs.h"
20  
21  struct kmem_cache *nvmet_bvec_cache;
22  struct workqueue_struct *buffered_io_wq;
23  struct workqueue_struct *zbd_wq;
24  static const struct nvmet_fabrics_ops *nvmet_transports[NVMF_TRTYPE_MAX];
25  static DEFINE_IDA(cntlid_ida);
26  
27  struct workqueue_struct *nvmet_wq;
28  EXPORT_SYMBOL_GPL(nvmet_wq);
29  
30  /*
31   * This read/write semaphore is used to synchronize access to configuration
32   * information on a target system that will result in discovery log page
33   * information change for at least one host.
34   * The full list of resources protected by this semaphore is:
35   *
36   *  - subsystems list
37   *  - per-subsystem allowed hosts list
38   *  - allow_any_host subsystem attribute
39   *  - nvmet_genctr
40   *  - the nvmet_transports array
41   *
42   * When updating any of those lists/structures, the write lock should be
43   * obtained, while readers (populating the discovery log page or checking a
44   * host-subsystem link) take the read lock to allow concurrent reads.
45   */
46  DECLARE_RWSEM(nvmet_config_sem);
47  
48  u32 nvmet_ana_group_enabled[NVMET_MAX_ANAGRPS + 1];
49  u64 nvmet_ana_chgcnt;
50  DECLARE_RWSEM(nvmet_ana_sem);
51  
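/*
 * Translate a Linux errno into an NVMe status code and record the
 * location of the offending field in req->error_loc.
 */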
52  inline u16 errno_to_nvme_status(struct nvmet_req *req, int errno)
53  {
54  	switch (errno) {
55  	case 0:
56  		return NVME_SC_SUCCESS;
57  	case -ENOSPC:
58  		req->error_loc = offsetof(struct nvme_rw_command, length);
59  		return NVME_SC_CAP_EXCEEDED | NVME_STATUS_DNR;
60  	case -EREMOTEIO:
61  		req->error_loc = offsetof(struct nvme_rw_command, slba);
62  		return  NVME_SC_LBA_RANGE | NVME_STATUS_DNR;
63  	case -EOPNOTSUPP:
64  		req->error_loc = offsetof(struct nvme_common_command, opcode);
65  		switch (req->cmd->common.opcode) {
66  		case nvme_cmd_dsm:
67  		case nvme_cmd_write_zeroes:
68  			return NVME_SC_ONCS_NOT_SUPPORTED | NVME_STATUS_DNR;
69  		default:
70  			return NVME_SC_INVALID_OPCODE | NVME_STATUS_DNR;
71  		}
72  		break;
73  	case -ENODATA:
74  		req->error_loc = offsetof(struct nvme_rw_command, nsid);
75  		return NVME_SC_ACCESS_DENIED;
76  	case -EIO:
77  		fallthrough;
78  	default:
79  		req->error_loc = offsetof(struct nvme_common_command, opcode);
80  		return NVME_SC_INTERNAL | NVME_STATUS_DNR;
81  	}
82  }
83  
84  u16 nvmet_report_invalid_opcode(struct nvmet_req *req)
85  {
86  	pr_debug("unhandled cmd %d on qid %d\n", req->cmd->common.opcode,
87  		 req->sq->qid);
88  
89  	req->error_loc = offsetof(struct nvme_common_command, opcode);
90  	return NVME_SC_INVALID_OPCODE | NVME_STATUS_DNR;
91  }
92  
93  static struct nvmet_subsys *nvmet_find_get_subsys(struct nvmet_port *port,
94  		const char *subsysnqn);
95  
96  u16 nvmet_copy_to_sgl(struct nvmet_req *req, off_t off, const void *buf,
97  		size_t len)
98  {
99  	if (sg_pcopy_from_buffer(req->sg, req->sg_cnt, buf, len, off) != len) {
100  		req->error_loc = offsetof(struct nvme_common_command, dptr);
101  		return NVME_SC_SGL_INVALID_DATA | NVME_STATUS_DNR;
102  	}
103  	return 0;
104  }
105  
106  u16 nvmet_copy_from_sgl(struct nvmet_req *req, off_t off, void *buf, size_t len)
107  {
108  	if (sg_pcopy_to_buffer(req->sg, req->sg_cnt, buf, len, off) != len) {
109  		req->error_loc = offsetof(struct nvme_common_command, dptr);
110  		return NVME_SC_SGL_INVALID_DATA | NVME_STATUS_DNR;
111  	}
112  	return 0;
113  }
114  
115  u16 nvmet_zero_sgl(struct nvmet_req *req, off_t off, size_t len)
116  {
117  	if (sg_zero_buffer(req->sg, req->sg_cnt, len, off) != len) {
118  		req->error_loc = offsetof(struct nvme_common_command, dptr);
119  		return NVME_SC_SGL_INVALID_DATA | NVME_STATUS_DNR;
120  	}
121  	return 0;
122  }
123  
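/* Return the highest NSID currently allocated in the subsystem, or 0 if none. */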
124  static u32 nvmet_max_nsid(struct nvmet_subsys *subsys)
125  {
126  	struct nvmet_ns *cur;
127  	unsigned long idx;
128  	u32 nsid = 0;
129  
130  	xa_for_each(&subsys->namespaces, idx, cur)
131  		nsid = cur->nsid;
132  
133  	return nsid;
134  }
135  
136  static u32 nvmet_async_event_result(struct nvmet_async_event *aen)
137  {
138  	return aen->event_type | (aen->event_info << 8) | (aen->log_page << 16);
139  }
140  
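/*
 * Complete all outstanding Asynchronous Event Request commands with an
 * internal error; used when the admin queue is being torn down.
 */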
141  static void nvmet_async_events_failall(struct nvmet_ctrl *ctrl)
142  {
143  	struct nvmet_req *req;
144  
145  	mutex_lock(&ctrl->lock);
146  	while (ctrl->nr_async_event_cmds) {
147  		req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds];
148  		mutex_unlock(&ctrl->lock);
149  		nvmet_req_complete(req, NVME_SC_INTERNAL | NVME_STATUS_DNR);
150  		mutex_lock(&ctrl->lock);
151  	}
152  	mutex_unlock(&ctrl->lock);
153  }
154  
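/*
 * Pair queued async events with outstanding AER commands and complete
 * them, oldest event first.
 */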
155  static void nvmet_async_events_process(struct nvmet_ctrl *ctrl)
156  {
157  	struct nvmet_async_event *aen;
158  	struct nvmet_req *req;
159  
160  	mutex_lock(&ctrl->lock);
161  	while (ctrl->nr_async_event_cmds && !list_empty(&ctrl->async_events)) {
162  		aen = list_first_entry(&ctrl->async_events,
163  				       struct nvmet_async_event, entry);
164  		req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds];
165  		nvmet_set_result(req, nvmet_async_event_result(aen));
166  
167  		list_del(&aen->entry);
168  		kfree(aen);
169  
170  		mutex_unlock(&ctrl->lock);
171  		trace_nvmet_async_event(ctrl, req->cqe->result.u32);
172  		nvmet_req_complete(req, 0);
173  		mutex_lock(&ctrl->lock);
174  	}
175  	mutex_unlock(&ctrl->lock);
176  }
177  
178  static void nvmet_async_events_free(struct nvmet_ctrl *ctrl)
179  {
180  	struct nvmet_async_event *aen, *tmp;
181  
182  	mutex_lock(&ctrl->lock);
183  	list_for_each_entry_safe(aen, tmp, &ctrl->async_events, entry) {
184  		list_del(&aen->entry);
185  		kfree(aen);
186  	}
187  	mutex_unlock(&ctrl->lock);
188  }
189  
190  static void nvmet_async_event_work(struct work_struct *work)
191  {
192  	struct nvmet_ctrl *ctrl =
193  		container_of(work, struct nvmet_ctrl, async_event_work);
194  
195  	nvmet_async_events_process(ctrl);
196  }
197  
198  void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type,
199  		u8 event_info, u8 log_page)
200  {
201  	struct nvmet_async_event *aen;
202  
203  	aen = kmalloc(sizeof(*aen), GFP_KERNEL);
204  	if (!aen)
205  		return;
206  
207  	aen->event_type = event_type;
208  	aen->event_info = event_info;
209  	aen->log_page = log_page;
210  
211  	mutex_lock(&ctrl->lock);
212  	list_add_tail(&aen->entry, &ctrl->async_events);
213  	mutex_unlock(&ctrl->lock);
214  
215  	queue_work(nvmet_wq, &ctrl->async_event_work);
216  }
217  
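/*
 * Record a namespace change in the controller's Changed Namespace List
 * log. If the list overflows, it is collapsed to a single 0xffffffff entry.
 */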
218  static void nvmet_add_to_changed_ns_log(struct nvmet_ctrl *ctrl, __le32 nsid)
219  {
220  	u32 i;
221  
222  	mutex_lock(&ctrl->lock);
223  	if (ctrl->nr_changed_ns > NVME_MAX_CHANGED_NAMESPACES)
224  		goto out_unlock;
225  
226  	for (i = 0; i < ctrl->nr_changed_ns; i++) {
227  		if (ctrl->changed_ns_list[i] == nsid)
228  			goto out_unlock;
229  	}
230  
231  	if (ctrl->nr_changed_ns == NVME_MAX_CHANGED_NAMESPACES) {
232  		ctrl->changed_ns_list[0] = cpu_to_le32(0xffffffff);
233  		ctrl->nr_changed_ns = U32_MAX;
234  		goto out_unlock;
235  	}
236  
237  	ctrl->changed_ns_list[ctrl->nr_changed_ns++] = nsid;
238  out_unlock:
239  	mutex_unlock(&ctrl->lock);
240  }
241  
242  void nvmet_ns_changed(struct nvmet_subsys *subsys, u32 nsid)
243  {
244  	struct nvmet_ctrl *ctrl;
245  
246  	lockdep_assert_held(&subsys->lock);
247  
248  	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
249  		nvmet_add_to_changed_ns_log(ctrl, cpu_to_le32(nsid));
250  		if (nvmet_aen_bit_disabled(ctrl, NVME_AEN_BIT_NS_ATTR))
251  			continue;
252  		nvmet_add_async_event(ctrl, NVME_AER_NOTICE,
253  				NVME_AER_NOTICE_NS_CHANGED,
254  				NVME_LOG_CHANGED_NS);
255  	}
256  }
257  
258  void nvmet_send_ana_event(struct nvmet_subsys *subsys,
259  		struct nvmet_port *port)
260  {
261  	struct nvmet_ctrl *ctrl;
262  
263  	mutex_lock(&subsys->lock);
264  	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
265  		if (port && ctrl->port != port)
266  			continue;
267  		if (nvmet_aen_bit_disabled(ctrl, NVME_AEN_BIT_ANA_CHANGE))
268  			continue;
269  		nvmet_add_async_event(ctrl, NVME_AER_NOTICE,
270  				NVME_AER_NOTICE_ANA, NVME_LOG_ANA);
271  	}
272  	mutex_unlock(&subsys->lock);
273  }
274  
275  void nvmet_port_send_ana_event(struct nvmet_port *port)
276  {
277  	struct nvmet_subsys_link *p;
278  
279  	down_read(&nvmet_config_sem);
280  	list_for_each_entry(p, &port->subsystems, entry)
281  		nvmet_send_ana_event(p->subsys, port);
282  	up_read(&nvmet_config_sem);
283  }
284  
285  int nvmet_register_transport(const struct nvmet_fabrics_ops *ops)
286  {
287  	int ret = 0;
288  
289  	down_write(&nvmet_config_sem);
290  	if (nvmet_transports[ops->type])
291  		ret = -EINVAL;
292  	else
293  		nvmet_transports[ops->type] = ops;
294  	up_write(&nvmet_config_sem);
295  
296  	return ret;
297  }
298  EXPORT_SYMBOL_GPL(nvmet_register_transport);
299  
300  void nvmet_unregister_transport(const struct nvmet_fabrics_ops *ops)
301  {
302  	down_write(&nvmet_config_sem);
303  	nvmet_transports[ops->type] = NULL;
304  	up_write(&nvmet_config_sem);
305  }
306  EXPORT_SYMBOL_GPL(nvmet_unregister_transport);
307  
308  void nvmet_port_del_ctrls(struct nvmet_port *port, struct nvmet_subsys *subsys)
309  {
310  	struct nvmet_ctrl *ctrl;
311  
312  	mutex_lock(&subsys->lock);
313  	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
314  		if (ctrl->port == port)
315  			ctrl->ops->delete_ctrl(ctrl);
316  	}
317  	mutex_unlock(&subsys->lock);
318  }
319  
320  int nvmet_enable_port(struct nvmet_port *port)
321  {
322  	const struct nvmet_fabrics_ops *ops;
323  	int ret;
324  
325  	lockdep_assert_held(&nvmet_config_sem);
326  
327  	ops = nvmet_transports[port->disc_addr.trtype];
328  	if (!ops) {
329  		up_write(&nvmet_config_sem);
330  		request_module("nvmet-transport-%d", port->disc_addr.trtype);
331  		down_write(&nvmet_config_sem);
332  		ops = nvmet_transports[port->disc_addr.trtype];
333  		if (!ops) {
334  			pr_err("transport type %d not supported\n",
335  				port->disc_addr.trtype);
336  			return -EINVAL;
337  		}
338  	}
339  
340  	if (!try_module_get(ops->owner))
341  		return -EINVAL;
342  
343  	/*
344  	 * If the user requested PI support and the transport isn't PI-capable,
345  	 * don't enable the port.
346  	 */
347  	if (port->pi_enable && !(ops->flags & NVMF_METADATA_SUPPORTED)) {
348  		pr_err("T10-PI is not supported by transport type %d\n",
349  		       port->disc_addr.trtype);
350  		ret = -EINVAL;
351  		goto out_put;
352  	}
353  
354  	ret = ops->add_port(port);
355  	if (ret)
356  		goto out_put;
357  
358  	/* If the transport didn't set inline_data_size, then disable it. */
359  	if (port->inline_data_size < 0)
360  		port->inline_data_size = 0;
361  
362  	/*
363  	 * If the transport didn't set the max_queue_size properly, then clamp
364  	 * it to the target limits. Also set default values in case the
365  	 * transport didn't set it at all.
366  	 */
367  	if (port->max_queue_size < 0)
368  		port->max_queue_size = NVMET_MAX_QUEUE_SIZE;
369  	else
370  		port->max_queue_size = clamp_t(int, port->max_queue_size,
371  					       NVMET_MIN_QUEUE_SIZE,
372  					       NVMET_MAX_QUEUE_SIZE);
373  
374  	port->enabled = true;
375  	port->tr_ops = ops;
376  	return 0;
377  
378  out_put:
379  	module_put(ops->owner);
380  	return ret;
381  }
382  
383  void nvmet_disable_port(struct nvmet_port *port)
384  {
385  	const struct nvmet_fabrics_ops *ops;
386  
387  	lockdep_assert_held(&nvmet_config_sem);
388  
389  	port->enabled = false;
390  	port->tr_ops = NULL;
391  
392  	ops = nvmet_transports[port->disc_addr.trtype];
393  	ops->remove_port(port);
394  	module_put(ops->owner);
395  }
396  
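/*
 * Keep-alive work: if command traffic was seen since the last expiration
 * (traffic based keep-alive), re-arm the timer; otherwise treat the
 * keep-alive as expired and trigger a controller fatal error.
 */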
397  static void nvmet_keep_alive_timer(struct work_struct *work)
398  {
399  	struct nvmet_ctrl *ctrl = container_of(to_delayed_work(work),
400  			struct nvmet_ctrl, ka_work);
401  	bool reset_tbkas = ctrl->reset_tbkas;
402  
403  	ctrl->reset_tbkas = false;
404  	if (reset_tbkas) {
405  		pr_debug("ctrl %d reschedule traffic based keep-alive timer\n",
406  			ctrl->cntlid);
407  		queue_delayed_work(nvmet_wq, &ctrl->ka_work, ctrl->kato * HZ);
408  		return;
409  	}
410  
411  	pr_err("ctrl %d keep-alive timer (%d seconds) expired!\n",
412  		ctrl->cntlid, ctrl->kato);
413  
414  	nvmet_ctrl_fatal_error(ctrl);
415  }
416  
417  void nvmet_start_keep_alive_timer(struct nvmet_ctrl *ctrl)
418  {
419  	if (unlikely(ctrl->kato == 0))
420  		return;
421  
422  	pr_debug("ctrl %d start keep-alive timer for %d secs\n",
423  		ctrl->cntlid, ctrl->kato);
424  
425  	queue_delayed_work(nvmet_wq, &ctrl->ka_work, ctrl->kato * HZ);
426  }
427  
428  void nvmet_stop_keep_alive_timer(struct nvmet_ctrl *ctrl)
429  {
430  	if (unlikely(ctrl->kato == 0))
431  		return;
432  
433  	pr_debug("ctrl %d stop keep-alive\n", ctrl->cntlid);
434  
435  	cancel_delayed_work_sync(&ctrl->ka_work);
436  }
437  
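/*
 * Resolve the command's NSID to an enabled namespace and take a
 * reference on it; returns an NVMe status code on failure.
 */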
438  u16 nvmet_req_find_ns(struct nvmet_req *req)
439  {
440  	u32 nsid = le32_to_cpu(req->cmd->common.nsid);
441  	struct nvmet_subsys *subsys = nvmet_req_subsys(req);
442  
443  	req->ns = xa_load(&subsys->namespaces, nsid);
444  	if (unlikely(!req->ns)) {
445  		req->error_loc = offsetof(struct nvme_common_command, nsid);
446  		if (nvmet_subsys_nsid_exists(subsys, nsid))
447  			return NVME_SC_INTERNAL_PATH_ERROR;
448  		return NVME_SC_INVALID_NS | NVME_STATUS_DNR;
449  	}
450  
451  	percpu_ref_get(&req->ns->ref);
452  	return NVME_SC_SUCCESS;
453  }
454  
455  static void nvmet_destroy_namespace(struct percpu_ref *ref)
456  {
457  	struct nvmet_ns *ns = container_of(ref, struct nvmet_ns, ref);
458  
459  	complete(&ns->disable_done);
460  }
461  
462  void nvmet_put_namespace(struct nvmet_ns *ns)
463  {
464  	percpu_ref_put(&ns->ref);
465  }
466  
467  static void nvmet_ns_dev_disable(struct nvmet_ns *ns)
468  {
469  	nvmet_bdev_ns_disable(ns);
470  	nvmet_file_ns_disable(ns);
471  }
472  
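/*
 * Verify that peer-to-peer memory can actually be used for this
 * namespace before it is enabled.
 */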
473  static int nvmet_p2pmem_ns_enable(struct nvmet_ns *ns)
474  {
475  	int ret;
476  	struct pci_dev *p2p_dev;
477  
478  	if (!ns->use_p2pmem)
479  		return 0;
480  
481  	if (!ns->bdev) {
482  		pr_err("peer-to-peer DMA is not supported by non-block device namespaces\n");
483  		return -EINVAL;
484  	}
485  
486  	if (!blk_queue_pci_p2pdma(ns->bdev->bd_disk->queue)) {
487  		pr_err("peer-to-peer DMA is not supported by the driver of %s\n",
488  		       ns->device_path);
489  		return -EINVAL;
490  	}
491  
492  	if (ns->p2p_dev) {
493  		ret = pci_p2pdma_distance(ns->p2p_dev, nvmet_ns_dev(ns), true);
494  		if (ret < 0)
495  			return -EINVAL;
496  	} else {
497  		/*
498  		 * Right now we just check that there is p2pmem available so
499  		 * we can report an error to the user right away if there
500  		 * is not. We'll find the actual device to use once we
501  		 * set up the controller when the port's device is available.
502  		 */
503  
504  		p2p_dev = pci_p2pmem_find(nvmet_ns_dev(ns));
505  		if (!p2p_dev) {
506  			pr_err("no peer-to-peer memory is available for %s\n",
507  			       ns->device_path);
508  			return -EINVAL;
509  		}
510  
511  		pci_dev_put(p2p_dev);
512  	}
513  
514  	return 0;
515  }
516  
517  /*
518   * Note: ctrl->subsys->lock should be held when calling this function
519   */
520  static void nvmet_p2pmem_ns_add_p2p(struct nvmet_ctrl *ctrl,
521  				    struct nvmet_ns *ns)
522  {
523  	struct device *clients[2];
524  	struct pci_dev *p2p_dev;
525  	int ret;
526  
527  	if (!ctrl->p2p_client || !ns->use_p2pmem)
528  		return;
529  
530  	if (ns->p2p_dev) {
531  		ret = pci_p2pdma_distance(ns->p2p_dev, ctrl->p2p_client, true);
532  		if (ret < 0)
533  			return;
534  
535  		p2p_dev = pci_dev_get(ns->p2p_dev);
536  	} else {
537  		clients[0] = ctrl->p2p_client;
538  		clients[1] = nvmet_ns_dev(ns);
539  
540  		p2p_dev = pci_p2pmem_find_many(clients, ARRAY_SIZE(clients));
541  		if (!p2p_dev) {
542  			pr_err("no peer-to-peer memory is available that's supported by %s and %s\n",
543  			       dev_name(ctrl->p2p_client), ns->device_path);
544  			return;
545  		}
546  	}
547  
548  	ret = radix_tree_insert(&ctrl->p2p_ns_map, ns->nsid, p2p_dev);
549  	if (ret < 0)
550  		pci_dev_put(p2p_dev);
551  
552  	pr_info("using p2pmem on %s for nsid %d\n", pci_name(p2p_dev),
553  		ns->nsid);
554  }
555  
556  bool nvmet_ns_revalidate(struct nvmet_ns *ns)
557  {
558  	loff_t oldsize = ns->size;
559  
560  	if (ns->bdev)
561  		nvmet_bdev_ns_revalidate(ns);
562  	else
563  		nvmet_file_ns_revalidate(ns);
564  
565  	return oldsize != ns->size;
566  }
567  
568  int nvmet_ns_enable(struct nvmet_ns *ns)
569  {
570  	struct nvmet_subsys *subsys = ns->subsys;
571  	struct nvmet_ctrl *ctrl;
572  	int ret;
573  
574  	mutex_lock(&subsys->lock);
575  	ret = 0;
576  
577  	if (nvmet_is_passthru_subsys(subsys)) {
578  		pr_info("cannot enable both passthru and regular namespaces for a single subsystem");
579  		goto out_unlock;
580  	}
581  
582  	if (ns->enabled)
583  		goto out_unlock;
584  
585  	ret = -EMFILE;
586  	if (subsys->nr_namespaces == NVMET_MAX_NAMESPACES)
587  		goto out_unlock;
588  
589  	ret = nvmet_bdev_ns_enable(ns);
590  	if (ret == -ENOTBLK)
591  		ret = nvmet_file_ns_enable(ns);
592  	if (ret)
593  		goto out_unlock;
594  
595  	ret = nvmet_p2pmem_ns_enable(ns);
596  	if (ret)
597  		goto out_dev_disable;
598  
599  	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
600  		nvmet_p2pmem_ns_add_p2p(ctrl, ns);
601  
602  	ret = percpu_ref_init(&ns->ref, nvmet_destroy_namespace,
603  				0, GFP_KERNEL);
604  	if (ret)
605  		goto out_dev_put;
606  
607  	if (ns->nsid > subsys->max_nsid)
608  		subsys->max_nsid = ns->nsid;
609  
610  	ret = xa_insert(&subsys->namespaces, ns->nsid, ns, GFP_KERNEL);
611  	if (ret)
612  		goto out_restore_subsys_maxnsid;
613  
614  	subsys->nr_namespaces++;
615  
616  	nvmet_ns_changed(subsys, ns->nsid);
617  	ns->enabled = true;
618  	ret = 0;
619  out_unlock:
620  	mutex_unlock(&subsys->lock);
621  	return ret;
622  
623  out_restore_subsys_maxnsid:
624  	subsys->max_nsid = nvmet_max_nsid(subsys);
625  	percpu_ref_exit(&ns->ref);
626  out_dev_put:
627  	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
628  		pci_dev_put(radix_tree_delete(&ctrl->p2p_ns_map, ns->nsid));
629  out_dev_disable:
630  	nvmet_ns_dev_disable(ns);
631  	goto out_unlock;
632  }
633  
634  void nvmet_ns_disable(struct nvmet_ns *ns)
635  {
636  	struct nvmet_subsys *subsys = ns->subsys;
637  	struct nvmet_ctrl *ctrl;
638  
639  	mutex_lock(&subsys->lock);
640  	if (!ns->enabled)
641  		goto out_unlock;
642  
643  	ns->enabled = false;
644  	xa_erase(&ns->subsys->namespaces, ns->nsid);
645  	if (ns->nsid == subsys->max_nsid)
646  		subsys->max_nsid = nvmet_max_nsid(subsys);
647  
648  	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
649  		pci_dev_put(radix_tree_delete(&ctrl->p2p_ns_map, ns->nsid));
650  
651  	mutex_unlock(&subsys->lock);
652  
653  	/*
654  	 * Now that we removed the namespaces from the lookup list, we
655  	 * can kill the per_cpu ref and wait for any remaining references
656  	 * to be dropped, as well as a RCU grace period for anyone only
657  	 * using the namespace under rcu_read_lock().  Note that we can't
658  	 * use call_rcu here as we need to ensure the namespaces have
659  	 * been fully destroyed before unloading the module.
660  	 */
661  	percpu_ref_kill(&ns->ref);
662  	synchronize_rcu();
663  	wait_for_completion(&ns->disable_done);
664  	percpu_ref_exit(&ns->ref);
665  
666  	mutex_lock(&subsys->lock);
667  
668  	subsys->nr_namespaces--;
669  	nvmet_ns_changed(subsys, ns->nsid);
670  	nvmet_ns_dev_disable(ns);
671  out_unlock:
672  	mutex_unlock(&subsys->lock);
673  }
674  
675  void nvmet_ns_free(struct nvmet_ns *ns)
676  {
677  	nvmet_ns_disable(ns);
678  
679  	down_write(&nvmet_ana_sem);
680  	nvmet_ana_group_enabled[ns->anagrpid]--;
681  	up_write(&nvmet_ana_sem);
682  
683  	kfree(ns->device_path);
684  	kfree(ns);
685  }
686  
687  struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid)
688  {
689  	struct nvmet_ns *ns;
690  
691  	ns = kzalloc(sizeof(*ns), GFP_KERNEL);
692  	if (!ns)
693  		return NULL;
694  
695  	init_completion(&ns->disable_done);
696  
697  	ns->nsid = nsid;
698  	ns->subsys = subsys;
699  
700  	down_write(&nvmet_ana_sem);
701  	ns->anagrpid = NVMET_DEFAULT_ANA_GRPID;
702  	nvmet_ana_group_enabled[ns->anagrpid]++;
703  	up_write(&nvmet_ana_sem);
704  
705  	uuid_gen(&ns->uuid);
706  	ns->buffered_io = false;
707  	ns->csi = NVME_CSI_NVM;
708  
709  	return ns;
710  }
711  
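/* Atomically advance the SQ head pointer and store it in the CQE. */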
712  static void nvmet_update_sq_head(struct nvmet_req *req)
713  {
714  	if (req->sq->size) {
715  		u32 old_sqhd, new_sqhd;
716  
717  		old_sqhd = READ_ONCE(req->sq->sqhd);
718  		do {
719  			new_sqhd = (old_sqhd + 1) % req->sq->size;
720  		} while (!try_cmpxchg(&req->sq->sqhd, &old_sqhd, new_sqhd));
721  	}
722  	req->cqe->sq_head = cpu_to_le16(req->sq->sqhd & 0x0000FFFF);
723  }
724  
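/*
 * Set the CQE status and, if an error location was recorded, log the
 * failed command in the controller's Error Information log slots.
 */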
725  static void nvmet_set_error(struct nvmet_req *req, u16 status)
726  {
727  	struct nvmet_ctrl *ctrl = req->sq->ctrl;
728  	struct nvme_error_slot *new_error_slot;
729  	unsigned long flags;
730  
731  	req->cqe->status = cpu_to_le16(status << 1);
732  
733  	if (!ctrl || req->error_loc == NVMET_NO_ERROR_LOC)
734  		return;
735  
736  	spin_lock_irqsave(&ctrl->error_lock, flags);
737  	ctrl->err_counter++;
738  	new_error_slot =
739  		&ctrl->slots[ctrl->err_counter % NVMET_ERROR_LOG_SLOTS];
740  
741  	new_error_slot->error_count = cpu_to_le64(ctrl->err_counter);
742  	new_error_slot->sqid = cpu_to_le16(req->sq->qid);
743  	new_error_slot->cmdid = cpu_to_le16(req->cmd->common.command_id);
744  	new_error_slot->status_field = cpu_to_le16(status << 1);
745  	new_error_slot->param_error_location = cpu_to_le16(req->error_loc);
746  	new_error_slot->lba = cpu_to_le64(req->error_slba);
747  	new_error_slot->nsid = req->cmd->common.nsid;
748  	spin_unlock_irqrestore(&ctrl->error_lock, flags);
749  
750  	/* set the more bit for this request */
751  	req->cqe->status |= cpu_to_le16(1 << 14);
752  }
753  
754  static void __nvmet_req_complete(struct nvmet_req *req, u16 status)
755  {
756  	struct nvmet_ns *ns = req->ns;
757  
758  	if (!req->sq->sqhd_disabled)
759  		nvmet_update_sq_head(req);
760  	req->cqe->sq_id = cpu_to_le16(req->sq->qid);
761  	req->cqe->command_id = req->cmd->common.command_id;
762  
763  	if (unlikely(status))
764  		nvmet_set_error(req, status);
765  
766  	trace_nvmet_req_complete(req);
767  
768  	req->ops->queue_response(req);
769  	if (ns)
770  		nvmet_put_namespace(ns);
771  }
772  
773  void nvmet_req_complete(struct nvmet_req *req, u16 status)
774  {
775  	struct nvmet_sq *sq = req->sq;
776  
777  	__nvmet_req_complete(req, status);
778  	percpu_ref_put(&sq->ref);
779  }
780  EXPORT_SYMBOL_GPL(nvmet_req_complete);
781  
782  void nvmet_cq_setup(struct nvmet_ctrl *ctrl, struct nvmet_cq *cq,
783  		u16 qid, u16 size)
784  {
785  	cq->qid = qid;
786  	cq->size = size;
787  }
788  
789  void nvmet_sq_setup(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq,
790  		u16 qid, u16 size)
791  {
792  	sq->sqhd = 0;
793  	sq->qid = qid;
794  	sq->size = size;
795  
796  	ctrl->sqs[qid] = sq;
797  }
798  
799  static void nvmet_confirm_sq(struct percpu_ref *ref)
800  {
801  	struct nvmet_sq *sq = container_of(ref, struct nvmet_sq, ref);
802  
803  	complete(&sq->confirm_done);
804  }
805  
806  void nvmet_sq_destroy(struct nvmet_sq *sq)
807  {
808  	struct nvmet_ctrl *ctrl = sq->ctrl;
809  
810  	/*
811  	 * If this is the admin queue, complete all AERs so that our
812  	 * queue doesn't have outstanding requests on it.
813  	 */
814  	if (ctrl && ctrl->sqs && ctrl->sqs[0] == sq)
815  		nvmet_async_events_failall(ctrl);
816  	percpu_ref_kill_and_confirm(&sq->ref, nvmet_confirm_sq);
817  	wait_for_completion(&sq->confirm_done);
818  	wait_for_completion(&sq->free_done);
819  	percpu_ref_exit(&sq->ref);
820  	nvmet_auth_sq_free(sq);
821  
822  	/*
823  	 * We must reference the ctrl again after waiting for inflight I/O to
824  	 * complete, because an admin connect may have sneaked in after we
825  	 * stored sq->ctrl locally but before we killed the percpu_ref. The
826  	 * admin connect allocates and assigns sq->ctrl, which now needs a
827  	 * final ref put, as this ctrl is going away.
828  	 */
829  	ctrl = sq->ctrl;
830  
831  	if (ctrl) {
832  		/*
833  		 * The teardown flow may take some time, and the host may not
834  		 * send us keep-alive during this period, hence reset the
835  		 * traffic based keep-alive timer so we don't trigger a
836  		 * controller teardown as a result of a keep-alive expiration.
837  		 */
838  		ctrl->reset_tbkas = true;
839  		sq->ctrl->sqs[sq->qid] = NULL;
840  		nvmet_ctrl_put(ctrl);
841  		sq->ctrl = NULL; /* allows reusing the queue later */
842  	}
843  }
844  EXPORT_SYMBOL_GPL(nvmet_sq_destroy);
845  
846  static void nvmet_sq_free(struct percpu_ref *ref)
847  {
848  	struct nvmet_sq *sq = container_of(ref, struct nvmet_sq, ref);
849  
850  	complete(&sq->free_done);
851  }
852  
853  int nvmet_sq_init(struct nvmet_sq *sq)
854  {
855  	int ret;
856  
857  	ret = percpu_ref_init(&sq->ref, nvmet_sq_free, 0, GFP_KERNEL);
858  	if (ret) {
859  		pr_err("percpu_ref init failed!\n");
860  		return ret;
861  	}
862  	init_completion(&sq->free_done);
863  	init_completion(&sq->confirm_done);
864  	nvmet_auth_sq_init(sq);
865  
866  	return 0;
867  }
868  EXPORT_SYMBOL_GPL(nvmet_sq_init);
869  
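/*
 * Map the namespace's ANA state on this port to an NVMe status code;
 * returns 0 for states that allow I/O.
 */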
870  static inline u16 nvmet_check_ana_state(struct nvmet_port *port,
871  		struct nvmet_ns *ns)
872  {
873  	enum nvme_ana_state state = port->ana_state[ns->anagrpid];
874  
875  	if (unlikely(state == NVME_ANA_INACCESSIBLE))
876  		return NVME_SC_ANA_INACCESSIBLE;
877  	if (unlikely(state == NVME_ANA_PERSISTENT_LOSS))
878  		return NVME_SC_ANA_PERSISTENT_LOSS;
879  	if (unlikely(state == NVME_ANA_CHANGE))
880  		return NVME_SC_ANA_TRANSITION;
881  	return 0;
882  }
883  
884  static inline u16 nvmet_io_cmd_check_access(struct nvmet_req *req)
885  {
886  	if (unlikely(req->ns->readonly)) {
887  		switch (req->cmd->common.opcode) {
888  		case nvme_cmd_read:
889  		case nvme_cmd_flush:
890  			break;
891  		default:
892  			return NVME_SC_NS_WRITE_PROTECTED;
893  		}
894  	}
895  
896  	return 0;
897  }
898  
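/*
 * Validate controller and namespace state, then dispatch the I/O command
 * to the fabrics, passthru, file, block or zoned command parser.
 */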
899  static u16 nvmet_parse_io_cmd(struct nvmet_req *req)
900  {
901  	struct nvme_command *cmd = req->cmd;
902  	u16 ret;
903  
904  	if (nvme_is_fabrics(cmd))
905  		return nvmet_parse_fabrics_io_cmd(req);
906  
907  	if (unlikely(!nvmet_check_auth_status(req)))
908  		return NVME_SC_AUTH_REQUIRED | NVME_STATUS_DNR;
909  
910  	ret = nvmet_check_ctrl_status(req);
911  	if (unlikely(ret))
912  		return ret;
913  
914  	if (nvmet_is_passthru_req(req))
915  		return nvmet_parse_passthru_io_cmd(req);
916  
917  	ret = nvmet_req_find_ns(req);
918  	if (unlikely(ret))
919  		return ret;
920  
921  	ret = nvmet_check_ana_state(req->port, req->ns);
922  	if (unlikely(ret)) {
923  		req->error_loc = offsetof(struct nvme_common_command, nsid);
924  		return ret;
925  	}
926  	ret = nvmet_io_cmd_check_access(req);
927  	if (unlikely(ret)) {
928  		req->error_loc = offsetof(struct nvme_common_command, nsid);
929  		return ret;
930  	}
931  
932  	switch (req->ns->csi) {
933  	case NVME_CSI_NVM:
934  		if (req->ns->file)
935  			return nvmet_file_parse_io_cmd(req);
936  		return nvmet_bdev_parse_io_cmd(req);
937  	case NVME_CSI_ZNS:
938  		if (IS_ENABLED(CONFIG_BLK_DEV_ZONED))
939  			return nvmet_bdev_zns_parse_io_cmd(req);
940  		return NVME_SC_INVALID_IO_CMD_SET;
941  	default:
942  		return NVME_SC_INVALID_IO_CMD_SET;
943  	}
944  }
945  
946  bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
947  		struct nvmet_sq *sq, const struct nvmet_fabrics_ops *ops)
948  {
949  	u8 flags = req->cmd->common.flags;
950  	u16 status;
951  
952  	req->cq = cq;
953  	req->sq = sq;
954  	req->ops = ops;
955  	req->sg = NULL;
956  	req->metadata_sg = NULL;
957  	req->sg_cnt = 0;
958  	req->metadata_sg_cnt = 0;
959  	req->transfer_len = 0;
960  	req->metadata_len = 0;
961  	req->cqe->result.u64 = 0;
962  	req->cqe->status = 0;
963  	req->cqe->sq_head = 0;
964  	req->ns = NULL;
965  	req->error_loc = NVMET_NO_ERROR_LOC;
966  	req->error_slba = 0;
967  
968  	/* no support for fused commands yet */
969  	if (unlikely(flags & (NVME_CMD_FUSE_FIRST | NVME_CMD_FUSE_SECOND))) {
970  		req->error_loc = offsetof(struct nvme_common_command, flags);
971  		status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
972  		goto fail;
973  	}
974  
975  	/*
976  	 * For fabrics, PSDT field shall describe metadata pointer (MPTR) that
977  	 * contains an address of a single contiguous physical buffer that is
978  	 * byte aligned.
979  	 */
980  	if (unlikely((flags & NVME_CMD_SGL_ALL) != NVME_CMD_SGL_METABUF)) {
981  		req->error_loc = offsetof(struct nvme_common_command, flags);
982  		status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
983  		goto fail;
984  	}
985  
986  	if (unlikely(!req->sq->ctrl))
987  		/* will return an error for any non-connect command: */
988  		status = nvmet_parse_connect_cmd(req);
989  	else if (likely(req->sq->qid != 0))
990  		status = nvmet_parse_io_cmd(req);
991  	else
992  		status = nvmet_parse_admin_cmd(req);
993  
994  	if (status)
995  		goto fail;
996  
997  	trace_nvmet_req_init(req, req->cmd);
998  
999  	if (unlikely(!percpu_ref_tryget_live(&sq->ref))) {
1000  		status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
1001  		goto fail;
1002  	}
1003  
1004  	if (sq->ctrl)
1005  		sq->ctrl->reset_tbkas = true;
1006  
1007  	return true;
1008  
1009  fail:
1010  	__nvmet_req_complete(req, status);
1011  	return false;
1012  }
1013  EXPORT_SYMBOL_GPL(nvmet_req_init);
1014  
1015  void nvmet_req_uninit(struct nvmet_req *req)
1016  {
1017  	percpu_ref_put(&req->sq->ref);
1018  	if (req->ns)
1019  		nvmet_put_namespace(req->ns);
1020  }
1021  EXPORT_SYMBOL_GPL(nvmet_req_uninit);
1022  
1023  bool nvmet_check_transfer_len(struct nvmet_req *req, size_t len)
1024  {
1025  	if (unlikely(len != req->transfer_len)) {
1026  		req->error_loc = offsetof(struct nvme_common_command, dptr);
1027  		nvmet_req_complete(req, NVME_SC_SGL_INVALID_DATA | NVME_STATUS_DNR);
1028  		return false;
1029  	}
1030  
1031  	return true;
1032  }
1033  EXPORT_SYMBOL_GPL(nvmet_check_transfer_len);
1034  
1035  bool nvmet_check_data_len_lte(struct nvmet_req *req, size_t data_len)
1036  {
1037  	if (unlikely(data_len > req->transfer_len)) {
1038  		req->error_loc = offsetof(struct nvme_common_command, dptr);
1039  		nvmet_req_complete(req, NVME_SC_SGL_INVALID_DATA | NVME_STATUS_DNR);
1040  		return false;
1041  	}
1042  
1043  	return true;
1044  }
1045  
1046  static unsigned int nvmet_data_transfer_len(struct nvmet_req *req)
1047  {
1048  	return req->transfer_len - req->metadata_len;
1049  }
1050  
1051  static int nvmet_req_alloc_p2pmem_sgls(struct pci_dev *p2p_dev,
1052  		struct nvmet_req *req)
1053  {
1054  	req->sg = pci_p2pmem_alloc_sgl(p2p_dev, &req->sg_cnt,
1055  			nvmet_data_transfer_len(req));
1056  	if (!req->sg)
1057  		goto out_err;
1058  
1059  	if (req->metadata_len) {
1060  		req->metadata_sg = pci_p2pmem_alloc_sgl(p2p_dev,
1061  				&req->metadata_sg_cnt, req->metadata_len);
1062  		if (!req->metadata_sg)
1063  			goto out_free_sg;
1064  	}
1065  
1066  	req->p2p_dev = p2p_dev;
1067  
1068  	return 0;
1069  out_free_sg:
1070  	pci_p2pmem_free_sgl(req->p2p_dev, req->sg);
1071  out_err:
1072  	return -ENOMEM;
1073  }
1074  
1075  static struct pci_dev *nvmet_req_find_p2p_dev(struct nvmet_req *req)
1076  {
1077  	if (!IS_ENABLED(CONFIG_PCI_P2PDMA) ||
1078  	    !req->sq->ctrl || !req->sq->qid || !req->ns)
1079  		return NULL;
1080  	return radix_tree_lookup(&req->sq->ctrl->p2p_ns_map, req->ns->nsid);
1081  }
1082  
1083  int nvmet_req_alloc_sgls(struct nvmet_req *req)
1084  {
1085  	struct pci_dev *p2p_dev = nvmet_req_find_p2p_dev(req);
1086  
1087  	if (p2p_dev && !nvmet_req_alloc_p2pmem_sgls(p2p_dev, req))
1088  		return 0;
1089  
1090  	req->sg = sgl_alloc(nvmet_data_transfer_len(req), GFP_KERNEL,
1091  			    &req->sg_cnt);
1092  	if (unlikely(!req->sg))
1093  		goto out;
1094  
1095  	if (req->metadata_len) {
1096  		req->metadata_sg = sgl_alloc(req->metadata_len, GFP_KERNEL,
1097  					     &req->metadata_sg_cnt);
1098  		if (unlikely(!req->metadata_sg))
1099  			goto out_free;
1100  	}
1101  
1102  	return 0;
1103  out_free:
1104  	sgl_free(req->sg);
1105  out:
1106  	return -ENOMEM;
1107  }
1108  EXPORT_SYMBOL_GPL(nvmet_req_alloc_sgls);
1109  
1110  void nvmet_req_free_sgls(struct nvmet_req *req)
1111  {
1112  	if (req->p2p_dev) {
1113  		pci_p2pmem_free_sgl(req->p2p_dev, req->sg);
1114  		if (req->metadata_sg)
1115  			pci_p2pmem_free_sgl(req->p2p_dev, req->metadata_sg);
1116  		req->p2p_dev = NULL;
1117  	} else {
1118  		sgl_free(req->sg);
1119  		if (req->metadata_sg)
1120  			sgl_free(req->metadata_sg);
1121  	}
1122  
1123  	req->sg = NULL;
1124  	req->metadata_sg = NULL;
1125  	req->sg_cnt = 0;
1126  	req->metadata_sg_cnt = 0;
1127  }
1128  EXPORT_SYMBOL_GPL(nvmet_req_free_sgls);
1129  
1130  static inline bool nvmet_cc_en(u32 cc)
1131  {
1132  	return (cc >> NVME_CC_EN_SHIFT) & 0x1;
1133  }
1134  
1135  static inline u8 nvmet_cc_css(u32 cc)
1136  {
1137  	return (cc >> NVME_CC_CSS_SHIFT) & 0x7;
1138  }
1139  
1140  static inline u8 nvmet_cc_mps(u32 cc)
1141  {
1142  	return (cc >> NVME_CC_MPS_SHIFT) & 0xf;
1143  }
1144  
1145  static inline u8 nvmet_cc_ams(u32 cc)
1146  {
1147  	return (cc >> NVME_CC_AMS_SHIFT) & 0x7;
1148  }
1149  
1150  static inline u8 nvmet_cc_shn(u32 cc)
1151  {
1152  	return (cc >> NVME_CC_SHN_SHIFT) & 0x3;
1153  }
1154  
1155  static inline u8 nvmet_cc_iosqes(u32 cc)
1156  {
1157  	return (cc >> NVME_CC_IOSQES_SHIFT) & 0xf;
1158  }
1159  
1160  static inline u8 nvmet_cc_iocqes(u32 cc)
1161  {
1162  	return (cc >> NVME_CC_IOCQES_SHIFT) & 0xf;
1163  }
1164  
1165  static inline bool nvmet_css_supported(u8 cc_css)
1166  {
1167  	switch (cc_css << NVME_CC_CSS_SHIFT) {
1168  	case NVME_CC_CSS_NVM:
1169  	case NVME_CC_CSS_CSI:
1170  		return true;
1171  	default:
1172  		return false;
1173  	}
1174  }
1175  
1176  static void nvmet_start_ctrl(struct nvmet_ctrl *ctrl)
1177  {
1178  	lockdep_assert_held(&ctrl->lock);
1179  
1180  	/*
1181  	 * Only I/O controllers should verify iosqes,iocqes.
1182  	 * Strictly speaking, the spec says a discovery controller
1183  	 * should verify iosqes,iocqes are zeroed, however that
1184  	 * would break backwards compatibility, so don't enforce it.
1185  	 */
1186  	if (!nvmet_is_disc_subsys(ctrl->subsys) &&
1187  	    (nvmet_cc_iosqes(ctrl->cc) != NVME_NVM_IOSQES ||
1188  	     nvmet_cc_iocqes(ctrl->cc) != NVME_NVM_IOCQES)) {
1189  		ctrl->csts = NVME_CSTS_CFS;
1190  		return;
1191  	}
1192  
1193  	if (nvmet_cc_mps(ctrl->cc) != 0 ||
1194  	    nvmet_cc_ams(ctrl->cc) != 0 ||
1195  	    !nvmet_css_supported(nvmet_cc_css(ctrl->cc))) {
1196  		ctrl->csts = NVME_CSTS_CFS;
1197  		return;
1198  	}
1199  
1200  	ctrl->csts = NVME_CSTS_RDY;
1201  
1202  	/*
1203  	 * Controllers that are not yet enabled should not really enforce the
1204  	 * keep alive timeout, but we still want to track a timeout and cleanup
1205  	 * in case a host died before it enabled the controller.  Hence, simply
1206  	 * reset the keep alive timer when the controller is enabled.
1207  	 */
1208  	if (ctrl->kato)
1209  		mod_delayed_work(nvmet_wq, &ctrl->ka_work, ctrl->kato * HZ);
1210  }
1211  
1212  static void nvmet_clear_ctrl(struct nvmet_ctrl *ctrl)
1213  {
1214  	lockdep_assert_held(&ctrl->lock);
1215  
1216  	/* XXX: tear down queues? */
1217  	ctrl->csts &= ~NVME_CSTS_RDY;
1218  	ctrl->cc = 0;
1219  }
1220  
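/*
 * Apply a host write to the CC register: handle enable/disable and
 * shutdown transitions.
 */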
1221  void nvmet_update_cc(struct nvmet_ctrl *ctrl, u32 new)
1222  {
1223  	u32 old;
1224  
1225  	mutex_lock(&ctrl->lock);
1226  	old = ctrl->cc;
1227  	ctrl->cc = new;
1228  
1229  	if (nvmet_cc_en(new) && !nvmet_cc_en(old))
1230  		nvmet_start_ctrl(ctrl);
1231  	if (!nvmet_cc_en(new) && nvmet_cc_en(old))
1232  		nvmet_clear_ctrl(ctrl);
1233  	if (nvmet_cc_shn(new) && !nvmet_cc_shn(old)) {
1234  		nvmet_clear_ctrl(ctrl);
1235  		ctrl->csts |= NVME_CSTS_SHST_CMPLT;
1236  	}
1237  	if (!nvmet_cc_shn(new) && nvmet_cc_shn(old))
1238  		ctrl->csts &= ~NVME_CSTS_SHST_CMPLT;
1239  	mutex_unlock(&ctrl->lock);
1240  }
1241  
1242  static void nvmet_init_cap(struct nvmet_ctrl *ctrl)
1243  {
1244  	/* command sets supported: NVMe command set */
1245  	ctrl->cap = (1ULL << 37);
1246  	/* Controller supports one or more I/O Command Sets */
1247  	ctrl->cap |= (1ULL << 43);
1248  	/* CC.EN timeout in 500msec units: */
1249  	ctrl->cap |= (15ULL << 24);
1250  	/* maximum queue entries supported: */
1251  	if (ctrl->ops->get_max_queue_size)
1252  		ctrl->cap |= min_t(u16, ctrl->ops->get_max_queue_size(ctrl),
1253  				   ctrl->port->max_queue_size) - 1;
1254  	else
1255  		ctrl->cap |= ctrl->port->max_queue_size - 1;
1256  
1257  	if (nvmet_is_passthru_subsys(ctrl->subsys))
1258  		nvmet_passthrough_override_cap(ctrl);
1259  }
1260  
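/*
 * Look up an existing controller by cntlid for the given subsystem and
 * host NQN and take a reference on it.
 */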
1261  struct nvmet_ctrl *nvmet_ctrl_find_get(const char *subsysnqn,
1262  				       const char *hostnqn, u16 cntlid,
1263  				       struct nvmet_req *req)
1264  {
1265  	struct nvmet_ctrl *ctrl = NULL;
1266  	struct nvmet_subsys *subsys;
1267  
1268  	subsys = nvmet_find_get_subsys(req->port, subsysnqn);
1269  	if (!subsys) {
1270  		pr_warn("connect request for invalid subsystem %s!\n",
1271  			subsysnqn);
1272  		req->cqe->result.u32 = IPO_IATTR_CONNECT_DATA(subsysnqn);
1273  		goto out;
1274  	}
1275  
1276  	mutex_lock(&subsys->lock);
1277  	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
1278  		if (ctrl->cntlid == cntlid) {
1279  			if (strncmp(hostnqn, ctrl->hostnqn, NVMF_NQN_SIZE)) {
1280  				pr_warn("hostnqn mismatch.\n");
1281  				continue;
1282  			}
1283  			if (!kref_get_unless_zero(&ctrl->ref))
1284  				continue;
1285  
1286  			/* ctrl found */
1287  			goto found;
1288  		}
1289  	}
1290  
1291  	ctrl = NULL; /* ctrl not found */
1292  	pr_warn("could not find controller %d for subsys %s / host %s\n",
1293  		cntlid, subsysnqn, hostnqn);
1294  	req->cqe->result.u32 = IPO_IATTR_CONNECT_DATA(cntlid);
1295  
1296  found:
1297  	mutex_unlock(&subsys->lock);
1298  	nvmet_subsys_put(subsys);
1299  out:
1300  	return ctrl;
1301  }
1302  
1303  u16 nvmet_check_ctrl_status(struct nvmet_req *req)
1304  {
1305  	if (unlikely(!(req->sq->ctrl->cc & NVME_CC_ENABLE))) {
1306  		pr_err("got cmd %d while CC.EN == 0 on qid = %d\n",
1307  		       req->cmd->common.opcode, req->sq->qid);
1308  		return NVME_SC_CMD_SEQ_ERROR | NVME_STATUS_DNR;
1309  	}
1310  
1311  	if (unlikely(!(req->sq->ctrl->csts & NVME_CSTS_RDY))) {
1312  		pr_err("got cmd %d while CSTS.RDY == 0 on qid = %d\n",
1313  		       req->cmd->common.opcode, req->sq->qid);
1314  		return NVME_SC_CMD_SEQ_ERROR | NVME_STATUS_DNR;
1315  	}
1316  
1317  	if (unlikely(!nvmet_check_auth_status(req))) {
1318  		pr_warn("qid %d not authenticated\n", req->sq->qid);
1319  		return NVME_SC_AUTH_REQUIRED | NVME_STATUS_DNR;
1320  	}
1321  	return 0;
1322  }
1323  
1324  bool nvmet_host_allowed(struct nvmet_subsys *subsys, const char *hostnqn)
1325  {
1326  	struct nvmet_host_link *p;
1327  
1328  	lockdep_assert_held(&nvmet_config_sem);
1329  
1330  	if (subsys->allow_any_host)
1331  		return true;
1332  
1333  	if (nvmet_is_disc_subsys(subsys)) /* allow all access to disc subsys */
1334  		return true;
1335  
1336  	list_for_each_entry(p, &subsys->hosts, entry) {
1337  		if (!strcmp(nvmet_host_name(p->host), hostnqn))
1338  			return true;
1339  	}
1340  
1341  	return false;
1342  }
1343  
1344  /*
1345   * Note: ctrl->subsys->lock should be held when calling this function
1346   */
1347  static void nvmet_setup_p2p_ns_map(struct nvmet_ctrl *ctrl,
1348  		struct nvmet_req *req)
1349  {
1350  	struct nvmet_ns *ns;
1351  	unsigned long idx;
1352  
1353  	if (!req->p2p_client)
1354  		return;
1355  
1356  	ctrl->p2p_client = get_device(req->p2p_client);
1357  
1358  	xa_for_each(&ctrl->subsys->namespaces, idx, ns)
1359  		nvmet_p2pmem_ns_add_p2p(ctrl, ns);
1360  }
1361  
1362  /*
1363   * Note: ctrl->subsys->lock should be held when calling this function
1364   */
1365  static void nvmet_release_p2p_ns_map(struct nvmet_ctrl *ctrl)
1366  {
1367  	struct radix_tree_iter iter;
1368  	void __rcu **slot;
1369  
1370  	radix_tree_for_each_slot(slot, &ctrl->p2p_ns_map, &iter, 0)
1371  		pci_dev_put(radix_tree_deref_slot(slot));
1372  
1373  	put_device(ctrl->p2p_client);
1374  }
1375  
1376  static void nvmet_fatal_error_handler(struct work_struct *work)
1377  {
1378  	struct nvmet_ctrl *ctrl =
1379  			container_of(work, struct nvmet_ctrl, fatal_err_work);
1380  
1381  	pr_err("ctrl %d fatal error occurred!\n", ctrl->cntlid);
1382  	ctrl->ops->delete_ctrl(ctrl);
1383  }
1384  
1385  u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
1386  		struct nvmet_req *req, u32 kato, struct nvmet_ctrl **ctrlp)
1387  {
1388  	struct nvmet_subsys *subsys;
1389  	struct nvmet_ctrl *ctrl;
1390  	int ret;
1391  	u16 status;
1392  
1393  	status = NVME_SC_CONNECT_INVALID_PARAM | NVME_STATUS_DNR;
1394  	subsys = nvmet_find_get_subsys(req->port, subsysnqn);
1395  	if (!subsys) {
1396  		pr_warn("connect request for invalid subsystem %s!\n",
1397  			subsysnqn);
1398  		req->cqe->result.u32 = IPO_IATTR_CONNECT_DATA(subsysnqn);
1399  		req->error_loc = offsetof(struct nvme_common_command, dptr);
1400  		goto out;
1401  	}
1402  
1403  	down_read(&nvmet_config_sem);
1404  	if (!nvmet_host_allowed(subsys, hostnqn)) {
1405  		pr_info("connect by host %s for subsystem %s not allowed\n",
1406  			hostnqn, subsysnqn);
1407  		req->cqe->result.u32 = IPO_IATTR_CONNECT_DATA(hostnqn);
1408  		up_read(&nvmet_config_sem);
1409  		status = NVME_SC_CONNECT_INVALID_HOST | NVME_STATUS_DNR;
1410  		req->error_loc = offsetof(struct nvme_common_command, dptr);
1411  		goto out_put_subsystem;
1412  	}
1413  	up_read(&nvmet_config_sem);
1414  
1415  	status = NVME_SC_INTERNAL;
1416  	ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL);
1417  	if (!ctrl)
1418  		goto out_put_subsystem;
1419  	mutex_init(&ctrl->lock);
1420  
1421  	ctrl->port = req->port;
1422  	ctrl->ops = req->ops;
1423  
1424  #ifdef CONFIG_NVME_TARGET_PASSTHRU
1425  	/* Loop targets are set to clear IDs by default */
1426  	if (ctrl->port->disc_addr.trtype == NVMF_TRTYPE_LOOP)
1427  		subsys->clear_ids = 1;
1428  #endif
1429  
1430  	INIT_WORK(&ctrl->async_event_work, nvmet_async_event_work);
1431  	INIT_LIST_HEAD(&ctrl->async_events);
1432  	INIT_RADIX_TREE(&ctrl->p2p_ns_map, GFP_KERNEL);
1433  	INIT_WORK(&ctrl->fatal_err_work, nvmet_fatal_error_handler);
1434  	INIT_DELAYED_WORK(&ctrl->ka_work, nvmet_keep_alive_timer);
1435  
1436  	memcpy(ctrl->subsysnqn, subsysnqn, NVMF_NQN_SIZE);
1437  	memcpy(ctrl->hostnqn, hostnqn, NVMF_NQN_SIZE);
1438  
1439  	kref_init(&ctrl->ref);
1440  	ctrl->subsys = subsys;
1441  	ctrl->pi_support = ctrl->port->pi_enable && ctrl->subsys->pi_support;
1442  	nvmet_init_cap(ctrl);
1443  	WRITE_ONCE(ctrl->aen_enabled, NVMET_AEN_CFG_OPTIONAL);
1444  
1445  	ctrl->changed_ns_list = kmalloc_array(NVME_MAX_CHANGED_NAMESPACES,
1446  			sizeof(__le32), GFP_KERNEL);
1447  	if (!ctrl->changed_ns_list)
1448  		goto out_free_ctrl;
1449  
1450  	ctrl->sqs = kcalloc(subsys->max_qid + 1,
1451  			sizeof(struct nvmet_sq *),
1452  			GFP_KERNEL);
1453  	if (!ctrl->sqs)
1454  		goto out_free_changed_ns_list;
1455  
1456  	ret = ida_alloc_range(&cntlid_ida,
1457  			     subsys->cntlid_min, subsys->cntlid_max,
1458  			     GFP_KERNEL);
1459  	if (ret < 0) {
1460  		status = NVME_SC_CONNECT_CTRL_BUSY | NVME_STATUS_DNR;
1461  		goto out_free_sqs;
1462  	}
1463  	ctrl->cntlid = ret;
1464  
1465  	/*
1466  	 * Discovery controllers may use some arbitrary high value
1467  	 * in order to clean up stale discovery sessions
1468  	 */
1469  	if (nvmet_is_disc_subsys(ctrl->subsys) && !kato)
1470  		kato = NVMET_DISC_KATO_MS;
1471  
1472  	/* keep-alive timeout in seconds */
1473  	ctrl->kato = DIV_ROUND_UP(kato, 1000);
1474  
1475  	ctrl->err_counter = 0;
1476  	spin_lock_init(&ctrl->error_lock);
1477  
1478  	nvmet_start_keep_alive_timer(ctrl);
1479  
1480  	mutex_lock(&subsys->lock);
1481  	list_add_tail(&ctrl->subsys_entry, &subsys->ctrls);
1482  	nvmet_setup_p2p_ns_map(ctrl, req);
1483  	nvmet_debugfs_ctrl_setup(ctrl);
1484  	mutex_unlock(&subsys->lock);
1485  
1486  	*ctrlp = ctrl;
1487  	return 0;
1488  
1489  out_free_sqs:
1490  	kfree(ctrl->sqs);
1491  out_free_changed_ns_list:
1492  	kfree(ctrl->changed_ns_list);
1493  out_free_ctrl:
1494  	kfree(ctrl);
1495  out_put_subsystem:
1496  	nvmet_subsys_put(subsys);
1497  out:
1498  	return status;
1499  }
1500  
1501  static void nvmet_ctrl_free(struct kref *ref)
1502  {
1503  	struct nvmet_ctrl *ctrl = container_of(ref, struct nvmet_ctrl, ref);
1504  	struct nvmet_subsys *subsys = ctrl->subsys;
1505  
1506  	mutex_lock(&subsys->lock);
1507  	nvmet_release_p2p_ns_map(ctrl);
1508  	list_del(&ctrl->subsys_entry);
1509  	mutex_unlock(&subsys->lock);
1510  
1511  	nvmet_stop_keep_alive_timer(ctrl);
1512  
1513  	flush_work(&ctrl->async_event_work);
1514  	cancel_work_sync(&ctrl->fatal_err_work);
1515  
1516  	nvmet_destroy_auth(ctrl);
1517  
1518  	nvmet_debugfs_ctrl_free(ctrl);
1519  
1520  	ida_free(&cntlid_ida, ctrl->cntlid);
1521  
1522  	nvmet_async_events_free(ctrl);
1523  	kfree(ctrl->sqs);
1524  	kfree(ctrl->changed_ns_list);
1525  	kfree(ctrl);
1526  
1527  	nvmet_subsys_put(subsys);
1528  }
1529  
1530  void nvmet_ctrl_put(struct nvmet_ctrl *ctrl)
1531  {
1532  	kref_put(&ctrl->ref, nvmet_ctrl_free);
1533  }
1534  
1535  void nvmet_ctrl_fatal_error(struct nvmet_ctrl *ctrl)
1536  {
1537  	mutex_lock(&ctrl->lock);
1538  	if (!(ctrl->csts & NVME_CSTS_CFS)) {
1539  		ctrl->csts |= NVME_CSTS_CFS;
1540  		queue_work(nvmet_wq, &ctrl->fatal_err_work);
1541  	}
1542  	mutex_unlock(&ctrl->lock);
1543  }
1544  EXPORT_SYMBOL_GPL(nvmet_ctrl_fatal_error);
1545  
1546  ssize_t nvmet_ctrl_host_traddr(struct nvmet_ctrl *ctrl,
1547  		char *traddr, size_t traddr_len)
1548  {
1549  	if (!ctrl->ops->host_traddr)
1550  		return -EOPNOTSUPP;
1551  	return ctrl->ops->host_traddr(ctrl, traddr, traddr_len);
1552  }
1553  
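/*
 * Find the subsystem with the given NQN that is exported on this port
 * (or the discovery subsystem) and take a reference on it.
 */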
1554  static struct nvmet_subsys *nvmet_find_get_subsys(struct nvmet_port *port,
1555  		const char *subsysnqn)
1556  {
1557  	struct nvmet_subsys_link *p;
1558  
1559  	if (!port)
1560  		return NULL;
1561  
1562  	if (!strcmp(NVME_DISC_SUBSYS_NAME, subsysnqn)) {
1563  		if (!kref_get_unless_zero(&nvmet_disc_subsys->ref))
1564  			return NULL;
1565  		return nvmet_disc_subsys;
1566  	}
1567  
1568  	down_read(&nvmet_config_sem);
1569  	if (!strncmp(nvmet_disc_subsys->subsysnqn, subsysnqn,
1570  				NVMF_NQN_SIZE)) {
1571  		if (kref_get_unless_zero(&nvmet_disc_subsys->ref)) {
1572  			up_read(&nvmet_config_sem);
1573  			return nvmet_disc_subsys;
1574  		}
1575  	}
1576  	list_for_each_entry(p, &port->subsystems, entry) {
1577  		if (!strncmp(p->subsys->subsysnqn, subsysnqn,
1578  				NVMF_NQN_SIZE)) {
1579  			if (!kref_get_unless_zero(&p->subsys->ref))
1580  				break;
1581  			up_read(&nvmet_config_sem);
1582  			return p->subsys;
1583  		}
1584  	}
1585  	up_read(&nvmet_config_sem);
1586  	return NULL;
1587  }
1588  
1589  struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn,
1590  		enum nvme_subsys_type type)
1591  {
1592  	struct nvmet_subsys *subsys;
1593  	char serial[NVMET_SN_MAX_SIZE / 2];
1594  	int ret;
1595  
1596  	subsys = kzalloc(sizeof(*subsys), GFP_KERNEL);
1597  	if (!subsys)
1598  		return ERR_PTR(-ENOMEM);
1599  
1600  	subsys->ver = NVMET_DEFAULT_VS;
1601  	/* generate a random serial number as our controllers are ephemeral: */
1602  	get_random_bytes(&serial, sizeof(serial));
1603  	bin2hex(subsys->serial, &serial, sizeof(serial));
1604  
1605  	subsys->model_number = kstrdup(NVMET_DEFAULT_CTRL_MODEL, GFP_KERNEL);
1606  	if (!subsys->model_number) {
1607  		ret = -ENOMEM;
1608  		goto free_subsys;
1609  	}
1610  
1611  	subsys->ieee_oui = 0;
1612  
1613  	subsys->firmware_rev = kstrndup(UTS_RELEASE, NVMET_FR_MAX_SIZE, GFP_KERNEL);
1614  	if (!subsys->firmware_rev) {
1615  		ret = -ENOMEM;
1616  		goto free_mn;
1617  	}
1618  
1619  	switch (type) {
1620  	case NVME_NQN_NVME:
1621  		subsys->max_qid = NVMET_NR_QUEUES;
1622  		break;
1623  	case NVME_NQN_DISC:
1624  	case NVME_NQN_CURR:
1625  		subsys->max_qid = 0;
1626  		break;
1627  	default:
1628  		pr_err("%s: Unknown Subsystem type - %d\n", __func__, type);
1629  		ret = -EINVAL;
1630  		goto free_fr;
1631  	}
1632  	subsys->type = type;
1633  	subsys->subsysnqn = kstrndup(subsysnqn, NVMF_NQN_SIZE,
1634  			GFP_KERNEL);
1635  	if (!subsys->subsysnqn) {
1636  		ret = -ENOMEM;
1637  		goto free_fr;
1638  	}
1639  	subsys->cntlid_min = NVME_CNTLID_MIN;
1640  	subsys->cntlid_max = NVME_CNTLID_MAX;
1641  	kref_init(&subsys->ref);
1642  
1643  	mutex_init(&subsys->lock);
1644  	xa_init(&subsys->namespaces);
1645  	INIT_LIST_HEAD(&subsys->ctrls);
1646  	INIT_LIST_HEAD(&subsys->hosts);
1647  
1648  	ret = nvmet_debugfs_subsys_setup(subsys);
1649  	if (ret)
1650  		goto free_subsysnqn;
1651  
1652  	return subsys;
1653  
1654  free_subsysnqn:
1655  	kfree(subsys->subsysnqn);
1656  free_fr:
1657  	kfree(subsys->firmware_rev);
1658  free_mn:
1659  	kfree(subsys->model_number);
1660  free_subsys:
1661  	kfree(subsys);
1662  	return ERR_PTR(ret);
1663  }
1664  
1665  static void nvmet_subsys_free(struct kref *ref)
1666  {
1667  	struct nvmet_subsys *subsys =
1668  		container_of(ref, struct nvmet_subsys, ref);
1669  
1670  	WARN_ON_ONCE(!xa_empty(&subsys->namespaces));
1671  
1672  	nvmet_debugfs_subsys_free(subsys);
1673  
1674  	xa_destroy(&subsys->namespaces);
1675  	nvmet_passthru_subsys_free(subsys);
1676  
1677  	kfree(subsys->subsysnqn);
1678  	kfree(subsys->model_number);
1679  	kfree(subsys->firmware_rev);
1680  	kfree(subsys);
1681  }
1682  
1683  void nvmet_subsys_del_ctrls(struct nvmet_subsys *subsys)
1684  {
1685  	struct nvmet_ctrl *ctrl;
1686  
1687  	mutex_lock(&subsys->lock);
1688  	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
1689  		ctrl->ops->delete_ctrl(ctrl);
1690  	mutex_unlock(&subsys->lock);
1691  }
1692  
1693  void nvmet_subsys_put(struct nvmet_subsys *subsys)
1694  {
1695  	kref_put(&subsys->ref, nvmet_subsys_free);
1696  }
1697  
1698  static int __init nvmet_init(void)
1699  {
1700  	int error = -ENOMEM;
1701  
1702  	nvmet_ana_group_enabled[NVMET_DEFAULT_ANA_GRPID] = 1;
1703  
1704  	nvmet_bvec_cache = kmem_cache_create("nvmet-bvec",
1705  			NVMET_MAX_MPOOL_BVEC * sizeof(struct bio_vec), 0,
1706  			SLAB_HWCACHE_ALIGN, NULL);
1707  	if (!nvmet_bvec_cache)
1708  		return -ENOMEM;
1709  
1710  	zbd_wq = alloc_workqueue("nvmet-zbd-wq", WQ_MEM_RECLAIM, 0);
1711  	if (!zbd_wq)
1712  		goto out_destroy_bvec_cache;
1713  
1714  	buffered_io_wq = alloc_workqueue("nvmet-buffered-io-wq",
1715  			WQ_MEM_RECLAIM, 0);
1716  	if (!buffered_io_wq)
1717  		goto out_free_zbd_work_queue;
1718  
1719  	nvmet_wq = alloc_workqueue("nvmet-wq",
1720  			WQ_MEM_RECLAIM | WQ_UNBOUND, 0);
1721  	if (!nvmet_wq)
1722  		goto out_free_buffered_work_queue;
1723  
1724  	error = nvmet_init_discovery();
1725  	if (error)
1726  		goto out_free_nvmet_work_queue;
1727  
1728  	error = nvmet_init_debugfs();
1729  	if (error)
1730  		goto out_exit_discovery;
1731  
1732  	error = nvmet_init_configfs();
1733  	if (error)
1734  		goto out_exit_debugfs;
1735  
1736  	return 0;
1737  
1738  out_exit_debugfs:
1739  	nvmet_exit_debugfs();
1740  out_exit_discovery:
1741  	nvmet_exit_discovery();
1742  out_free_nvmet_work_queue:
1743  	destroy_workqueue(nvmet_wq);
1744  out_free_buffered_work_queue:
1745  	destroy_workqueue(buffered_io_wq);
1746  out_free_zbd_work_queue:
1747  	destroy_workqueue(zbd_wq);
1748  out_destroy_bvec_cache:
1749  	kmem_cache_destroy(nvmet_bvec_cache);
1750  	return error;
1751  }
1752  
1753  static void __exit nvmet_exit(void)
1754  {
1755  	nvmet_exit_configfs();
1756  	nvmet_exit_debugfs();
1757  	nvmet_exit_discovery();
1758  	ida_destroy(&cntlid_ida);
1759  	destroy_workqueue(nvmet_wq);
1760  	destroy_workqueue(buffered_io_wq);
1761  	destroy_workqueue(zbd_wq);
1762  	kmem_cache_destroy(nvmet_bvec_cache);
1763  
1764  	BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_entry) != 1024);
1765  	BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_hdr) != 1024);
1766  }
1767  
1768  module_init(nvmet_init);
1769  module_exit(nvmet_exit);
1770  
1771  MODULE_DESCRIPTION("NVMe target core framework");
1772  MODULE_LICENSE("GPL v2");
1773