Lines Matching +full:dout +full:- +full:default +full:- +full:2
3 rbd.c -- Export ceph rados objects as a Linux block device
27 Documentation/ABI/testing/sysfs-bus-rbd
43 #include <linux/blk-mq.h>
58 * -EINVAL without updating it.
70 return -EINVAL; in atomic_inc_return_safe()
73 /* Decrement the counter. Return the resulting value, or -EINVAL */
84 return -EINVAL; in atomic_dec_return_safe()
96 (NAME_MAX - (sizeof (RBD_SNAP_DEV_NAME_PREFIX) - 1))
100 #define RBD_SNAP_HEAD_NAME "-"
105 #define RBD_IMAGE_NAME_LEN_MAX (PAGE_SIZE - sizeof (__le32) - 1)
117 #define RBD_FEATURE_EXCLUSIVE_LOCK (1ULL<<2)
144 * block device image metadata (in-memory version)
170 * user-mapped image, the names are supplied and the id's associated
175 * non-null if the image it represents is a child in a layered
190 const char *pool_ns; /* NULL if default, never "" */
233 #define RBD_OBJ_FLAG_COPYUP_ZEROS (1U << 2)
252 * . v v (deep-copyup .
352 list_for_each_entry(oreq, &(ireq)->object_extents, ex.oe_item)
354 list_for_each_entry_safe(oreq, n, &(ireq)->object_extents, ex.oe_item)
388 u32 image_format; /* Either 1 or 2 */
455 * Flag bits for rbd_dev->flags:
456 * - REMOVING (which is coupled with rbd_dev->open_count) is protected
457 * by rbd_dev->lock
462 RBD_DEV_FLAG_READONLY, /* -o ro or snapshot */
473 /* Slab caches for frequently-allocated structures */
488 * single-major requires >= 0.75 version of userspace rbd utility.
492 MODULE_PARM_DESC(single_major, "Use a single major number for all rbd devices (default: true)");
515 return test_bit(RBD_DEV_FLAG_READONLY, &rbd_dev->flags); in rbd_is_ro()
520 return rbd_dev->spec->snap_id != CEPH_NOSNAP; in rbd_is_snap()
525 lockdep_assert_held(&rbd_dev->lock_rwsem); in __rbd_is_lock_owner()
527 return rbd_dev->lock_state == RBD_LOCK_STATE_LOCKED || in __rbd_is_lock_owner()
528 rbd_dev->lock_state == RBD_LOCK_STATE_QUIESCING; in __rbd_is_lock_owner()
535 down_read(&rbd_dev->lock_rwsem); in rbd_is_lock_owner()
537 up_read(&rbd_dev->lock_rwsem); in rbd_is_lock_owner()
569 return attr->mode; in rbd_bus_is_visible()
592 static __printf(2, 3)
604 else if (rbd_dev->disk) in rbd_warn()
606 RBD_DRV_NAME, rbd_dev->disk->disk_name, &vaf); in rbd_warn()
607 else if (rbd_dev->spec && rbd_dev->spec->image_name) in rbd_warn()
609 RBD_DRV_NAME, rbd_dev->spec->image_name, &vaf); in rbd_warn()
610 else if (rbd_dev->spec && rbd_dev->spec->image_id) in rbd_warn()
612 RBD_DRV_NAME, rbd_dev->spec->image_id, &vaf); in rbd_warn()
651 rbd_assert(pending->num_pending > 0); in pending_result_dec()
653 if (*result && !pending->result) in pending_result_dec()
654 pending->result = *result; in pending_result_dec()
655 if (--pending->num_pending) in pending_result_dec()
658 *result = pending->result; in pending_result_dec()
664 struct rbd_device *rbd_dev = disk->private_data; in rbd_open()
667 spin_lock_irq(&rbd_dev->lock); in rbd_open()
668 if (test_bit(RBD_DEV_FLAG_REMOVING, &rbd_dev->flags)) in rbd_open()
671 rbd_dev->open_count++; in rbd_open()
672 spin_unlock_irq(&rbd_dev->lock); in rbd_open()
674 return -ENOENT; in rbd_open()
676 (void) get_device(&rbd_dev->dev); in rbd_open()
683 struct rbd_device *rbd_dev = disk->private_data; in rbd_release()
686 spin_lock_irq(&rbd_dev->lock); in rbd_release()
687 open_count_before = rbd_dev->open_count--; in rbd_release()
688 spin_unlock_irq(&rbd_dev->lock); in rbd_release()
691 put_device(&rbd_dev->dev); in rbd_release()
707 int ret = -ENOMEM; in rbd_client_create()
709 dout("%s:\n", __func__); in rbd_client_create()
714 kref_init(&rbdc->kref); in rbd_client_create()
715 INIT_LIST_HEAD(&rbdc->node); in rbd_client_create()
717 rbdc->client = ceph_create_client(ceph_opts, rbdc); in rbd_client_create()
718 if (IS_ERR(rbdc->client)) in rbd_client_create()
720 ceph_opts = NULL; /* Now rbdc->client is responsible for ceph_opts */ in rbd_client_create()
722 ret = ceph_open_session(rbdc->client); in rbd_client_create()
727 list_add_tail(&rbdc->node, &rbd_client_list); in rbd_client_create()
730 dout("%s: rbdc %p\n", __func__, rbdc); in rbd_client_create()
734 ceph_destroy_client(rbdc->client); in rbd_client_create()
740 dout("%s: error %d\n", __func__, ret); in rbd_client_create()
747 kref_get(&rbdc->kref); in __rbd_get_client()
760 if (ceph_opts->flags & CEPH_OPT_NOSHARE) in rbd_client_find()
765 if (!ceph_compare_options(ceph_opts, iter->client)) { in rbd_client_find()
862 default: in obj_op_name()
876 dout("%s: rbdc %p\n", __func__, rbdc); in rbd_client_release()
878 list_del(&rbdc->node); in rbd_client_release()
881 ceph_destroy_client(rbdc->client); in rbd_client_release()
892 kref_put(&rbdc->kref, rbd_client_release); in rbd_put_client()
911 * Using an existing client. Make sure ->pg_pools is up to in rbd_get_client()
914 ret = ceph_wait_for_latest_osdmap(rbdc->client, in rbd_get_client()
915 rbdc->client->options->mount_timeout); in rbd_get_client()
931 return image_format == 1 || image_format == 2; in rbd_image_format_valid()
940 if (memcmp(&ondisk->text, RBD_HEADER_TEXT, sizeof (RBD_HEADER_TEXT))) in rbd_dev_ondisk_valid()
943 /* The bio layer requires at least sector-sized I/O */ in rbd_dev_ondisk_valid()
945 if (ondisk->options.order < SECTOR_SHIFT) in rbd_dev_ondisk_valid()
950 if (ondisk->options.order > 8 * sizeof (int) - 1) in rbd_dev_ondisk_valid()
957 snap_count = le32_to_cpu(ondisk->snap_count); in rbd_dev_ondisk_valid()
958 size = SIZE_MAX - sizeof (struct ceph_snap_context); in rbd_dev_ondisk_valid()
966 size -= snap_count * sizeof (__le64); in rbd_dev_ondisk_valid()
967 if ((u64) size < le64_to_cpu(ondisk->snap_names_len)) in rbd_dev_ondisk_valid()
978 return 1U << header->obj_order; in rbd_obj_bytes()
983 if (rbd_dev->header.stripe_unit == 0 || in rbd_init_layout()
984 rbd_dev->header.stripe_count == 0) { in rbd_init_layout()
985 rbd_dev->header.stripe_unit = rbd_obj_bytes(&rbd_dev->header); in rbd_init_layout()
986 rbd_dev->header.stripe_count = 1; in rbd_init_layout()
989 rbd_dev->layout.stripe_unit = rbd_dev->header.stripe_unit; in rbd_init_layout()
990 rbd_dev->layout.stripe_count = rbd_dev->header.stripe_count; in rbd_init_layout()
991 rbd_dev->layout.object_size = rbd_obj_bytes(&rbd_dev->header); in rbd_init_layout()
992 rbd_dev->layout.pool_id = rbd_dev->header.data_pool_id == CEPH_NOPOOL ? in rbd_init_layout()
993 rbd_dev->spec->pool_id : rbd_dev->header.data_pool_id; in rbd_init_layout()
994 RCU_INIT_POINTER(rbd_dev->layout.pool_ns, NULL); in rbd_init_layout()
999 kfree(header->object_prefix); in rbd_image_header_cleanup()
1000 ceph_put_snap_context(header->snapc); in rbd_image_header_cleanup()
1001 kfree(header->snap_sizes); in rbd_image_header_cleanup()
1002 kfree(header->snap_names); in rbd_image_header_cleanup()
1009 * on-disk header.
1020 int ret = -ENOMEM; in rbd_header_from_disk()
1026 object_prefix = kstrndup(ondisk->object_prefix, in rbd_header_from_disk()
1027 sizeof(ondisk->object_prefix), in rbd_header_from_disk()
1030 return -ENOMEM; in rbd_header_from_disk()
1035 snap_count = le32_to_cpu(ondisk->snap_count); in rbd_header_from_disk()
1039 snapc->seq = le64_to_cpu(ondisk->snap_seq); in rbd_header_from_disk()
1042 u64 snap_names_len = le64_to_cpu(ondisk->snap_names_len); in rbd_header_from_disk()
1054 sizeof(*header->snap_sizes), in rbd_header_from_disk()
1068 memcpy(snap_names, &ondisk->snaps[snap_count], snap_names_len); in rbd_header_from_disk()
1069 snaps = ondisk->snaps; in rbd_header_from_disk()
1071 snapc->snaps[i] = le64_to_cpu(snaps[i].id); in rbd_header_from_disk()
1079 header->object_prefix = object_prefix; in rbd_header_from_disk()
1080 header->obj_order = ondisk->options.order; in rbd_header_from_disk()
1085 header->image_size = le64_to_cpu(ondisk->image_size); in rbd_header_from_disk()
1086 header->snapc = snapc; in rbd_header_from_disk()
1087 header->snap_names = snap_names; in rbd_header_from_disk()
1088 header->snap_sizes = snap_sizes; in rbd_header_from_disk()
1092 ret = -EIO; in rbd_header_from_disk()
1106 rbd_assert(which < rbd_dev->header.snapc->num_snaps); in _rbd_dev_v1_snap_name()
1110 snap_name = rbd_dev->header.snap_names; in _rbd_dev_v1_snap_name()
1111 while (which--) in _rbd_dev_v1_snap_name()
1128 return snap_id1 == snap_id2 ? 0 : -1; in snapid_compare_reverse()
1143 struct ceph_snap_context *snapc = rbd_dev->header.snapc; in rbd_dev_snap_index()
1146 found = bsearch(&snap_id, &snapc->snaps, snapc->num_snaps, in rbd_dev_snap_index()
1149 return found ? (u32)(found - &snapc->snaps[0]) : BAD_SNAP_INDEX; in rbd_dev_snap_index()
1160 return ERR_PTR(-ENOENT); in rbd_dev_v1_snap_name()
1163 return snap_name ? snap_name : ERR_PTR(-ENOMEM); in rbd_dev_v1_snap_name()
1171 rbd_assert(rbd_image_format_valid(rbd_dev->image_format)); in rbd_snap_name()
1172 if (rbd_dev->image_format == 1) in rbd_snap_name()
1181 rbd_assert(rbd_image_format_valid(rbd_dev->image_format)); in rbd_snap_size()
1183 *snap_size = rbd_dev->header.image_size; in rbd_snap_size()
1184 } else if (rbd_dev->image_format == 1) { in rbd_snap_size()
1189 return -ENOENT; in rbd_snap_size()
1191 *snap_size = rbd_dev->header.snap_sizes[which]; in rbd_snap_size()
1207 u64 snap_id = rbd_dev->spec->snap_id; in rbd_dev_mapping_set()
1215 rbd_dev->mapping.size = size; in rbd_dev_mapping_set()
1221 rbd_dev->mapping.size = 0; in rbd_dev_mapping_clear()
1253 dout("%s %p data buf %u~%u\n", __func__, obj_req, off, bytes); in rbd_obj_zero_range()
1255 switch (obj_req->img_request->data_type) { in rbd_obj_zero_range()
1257 zero_bios(&obj_req->bio_pos, off, bytes); in rbd_obj_zero_range()
1261 zero_bvecs(&obj_req->bvec_pos, off, bytes); in rbd_obj_zero_range()
1263 default: in rbd_obj_zero_range()
1272 dout("%s: obj %p (was %d)\n", __func__, obj_request, in rbd_obj_request_put()
1273 kref_read(&obj_request->kref)); in rbd_obj_request_put()
1274 kref_put(&obj_request->kref, rbd_obj_request_destroy); in rbd_obj_request_put()
1280 rbd_assert(obj_request->img_request == NULL); in rbd_img_obj_request_add()
1283 obj_request->img_request = img_request; in rbd_img_obj_request_add()
1284 dout("%s: img %p obj %p\n", __func__, img_request, obj_request); in rbd_img_obj_request_add()
1290 dout("%s: img %p obj %p\n", __func__, img_request, obj_request); in rbd_img_obj_request_del()
1291 list_del(&obj_request->ex.oe_item); in rbd_img_obj_request_del()
1292 rbd_assert(obj_request->img_request == img_request); in rbd_img_obj_request_del()
1298 struct rbd_obj_request *obj_req = osd_req->r_priv; in rbd_osd_submit()
1300 dout("%s osd_req %p for obj_req %p objno %llu %llu~%llu\n", in rbd_osd_submit()
1301 __func__, osd_req, obj_req, obj_req->ex.oe_objno, in rbd_osd_submit()
1302 obj_req->ex.oe_off, obj_req->ex.oe_len); in rbd_osd_submit()
1303 ceph_osdc_start_request(osd_req->r_osdc, osd_req); in rbd_osd_submit()
1307 * The default/initial value for all image request flags is 0. Each
1313 set_bit(IMG_REQ_LAYERED, &img_request->flags); in img_request_layered_set()
1318 return test_bit(IMG_REQ_LAYERED, &img_request->flags) != 0; in img_request_layered_test()
1323 struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev; in rbd_obj_is_entire()
1325 return !obj_req->ex.oe_off && in rbd_obj_is_entire()
1326 obj_req->ex.oe_len == rbd_dev->layout.object_size; in rbd_obj_is_entire()
1331 struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev; in rbd_obj_is_tail()
1333 return obj_req->ex.oe_off + obj_req->ex.oe_len == in rbd_obj_is_tail()
1334 rbd_dev->layout.object_size; in rbd_obj_is_tail()
1342 rbd_assert(obj_req->img_request->snapc); in rbd_obj_set_copyup_enabled()
1344 if (obj_req->img_request->op_type == OBJ_OP_DISCARD) { in rbd_obj_set_copyup_enabled()
1345 dout("%s %p objno %llu discard\n", __func__, obj_req, in rbd_obj_set_copyup_enabled()
1346 obj_req->ex.oe_objno); in rbd_obj_set_copyup_enabled()
1350 if (!obj_req->num_img_extents) { in rbd_obj_set_copyup_enabled()
1351 dout("%s %p objno %llu not overlapping\n", __func__, obj_req, in rbd_obj_set_copyup_enabled()
1352 obj_req->ex.oe_objno); in rbd_obj_set_copyup_enabled()
1357 !obj_req->img_request->snapc->num_snaps) { in rbd_obj_set_copyup_enabled()
1358 dout("%s %p objno %llu entire\n", __func__, obj_req, in rbd_obj_set_copyup_enabled()
1359 obj_req->ex.oe_objno); in rbd_obj_set_copyup_enabled()
1363 obj_req->flags |= RBD_OBJ_FLAG_COPYUP_ENABLED; in rbd_obj_set_copyup_enabled()
1368 return ceph_file_extents_bytes(obj_req->img_extents, in rbd_obj_img_extents_bytes()
1369 obj_req->num_img_extents); in rbd_obj_img_extents_bytes()
1374 switch (img_req->op_type) { in rbd_img_is_write()
1381 default: in rbd_img_is_write()
1388 struct rbd_obj_request *obj_req = osd_req->r_priv; in rbd_osd_req_callback()
1391 dout("%s osd_req %p result %d for obj_req %p\n", __func__, osd_req, in rbd_osd_req_callback()
1392 osd_req->r_result, obj_req); in rbd_osd_req_callback()
1399 if (osd_req->r_result > 0 && rbd_img_is_write(obj_req->img_request)) in rbd_osd_req_callback()
1402 result = osd_req->r_result; in rbd_osd_req_callback()
1409 struct rbd_obj_request *obj_request = osd_req->r_priv; in rbd_osd_format_read()
1410 struct rbd_device *rbd_dev = obj_request->img_request->rbd_dev; in rbd_osd_format_read()
1411 struct ceph_options *opt = rbd_dev->rbd_client->client->options; in rbd_osd_format_read()
1413 osd_req->r_flags = CEPH_OSD_FLAG_READ | opt->read_from_replica; in rbd_osd_format_read()
1414 osd_req->r_snapid = obj_request->img_request->snap_id; in rbd_osd_format_read()
1419 struct rbd_obj_request *obj_request = osd_req->r_priv; in rbd_osd_format_write()
1421 osd_req->r_flags = CEPH_OSD_FLAG_WRITE; in rbd_osd_format_write()
1422 ktime_get_real_ts64(&osd_req->r_mtime); in rbd_osd_format_write()
1423 osd_req->r_data_offset = obj_request->ex.oe_off; in rbd_osd_format_write()
1430 struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev; in __rbd_obj_add_osd_request()
1431 struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; in __rbd_obj_add_osd_request()
1433 const char *name_format = rbd_dev->image_format == 1 ? in __rbd_obj_add_osd_request()
1439 return ERR_PTR(-ENOMEM); in __rbd_obj_add_osd_request()
1441 list_add_tail(&req->r_private_item, &obj_req->osd_reqs); in __rbd_obj_add_osd_request()
1442 req->r_callback = rbd_osd_req_callback; in __rbd_obj_add_osd_request()
1443 req->r_priv = obj_req; in __rbd_obj_add_osd_request()
1449 ceph_oloc_copy(&req->r_base_oloc, &rbd_dev->header_oloc); in __rbd_obj_add_osd_request()
1450 req->r_base_oloc.pool = rbd_dev->layout.pool_id; in __rbd_obj_add_osd_request()
1452 ret = ceph_oid_aprintf(&req->r_base_oid, GFP_NOIO, name_format, in __rbd_obj_add_osd_request()
1453 rbd_dev->header.object_prefix, in __rbd_obj_add_osd_request()
1454 obj_req->ex.oe_objno); in __rbd_obj_add_osd_request()
1464 rbd_assert(obj_req->img_request->snapc); in rbd_obj_add_osd_request()
1465 return __rbd_obj_add_osd_request(obj_req, obj_req->img_request->snapc, in rbd_obj_add_osd_request()
1477 ceph_object_extent_init(&obj_request->ex); in rbd_obj_request_create()
1478 INIT_LIST_HEAD(&obj_request->osd_reqs); in rbd_obj_request_create()
1479 mutex_init(&obj_request->state_mutex); in rbd_obj_request_create()
1480 kref_init(&obj_request->kref); in rbd_obj_request_create()
1482 dout("%s %p\n", __func__, obj_request); in rbd_obj_request_create()
1494 dout("%s: obj %p\n", __func__, obj_request); in rbd_obj_request_destroy()
1496 while (!list_empty(&obj_request->osd_reqs)) { in rbd_obj_request_destroy()
1497 osd_req = list_first_entry(&obj_request->osd_reqs, in rbd_obj_request_destroy()
1499 list_del_init(&osd_req->r_private_item); in rbd_obj_request_destroy()
1503 switch (obj_request->img_request->data_type) { in rbd_obj_request_destroy()
1509 kfree(obj_request->bvec_pos.bvecs); in rbd_obj_request_destroy()
1511 default: in rbd_obj_request_destroy()
1515 kfree(obj_request->img_extents); in rbd_obj_request_destroy()
1516 if (obj_request->copyup_bvecs) { in rbd_obj_request_destroy()
1517 for (i = 0; i < obj_request->copyup_bvec_count; i++) { in rbd_obj_request_destroy()
1518 if (obj_request->copyup_bvecs[i].bv_page) in rbd_obj_request_destroy()
1519 __free_page(obj_request->copyup_bvecs[i].bv_page); in rbd_obj_request_destroy()
1521 kfree(obj_request->copyup_bvecs); in rbd_obj_request_destroy()
1533 rbd_spec_put(rbd_dev->parent_spec); in rbd_dev_unparent()
1534 rbd_dev->parent_spec = NULL; in rbd_dev_unparent()
1535 rbd_dev->parent_overlap = 0; in rbd_dev_unparent()
1540 * image's parent fields can be safely torn down--after there are no
1541 * more in-flight requests to the parent image. When the last
1548 if (!rbd_dev->parent_spec) in rbd_dev_parent_put()
1551 counter = atomic_dec_return_safe(&rbd_dev->parent_ref); in rbd_dev_parent_put()
1564 * If an image has a non-zero parent overlap, get a reference to its
1567 * Returns true if the rbd device has a parent with a non-zero
1575 if (!rbd_dev->parent_spec) in rbd_dev_parent_get()
1578 if (rbd_dev->parent_overlap) in rbd_dev_parent_get()
1579 counter = atomic_inc_return_safe(&rbd_dev->parent_ref); in rbd_dev_parent_get()
1593 img_request->rbd_dev = rbd_dev; in rbd_img_request_init()
1594 img_request->op_type = op_type; in rbd_img_request_init()
1596 INIT_LIST_HEAD(&img_request->lock_item); in rbd_img_request_init()
1597 INIT_LIST_HEAD(&img_request->object_extents); in rbd_img_request_init()
1598 mutex_init(&img_request->state_mutex); in rbd_img_request_init()
1608 struct rbd_device *rbd_dev = img_req->rbd_dev; in rbd_img_capture_header()
1610 lockdep_assert_held(&rbd_dev->header_rwsem); in rbd_img_capture_header()
1613 img_req->snap_id = rbd_dev->spec->snap_id; in rbd_img_capture_header()
1624 dout("%s: img %p\n", __func__, img_request); in rbd_img_request_destroy()
1626 WARN_ON(!list_empty(&img_request->lock_item)); in rbd_img_request_destroy()
1631 rbd_dev_parent_put(img_request->rbd_dev); in rbd_img_request_destroy()
1634 ceph_put_snap_context(img_request->snapc); in rbd_img_request_destroy()
1636 if (test_bit(IMG_REQ_CHILD, &img_request->flags)) in rbd_img_request_destroy()
1640 #define BITS_PER_OBJ 2
1642 #define OBJ_MASK ((1 << BITS_PER_OBJ) - 1)
1649 rbd_assert(objno < rbd_dev->object_map_size); in __rbd_object_map_index()
1651 *shift = (OBJS_PER_BYTE - off - 1) * BITS_PER_OBJ; in __rbd_object_map_index()
1659 lockdep_assert_held(&rbd_dev->object_map_lock); in __rbd_object_map_get()
1661 return (rbd_dev->object_map[index] >> shift) & OBJ_MASK; in __rbd_object_map_get()
1670 lockdep_assert_held(&rbd_dev->object_map_lock); in __rbd_object_map_set()
1674 p = &rbd_dev->object_map[index]; in __rbd_object_map_set()
1682 spin_lock(&rbd_dev->object_map_lock); in rbd_object_map_get()
1684 spin_unlock(&rbd_dev->object_map_lock); in rbd_object_map_get()
1691 * An image mapped read-only can't use the object map -- it isn't in use_object_map()
1701 return ((rbd_dev->header.features & RBD_FEATURE_OBJECT_MAP) && in use_object_map()
1702 !(rbd_dev->object_map_flags & RBD_FLAG_OBJECT_MAP_INVALID)); in use_object_map()
1709 /* fall back to default logic if object map is disabled or invalid */ in rbd_object_map_may_exist()
1722 rbd_dev->spec->image_id); in rbd_object_map_name()
1725 rbd_dev->spec->image_id, snap_id); in rbd_object_map_name()
1730 struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; in rbd_object_map_lock()
1742 ret = ceph_cls_lock(osdc, &oid, &rbd_dev->header_oloc, RBD_LOCK_NAME, in rbd_object_map_lock()
1744 if (ret != -EBUSY || broke_lock) { in rbd_object_map_lock()
1745 if (ret == -EEXIST) in rbd_object_map_lock()
1752 ret = ceph_cls_lock_info(osdc, &oid, &rbd_dev->header_oloc, in rbd_object_map_lock()
1756 if (ret == -ENOENT) in rbd_object_map_lock()
1770 ret = ceph_cls_break_lock(osdc, &oid, &rbd_dev->header_oloc, in rbd_object_map_lock()
1775 if (ret == -ENOENT) in rbd_object_map_lock()
1788 struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; in rbd_object_map_unlock()
1794 ret = ceph_cls_unlock(osdc, &oid, &rbd_dev->header_oloc, RBD_LOCK_NAME, in rbd_object_map_unlock()
1796 if (ret && ret != -ENOENT) in rbd_object_map_unlock()
1822 return -EINVAL; in decode_object_map_header()
1827 struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; in __rbd_object_map_load()
1838 rbd_assert(!rbd_dev->object_map && !rbd_dev->object_map_size); in __rbd_object_map_load()
1840 num_objects = ceph_get_num_objects(&rbd_dev->layout, in __rbd_object_map_load()
1841 rbd_dev->mapping.size); in __rbd_object_map_load()
1850 rbd_object_map_name(rbd_dev, rbd_dev->spec->snap_id, &oid); in __rbd_object_map_load()
1851 ret = ceph_osdc_call(osdc, &oid, &rbd_dev->header_oloc, in __rbd_object_map_load()
1866 ret = -EINVAL; in __rbd_object_map_load()
1871 ret = -EINVAL; in __rbd_object_map_load()
1875 rbd_dev->object_map = kvmalloc(object_map_bytes, GFP_KERNEL); in __rbd_object_map_load()
1876 if (!rbd_dev->object_map) { in __rbd_object_map_load()
1877 ret = -ENOMEM; in __rbd_object_map_load()
1881 rbd_dev->object_map_size = object_map_size; in __rbd_object_map_load()
1882 ceph_copy_from_page_vector(pages, rbd_dev->object_map, in __rbd_object_map_load()
1892 kvfree(rbd_dev->object_map); in rbd_object_map_free()
1893 rbd_dev->object_map = NULL; in rbd_object_map_free()
1894 rbd_dev->object_map_size = 0; in rbd_object_map_free()
1911 if (rbd_dev->object_map_flags & RBD_FLAG_OBJECT_MAP_INVALID) in rbd_object_map_load()
1953 struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev; in rbd_object_map_update_finish()
1960 if (osd_req->r_result) in rbd_object_map_update_finish()
1961 return osd_req->r_result; in rbd_object_map_update_finish()
1966 if (osd_req->r_num_ops == 1) in rbd_object_map_update_finish()
1970 * Update in-memory HEAD object map. in rbd_object_map_update_finish()
1972 rbd_assert(osd_req->r_num_ops == 2); in rbd_object_map_update_finish()
1974 rbd_assert(osd_data->type == CEPH_OSD_DATA_TYPE_PAGES); in rbd_object_map_update_finish()
1976 p = page_address(osd_data->pages[0]); in rbd_object_map_update_finish()
1978 rbd_assert(objno == obj_req->ex.oe_objno); in rbd_object_map_update_finish()
1985 spin_lock(&rbd_dev->object_map_lock); in rbd_object_map_update_finish()
1990 spin_unlock(&rbd_dev->object_map_lock); in rbd_object_map_update_finish()
1997 struct rbd_obj_request *obj_req = osd_req->r_priv; in rbd_object_map_callback()
2000 dout("%s osd_req %p result %d for obj_req %p\n", __func__, osd_req, in rbd_object_map_callback()
2001 osd_req->r_result, obj_req); in rbd_object_map_callback()
2046 osd_req_op_cls_request_data_pages(req, which, pages, p - start, 0, in rbd_cls_object_map_update()
2053 * 0 - object map update sent
2054 * 1 - object map update isn't needed
2055 * <0 - error
2060 struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev; in rbd_object_map_update()
2061 struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; in rbd_object_map_update()
2068 if (!update_needed(rbd_dev, obj_req->ex.oe_objno, new_state)) in rbd_object_map_update()
2076 return -ENOMEM; in rbd_object_map_update()
2078 list_add_tail(&req->r_private_item, &obj_req->osd_reqs); in rbd_object_map_update()
2079 req->r_callback = rbd_object_map_callback; in rbd_object_map_update()
2080 req->r_priv = obj_req; in rbd_object_map_update()
2082 rbd_object_map_name(rbd_dev, snap_id, &req->r_base_oid); in rbd_object_map_update()
2083 ceph_oloc_copy(&req->r_base_oloc, &rbd_dev->header_oloc); in rbd_object_map_update()
2084 req->r_flags = CEPH_OSD_FLAG_WRITE; in rbd_object_map_update()
2085 ktime_get_real_ts64(&req->r_mtime); in rbd_object_map_update()
2098 ret = rbd_cls_object_map_update(req, which, obj_req->ex.oe_objno, in rbd_object_map_update()
2117 while (cnt && img_extents[cnt - 1].fe_off >= overlap) in prune_extents()
2118 cnt--; in prune_extents()
2121 struct ceph_file_extent *ex = &img_extents[cnt - 1]; in prune_extents()
2124 if (ex->fe_off + ex->fe_len > overlap) in prune_extents()
2125 ex->fe_len = overlap - ex->fe_off; in prune_extents()
2138 struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev; in rbd_obj_calc_img_extents()
2141 if (!rbd_dev->parent_overlap) in rbd_obj_calc_img_extents()
2144 ret = ceph_extent_to_file(&rbd_dev->layout, obj_req->ex.oe_objno, in rbd_obj_calc_img_extents()
2145 entire ? 0 : obj_req->ex.oe_off, in rbd_obj_calc_img_extents()
2146 entire ? rbd_dev->layout.object_size : in rbd_obj_calc_img_extents()
2147 obj_req->ex.oe_len, in rbd_obj_calc_img_extents()
2148 &obj_req->img_extents, in rbd_obj_calc_img_extents()
2149 &obj_req->num_img_extents); in rbd_obj_calc_img_extents()
2153 prune_extents(obj_req->img_extents, &obj_req->num_img_extents, in rbd_obj_calc_img_extents()
2154 rbd_dev->parent_overlap); in rbd_obj_calc_img_extents()
2160 struct rbd_obj_request *obj_req = osd_req->r_priv; in rbd_osd_setup_data()
2162 switch (obj_req->img_request->data_type) { in rbd_osd_setup_data()
2165 &obj_req->bio_pos, in rbd_osd_setup_data()
2166 obj_req->ex.oe_len); in rbd_osd_setup_data()
2170 rbd_assert(obj_req->bvec_pos.iter.bi_size == in rbd_osd_setup_data()
2171 obj_req->ex.oe_len); in rbd_osd_setup_data()
2172 rbd_assert(obj_req->bvec_idx == obj_req->bvec_count); in rbd_osd_setup_data()
2174 &obj_req->bvec_pos); in rbd_osd_setup_data()
2176 default: in rbd_osd_setup_data()
2207 struct rbd_obj_request *obj_req = osd_req->r_priv; in rbd_osd_setup_copyup()
2214 osd_req_op_cls_request_data_bvecs(osd_req, which, obj_req->copyup_bvecs, in rbd_osd_setup_copyup()
2215 obj_req->copyup_bvec_count, bytes); in rbd_osd_setup_copyup()
2221 obj_req->read_state = RBD_OBJ_READ_START; in rbd_obj_init_read()
2228 struct rbd_obj_request *obj_req = osd_req->r_priv; in __rbd_osd_setup_write_ops()
2229 struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev; in __rbd_osd_setup_write_ops()
2233 !(obj_req->flags & RBD_OBJ_FLAG_MAY_EXIST)) { in __rbd_osd_setup_write_ops()
2235 rbd_dev->layout.object_size, in __rbd_osd_setup_write_ops()
2236 rbd_dev->layout.object_size, in __rbd_osd_setup_write_ops()
2237 rbd_dev->opts->alloc_hint_flags); in __rbd_osd_setup_write_ops()
2246 obj_req->ex.oe_off, obj_req->ex.oe_len, 0, 0); in __rbd_osd_setup_write_ops()
2259 obj_req->write_state = RBD_OBJ_WRITE_START; in rbd_obj_init_write()
2272 struct rbd_obj_request *obj_req = osd_req->r_priv; in __rbd_osd_setup_discard_ops()
2274 if (rbd_obj_is_entire(obj_req) && !obj_req->num_img_extents) { in __rbd_osd_setup_discard_ops()
2275 rbd_assert(obj_req->flags & RBD_OBJ_FLAG_DELETION); in __rbd_osd_setup_discard_ops()
2280 obj_req->ex.oe_off, obj_req->ex.oe_len, in __rbd_osd_setup_discard_ops()
2287 struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev; in rbd_obj_init_discard()
2299 if (rbd_dev->opts->alloc_size != rbd_dev->layout.object_size || in rbd_obj_init_discard()
2301 off = round_up(obj_req->ex.oe_off, rbd_dev->opts->alloc_size); in rbd_obj_init_discard()
2302 next_off = round_down(obj_req->ex.oe_off + obj_req->ex.oe_len, in rbd_obj_init_discard()
2303 rbd_dev->opts->alloc_size); in rbd_obj_init_discard()
2307 dout("%s %p %llu~%llu -> %llu~%llu\n", __func__, in rbd_obj_init_discard()
2308 obj_req, obj_req->ex.oe_off, obj_req->ex.oe_len, in rbd_obj_init_discard()
2309 off, next_off - off); in rbd_obj_init_discard()
2310 obj_req->ex.oe_off = off; in rbd_obj_init_discard()
2311 obj_req->ex.oe_len = next_off - off; in rbd_obj_init_discard()
2319 obj_req->flags |= RBD_OBJ_FLAG_NOOP_FOR_NONEXISTENT; in rbd_obj_init_discard()
2320 if (rbd_obj_is_entire(obj_req) && !obj_req->num_img_extents) in rbd_obj_init_discard()
2321 obj_req->flags |= RBD_OBJ_FLAG_DELETION; in rbd_obj_init_discard()
2323 obj_req->write_state = RBD_OBJ_WRITE_START; in rbd_obj_init_discard()
2330 struct rbd_obj_request *obj_req = osd_req->r_priv; in __rbd_osd_setup_zeroout_ops()
2334 if (obj_req->num_img_extents) { in __rbd_osd_setup_zeroout_ops()
2335 if (!(obj_req->flags & RBD_OBJ_FLAG_COPYUP_ENABLED)) in __rbd_osd_setup_zeroout_ops()
2340 rbd_assert(obj_req->flags & RBD_OBJ_FLAG_DELETION); in __rbd_osd_setup_zeroout_ops()
2351 obj_req->ex.oe_off, obj_req->ex.oe_len, in __rbd_osd_setup_zeroout_ops()
2364 if (!obj_req->num_img_extents) { in rbd_obj_init_zeroout()
2365 obj_req->flags |= RBD_OBJ_FLAG_NOOP_FOR_NONEXISTENT; in rbd_obj_init_zeroout()
2367 obj_req->flags |= RBD_OBJ_FLAG_DELETION; in rbd_obj_init_zeroout()
2370 obj_req->write_state = RBD_OBJ_WRITE_START; in rbd_obj_init_zeroout()
2376 struct rbd_img_request *img_req = obj_req->img_request; in count_write_ops()
2378 switch (img_req->op_type) { in count_write_ops()
2380 if (!use_object_map(img_req->rbd_dev) || in count_write_ops()
2381 !(obj_req->flags & RBD_OBJ_FLAG_MAY_EXIST)) in count_write_ops()
2382 return 2; /* setallochint + write/writefull */ in count_write_ops()
2388 if (rbd_obj_is_entire(obj_req) && obj_req->num_img_extents && in count_write_ops()
2389 !(obj_req->flags & RBD_OBJ_FLAG_COPYUP_ENABLED)) in count_write_ops()
2390 return 2; /* create + truncate */ in count_write_ops()
2393 default: in count_write_ops()
2401 struct rbd_obj_request *obj_req = osd_req->r_priv; in rbd_osd_setup_write_ops()
2403 switch (obj_req->img_request->op_type) { in rbd_osd_setup_write_ops()
2413 default: in rbd_osd_setup_write_ops()
2429 switch (img_req->op_type) { in __rbd_img_fill_request()
2442 default: in __rbd_img_fill_request()
2453 img_req->state = RBD_IMG_START; in __rbd_img_fill_request()
2481 return &obj_req->ex; in alloc_object_extent()
2487 * because ->set_pos_fn() should be called only once per object.
2493 return l->stripe_unit != l->object_size; in rbd_layout_is_fancy()
2504 img_req->data_type = fctx->pos_type; in rbd_img_fill_request_nocopy()
2510 fctx->iter = *fctx->pos; in rbd_img_fill_request_nocopy()
2512 ret = ceph_file_to_extents(&img_req->rbd_dev->layout, in rbd_img_fill_request_nocopy()
2515 &img_req->object_extents, in rbd_img_fill_request_nocopy()
2517 fctx->set_pos_fn, &fctx->iter); in rbd_img_fill_request_nocopy()
2530 * @fctx->pos data buffer.
2534 * different chunks of @fctx->pos data buffer.
2536 * @fctx->pos data buffer is assumed to be large enough.
2543 struct rbd_device *rbd_dev = img_req->rbd_dev; in rbd_img_fill_request()
2548 if (fctx->pos_type == OBJ_REQUEST_NODATA || in rbd_img_fill_request()
2549 !rbd_layout_is_fancy(&rbd_dev->layout)) in rbd_img_fill_request()
2553 img_req->data_type = OBJ_REQUEST_OWN_BVECS; in rbd_img_fill_request()
2556 * Create object requests and determine ->bvec_count for each object in rbd_img_fill_request()
2557 * request. Note that ->bvec_count sum over all object requests may in rbd_img_fill_request()
2562 fctx->iter = *fctx->pos; in rbd_img_fill_request()
2564 ret = ceph_file_to_extents(&rbd_dev->layout, in rbd_img_fill_request()
2567 &img_req->object_extents, in rbd_img_fill_request()
2569 fctx->count_fn, &fctx->iter); in rbd_img_fill_request()
2575 obj_req->bvec_pos.bvecs = kmalloc_array(obj_req->bvec_count, in rbd_img_fill_request()
2576 sizeof(*obj_req->bvec_pos.bvecs), in rbd_img_fill_request()
2578 if (!obj_req->bvec_pos.bvecs) in rbd_img_fill_request()
2579 return -ENOMEM; in rbd_img_fill_request()
2586 fctx->iter = *fctx->pos; in rbd_img_fill_request()
2588 ret = ceph_iterate_extents(&rbd_dev->layout, in rbd_img_fill_request()
2591 &img_req->object_extents, in rbd_img_fill_request()
2592 fctx->copy_fn, &fctx->iter); in rbd_img_fill_request()
2619 dout("%s objno %llu bytes %u\n", __func__, ex->oe_objno, bytes); in set_bio_pos()
2620 obj_req->bio_pos = *it; in set_bio_pos()
2630 dout("%s objno %llu bytes %u\n", __func__, ex->oe_objno, bytes); in count_bio_bvecs()
2632 obj_req->bvec_count++; in count_bio_bvecs()
2643 dout("%s objno %llu bytes %u\n", __func__, ex->oe_objno, bytes); in copy_bio_bvecs()
2645 obj_req->bvec_pos.bvecs[obj_req->bvec_idx++] = bv; in copy_bio_bvecs()
2646 obj_req->bvec_pos.iter.bi_size += bv.bv_len; in copy_bio_bvecs()
2671 struct ceph_bio_iter it = { .bio = bio, .iter = bio->bi_iter }; in rbd_img_fill_from_bio()
2682 obj_req->bvec_pos = *it; in set_bvec_pos()
2683 ceph_bvec_iter_shorten(&obj_req->bvec_pos, bytes); in set_bvec_pos()
2694 obj_req->bvec_count++; in count_bvecs()
2705 obj_req->bvec_pos.bvecs[obj_req->bvec_idx++] = bv; in copy_bvecs()
2706 obj_req->bvec_pos.iter.bi_size += bv.bv_len; in copy_bvecs()
2747 rbd_img_handle_request(img_req, img_req->work_result); in rbd_img_handle_request_work()
2752 INIT_WORK(&img_req->work, rbd_img_handle_request_work); in rbd_img_schedule()
2753 img_req->work_result = result; in rbd_img_schedule()
2754 queue_work(rbd_wq, &img_req->work); in rbd_img_schedule()
2759 struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev; in rbd_obj_may_exist()
2761 if (rbd_object_map_may_exist(rbd_dev, obj_req->ex.oe_objno)) { in rbd_obj_may_exist()
2762 obj_req->flags |= RBD_OBJ_FLAG_MAY_EXIST; in rbd_obj_may_exist()
2766 dout("%s %p objno %llu assuming dne\n", __func__, obj_req, in rbd_obj_may_exist()
2767 obj_req->ex.oe_objno); in rbd_obj_may_exist()
2781 obj_req->ex.oe_off, obj_req->ex.oe_len, 0, 0); in rbd_obj_read_object()
2795 struct rbd_img_request *img_req = obj_req->img_request; in rbd_obj_read_from_parent()
2796 struct rbd_device *parent = img_req->rbd_dev->parent; in rbd_obj_read_from_parent()
2802 return -ENOMEM; in rbd_obj_read_from_parent()
2805 __set_bit(IMG_REQ_CHILD, &child_img_req->flags); in rbd_obj_read_from_parent()
2806 child_img_req->obj_request = obj_req; in rbd_obj_read_from_parent()
2808 down_read(&parent->header_rwsem); in rbd_obj_read_from_parent()
2810 up_read(&parent->header_rwsem); in rbd_obj_read_from_parent()
2812 dout("%s child_img_req %p for obj_req %p\n", __func__, child_img_req, in rbd_obj_read_from_parent()
2816 switch (img_req->data_type) { in rbd_obj_read_from_parent()
2819 obj_req->img_extents, in rbd_obj_read_from_parent()
2820 obj_req->num_img_extents, in rbd_obj_read_from_parent()
2821 &obj_req->bio_pos); in rbd_obj_read_from_parent()
2826 obj_req->img_extents, in rbd_obj_read_from_parent()
2827 obj_req->num_img_extents, in rbd_obj_read_from_parent()
2828 &obj_req->bvec_pos); in rbd_obj_read_from_parent()
2830 default: in rbd_obj_read_from_parent()
2835 obj_req->img_extents, in rbd_obj_read_from_parent()
2836 obj_req->num_img_extents, in rbd_obj_read_from_parent()
2837 obj_req->copyup_bvecs); in rbd_obj_read_from_parent()
2851 struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev; in rbd_obj_advance_read()
2855 switch (obj_req->read_state) { in rbd_obj_advance_read()
2860 *result = -ENOENT; in rbd_obj_advance_read()
2861 obj_req->read_state = RBD_OBJ_READ_OBJECT; in rbd_obj_advance_read()
2870 obj_req->read_state = RBD_OBJ_READ_OBJECT; in rbd_obj_advance_read()
2873 if (*result == -ENOENT && rbd_dev->parent_overlap) { in rbd_obj_advance_read()
2880 if (obj_req->num_img_extents) { in rbd_obj_advance_read()
2886 obj_req->read_state = RBD_OBJ_READ_PARENT; in rbd_obj_advance_read()
2892 * -ENOENT means a hole in the image -- zero-fill the entire in rbd_obj_advance_read()
2893 * length of the request. A short read also implies zero-fill in rbd_obj_advance_read()
2896 if (*result == -ENOENT) { in rbd_obj_advance_read()
2897 rbd_obj_zero_range(obj_req, 0, obj_req->ex.oe_len); in rbd_obj_advance_read()
2900 if (*result < obj_req->ex.oe_len) in rbd_obj_advance_read()
2902 obj_req->ex.oe_len - *result); in rbd_obj_advance_read()
2904 rbd_assert(*result == obj_req->ex.oe_len); in rbd_obj_advance_read()
2910 * The parent image is read only up to the overlap -- zero-fill in rbd_obj_advance_read()
2916 if (obj_overlap < obj_req->ex.oe_len) in rbd_obj_advance_read()
2918 obj_req->ex.oe_len - obj_overlap); in rbd_obj_advance_read()
2921 default: in rbd_obj_advance_read()
2928 struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev; in rbd_obj_write_is_noop()
2930 if (rbd_object_map_may_exist(rbd_dev, obj_req->ex.oe_objno)) in rbd_obj_write_is_noop()
2931 obj_req->flags |= RBD_OBJ_FLAG_MAY_EXIST; in rbd_obj_write_is_noop()
2933 if (!(obj_req->flags & RBD_OBJ_FLAG_MAY_EXIST) && in rbd_obj_write_is_noop()
2934 (obj_req->flags & RBD_OBJ_FLAG_NOOP_FOR_NONEXISTENT)) { in rbd_obj_write_is_noop()
2935 dout("%s %p noop for nonexistent\n", __func__, obj_req); in rbd_obj_write_is_noop()
2944 * 0 - object map update sent
2945 * 1 - object map update isn't needed
2946 * <0 - error
2950 struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev; in rbd_obj_write_pre_object_map()
2953 if (!(rbd_dev->header.features & RBD_FEATURE_OBJECT_MAP)) in rbd_obj_write_pre_object_map()
2956 if (obj_req->flags & RBD_OBJ_FLAG_DELETION) in rbd_obj_write_pre_object_map()
2971 if (obj_req->flags & RBD_OBJ_FLAG_COPYUP_ENABLED) in rbd_obj_write_object()
2978 if (obj_req->flags & RBD_OBJ_FLAG_COPYUP_ENABLED) { in rbd_obj_write_object()
3020 dout("%s obj_req %p bytes %u\n", __func__, obj_req, bytes); in rbd_obj_copyup_empty_snapc()
3049 dout("%s obj_req %p bytes %u\n", __func__, obj_req, bytes); in rbd_obj_copyup_current_snapc()
3079 rbd_assert(!obj_req->copyup_bvecs); in setup_copyup_bvecs()
3080 obj_req->copyup_bvec_count = calc_pages_for(0, obj_overlap); in setup_copyup_bvecs()
3081 obj_req->copyup_bvecs = kcalloc(obj_req->copyup_bvec_count, in setup_copyup_bvecs()
3082 sizeof(*obj_req->copyup_bvecs), in setup_copyup_bvecs()
3084 if (!obj_req->copyup_bvecs) in setup_copyup_bvecs()
3085 return -ENOMEM; in setup_copyup_bvecs()
3087 for (i = 0; i < obj_req->copyup_bvec_count; i++) { in setup_copyup_bvecs()
3092 return -ENOMEM; in setup_copyup_bvecs()
3094 bvec_set_page(&obj_req->copyup_bvecs[i], page, len, 0); in setup_copyup_bvecs()
3095 obj_overlap -= len; in setup_copyup_bvecs()
3109 struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev; in rbd_obj_copyup_read_parent()
3112 rbd_assert(obj_req->num_img_extents); in rbd_obj_copyup_read_parent()
3113 prune_extents(obj_req->img_extents, &obj_req->num_img_extents, in rbd_obj_copyup_read_parent()
3114 rbd_dev->parent_overlap); in rbd_obj_copyup_read_parent()
3115 if (!obj_req->num_img_extents) { in rbd_obj_copyup_read_parent()
3118 * image has been flattened). Re-submit the original write in rbd_obj_copyup_read_parent()
3119 * request -- pass MODS_ONLY since the copyup isn't needed in rbd_obj_copyup_read_parent()
3134 struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev; in rbd_obj_copyup_object_maps()
3135 struct ceph_snap_context *snapc = obj_req->img_request->snapc; in rbd_obj_copyup_object_maps()
3140 rbd_assert(!obj_req->pending.result && !obj_req->pending.num_pending); in rbd_obj_copyup_object_maps()
3142 if (!(rbd_dev->header.features & RBD_FEATURE_OBJECT_MAP)) in rbd_obj_copyup_object_maps()
3145 if (obj_req->flags & RBD_OBJ_FLAG_COPYUP_ZEROS) in rbd_obj_copyup_object_maps()
3148 for (i = 0; i < snapc->num_snaps; i++) { in rbd_obj_copyup_object_maps()
3149 if ((rbd_dev->header.features & RBD_FEATURE_FAST_DIFF) && in rbd_obj_copyup_object_maps()
3150 i + 1 < snapc->num_snaps) in rbd_obj_copyup_object_maps()
3155 ret = rbd_object_map_update(obj_req, snapc->snaps[i], in rbd_obj_copyup_object_maps()
3158 obj_req->pending.result = ret; in rbd_obj_copyup_object_maps()
3163 obj_req->pending.num_pending++; in rbd_obj_copyup_object_maps()
3172 rbd_assert(!obj_req->pending.result && !obj_req->pending.num_pending); in rbd_obj_copyup_write_object()
3175 * Only send non-zero copyup data to save some I/O and network in rbd_obj_copyup_write_object()
3176 * bandwidth -- zero copyup data is equivalent to the object not in rbd_obj_copyup_write_object()
3179 if (obj_req->flags & RBD_OBJ_FLAG_COPYUP_ZEROS) in rbd_obj_copyup_write_object()
3182 if (obj_req->img_request->snapc->num_snaps && bytes > 0) { in rbd_obj_copyup_write_object()
3185 * deep-copyup the object through all existing snapshots. in rbd_obj_copyup_write_object()
3191 obj_req->pending.result = ret; in rbd_obj_copyup_write_object()
3195 obj_req->pending.num_pending++; in rbd_obj_copyup_write_object()
3201 obj_req->pending.result = ret; in rbd_obj_copyup_write_object()
3205 obj_req->pending.num_pending++; in rbd_obj_copyup_write_object()
3210 struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev; in rbd_obj_advance_copyup()
3214 switch (obj_req->copyup_state) { in rbd_obj_advance_copyup()
3223 if (obj_req->num_img_extents) in rbd_obj_advance_copyup()
3224 obj_req->copyup_state = RBD_OBJ_COPYUP_READ_PARENT; in rbd_obj_advance_copyup()
3226 obj_req->copyup_state = RBD_OBJ_COPYUP_WRITE_OBJECT; in rbd_obj_advance_copyup()
3232 if (is_zero_bvecs(obj_req->copyup_bvecs, in rbd_obj_advance_copyup()
3234 dout("%s %p detected zeros\n", __func__, obj_req); in rbd_obj_advance_copyup()
3235 obj_req->flags |= RBD_OBJ_FLAG_COPYUP_ZEROS; in rbd_obj_advance_copyup()
3239 if (!obj_req->pending.num_pending) { in rbd_obj_advance_copyup()
3240 *result = obj_req->pending.result; in rbd_obj_advance_copyup()
3241 obj_req->copyup_state = RBD_OBJ_COPYUP_OBJECT_MAPS; in rbd_obj_advance_copyup()
3244 obj_req->copyup_state = __RBD_OBJ_COPYUP_OBJECT_MAPS; in rbd_obj_advance_copyup()
3247 if (!pending_result_dec(&obj_req->pending, result)) in rbd_obj_advance_copyup()
3258 if (!obj_req->pending.num_pending) { in rbd_obj_advance_copyup()
3259 *result = obj_req->pending.result; in rbd_obj_advance_copyup()
3260 obj_req->copyup_state = RBD_OBJ_COPYUP_WRITE_OBJECT; in rbd_obj_advance_copyup()
3263 obj_req->copyup_state = __RBD_OBJ_COPYUP_WRITE_OBJECT; in rbd_obj_advance_copyup()
3266 if (!pending_result_dec(&obj_req->pending, result)) in rbd_obj_advance_copyup()
3271 default: in rbd_obj_advance_copyup()
3278 * 0 - object map update sent
3279 * 1 - object map update isn't needed
3280 * <0 - error
3284 struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev; in rbd_obj_write_post_object_map()
3287 if (!(rbd_dev->header.features & RBD_FEATURE_OBJECT_MAP)) in rbd_obj_write_post_object_map()
3290 if (!(obj_req->flags & RBD_OBJ_FLAG_DELETION)) in rbd_obj_write_post_object_map()
3299 struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev; in rbd_obj_advance_write()
3303 switch (obj_req->write_state) { in rbd_obj_advance_write()
3316 obj_req->write_state = RBD_OBJ_WRITE_PRE_OBJECT_MAP; in rbd_obj_advance_write()
3331 obj_req->write_state = RBD_OBJ_WRITE_OBJECT; in rbd_obj_advance_write()
3334 if (*result == -ENOENT) { in rbd_obj_advance_write()
3335 if (obj_req->flags & RBD_OBJ_FLAG_COPYUP_ENABLED) { in rbd_obj_advance_write()
3337 obj_req->copyup_state = RBD_OBJ_COPYUP_START; in rbd_obj_advance_write()
3338 obj_req->write_state = __RBD_OBJ_WRITE_COPYUP; in rbd_obj_advance_write()
3342 * On a non-existent object: in rbd_obj_advance_write()
3343 * delete - -ENOENT, truncate/zero - 0 in rbd_obj_advance_write()
3345 if (obj_req->flags & RBD_OBJ_FLAG_DELETION) in rbd_obj_advance_write()
3351 obj_req->write_state = RBD_OBJ_WRITE_COPYUP; in rbd_obj_advance_write()
3367 obj_req->write_state = RBD_OBJ_WRITE_POST_OBJECT_MAP; in rbd_obj_advance_write()
3376 default: in rbd_obj_advance_write()
3387 struct rbd_img_request *img_req = obj_req->img_request; in __rbd_obj_handle_request()
3388 struct rbd_device *rbd_dev = img_req->rbd_dev; in __rbd_obj_handle_request()
3391 mutex_lock(&obj_req->state_mutex); in __rbd_obj_handle_request()
3396 mutex_unlock(&obj_req->state_mutex); in __rbd_obj_handle_request()
3401 obj_op_name(img_req->op_type), obj_req->ex.oe_objno, in __rbd_obj_handle_request()
3402 obj_req->ex.oe_off, obj_req->ex.oe_len, *result); in __rbd_obj_handle_request()
3408 * This is open-coded in rbd_img_handle_request() to avoid parent chain
3414 rbd_img_handle_request(obj_req->img_request, result); in rbd_obj_handle_request()
3419 struct rbd_device *rbd_dev = img_req->rbd_dev; in need_exclusive_lock()
3421 if (!(rbd_dev->header.features & RBD_FEATURE_EXCLUSIVE_LOCK)) in need_exclusive_lock()
3427 rbd_assert(!test_bit(IMG_REQ_CHILD, &img_req->flags)); in need_exclusive_lock()
3428 if (rbd_dev->opts->lock_on_read || in need_exclusive_lock()
3429 (rbd_dev->header.features & RBD_FEATURE_OBJECT_MAP)) in need_exclusive_lock()
3437 struct rbd_device *rbd_dev = img_req->rbd_dev; in rbd_lock_add_request()
3440 lockdep_assert_held(&rbd_dev->lock_rwsem); in rbd_lock_add_request()
3441 locked = rbd_dev->lock_state == RBD_LOCK_STATE_LOCKED; in rbd_lock_add_request()
3442 spin_lock(&rbd_dev->lock_lists_lock); in rbd_lock_add_request()
3443 rbd_assert(list_empty(&img_req->lock_item)); in rbd_lock_add_request()
3445 list_add_tail(&img_req->lock_item, &rbd_dev->acquiring_list); in rbd_lock_add_request()
3447 list_add_tail(&img_req->lock_item, &rbd_dev->running_list); in rbd_lock_add_request()
3448 spin_unlock(&rbd_dev->lock_lists_lock); in rbd_lock_add_request()
3454 struct rbd_device *rbd_dev = img_req->rbd_dev; in rbd_lock_del_request()
3457 lockdep_assert_held(&rbd_dev->lock_rwsem); in rbd_lock_del_request()
3458 spin_lock(&rbd_dev->lock_lists_lock); in rbd_lock_del_request()
3459 if (!list_empty(&img_req->lock_item)) { in rbd_lock_del_request()
3460 rbd_assert(!list_empty(&rbd_dev->running_list)); in rbd_lock_del_request()
3461 list_del_init(&img_req->lock_item); in rbd_lock_del_request()
3462 need_wakeup = (rbd_dev->lock_state == RBD_LOCK_STATE_QUIESCING && in rbd_lock_del_request()
3463 list_empty(&rbd_dev->running_list)); in rbd_lock_del_request()
3465 spin_unlock(&rbd_dev->lock_lists_lock); in rbd_lock_del_request()
3467 complete(&rbd_dev->quiescing_wait); in rbd_lock_del_request()
3472 struct rbd_device *rbd_dev = img_req->rbd_dev; in rbd_img_exclusive_lock()
3484 dout("%s rbd_dev %p queueing lock_dwork\n", __func__, rbd_dev); in rbd_img_exclusive_lock()
3485 queue_delayed_work(rbd_dev->task_wq, &rbd_dev->lock_dwork, 0); in rbd_img_exclusive_lock()
3491 struct rbd_device *rbd_dev = img_req->rbd_dev; in rbd_img_object_requests()
3494 rbd_assert(!img_req->pending.result && !img_req->pending.num_pending); in rbd_img_object_requests()
3499 rbd_assert(!img_req->snapc); in rbd_img_object_requests()
3500 down_read(&rbd_dev->header_rwsem); in rbd_img_object_requests()
3501 img_req->snapc = ceph_get_snap_context(rbd_dev->header.snapc); in rbd_img_object_requests()
3502 up_read(&rbd_dev->header_rwsem); in rbd_img_object_requests()
3510 img_req->pending.result = result; in rbd_img_object_requests()
3514 img_req->pending.num_pending++; in rbd_img_object_requests()
3524 switch (img_req->state) { in rbd_img_advance()
3533 img_req->state = RBD_IMG_EXCLUSIVE_LOCK; in rbd_img_advance()
3542 if (!img_req->pending.num_pending) { in rbd_img_advance()
3543 *result = img_req->pending.result; in rbd_img_advance()
3544 img_req->state = RBD_IMG_OBJECT_REQUESTS; in rbd_img_advance()
3547 img_req->state = __RBD_IMG_OBJECT_REQUESTS; in rbd_img_advance()
3550 if (!pending_result_dec(&img_req->pending, result)) in rbd_img_advance()
3555 default: in rbd_img_advance()
3566 struct rbd_device *rbd_dev = img_req->rbd_dev; in __rbd_img_handle_request()
3570 down_read(&rbd_dev->lock_rwsem); in __rbd_img_handle_request()
3571 mutex_lock(&img_req->state_mutex); in __rbd_img_handle_request()
3575 mutex_unlock(&img_req->state_mutex); in __rbd_img_handle_request()
3576 up_read(&rbd_dev->lock_rwsem); in __rbd_img_handle_request()
3578 mutex_lock(&img_req->state_mutex); in __rbd_img_handle_request()
3580 mutex_unlock(&img_req->state_mutex); in __rbd_img_handle_request()
3586 test_bit(IMG_REQ_CHILD, &img_req->flags) ? "child " : "", in __rbd_img_handle_request()
3587 obj_op_name(img_req->op_type), *result); in __rbd_img_handle_request()
3598 if (test_bit(IMG_REQ_CHILD, &img_req->flags)) { in rbd_img_handle_request()
3599 struct rbd_obj_request *obj_req = img_req->obj_request; in rbd_img_handle_request()
3603 img_req = obj_req->img_request; in rbd_img_handle_request()
3619 return lhs->gid == rhs->gid && lhs->handle == rhs->handle; in rbd_cid_equal()
3626 mutex_lock(&rbd_dev->watch_mutex); in rbd_get_cid()
3627 cid.gid = ceph_client_gid(rbd_dev->rbd_client->client); in rbd_get_cid()
3628 cid.handle = rbd_dev->watch_cookie; in rbd_get_cid()
3629 mutex_unlock(&rbd_dev->watch_mutex); in rbd_get_cid()
3639 dout("%s rbd_dev %p %llu-%llu -> %llu-%llu\n", __func__, rbd_dev, in rbd_set_owner_cid()
3640 rbd_dev->owner_cid.gid, rbd_dev->owner_cid.handle, in rbd_set_owner_cid()
3641 cid->gid, cid->handle); in rbd_set_owner_cid()
3642 rbd_dev->owner_cid = *cid; /* struct */ in rbd_set_owner_cid()
3647 mutex_lock(&rbd_dev->watch_mutex); in format_lock_cookie()
3648 sprintf(buf, "%s %llu", RBD_LOCK_COOKIE_PREFIX, rbd_dev->watch_cookie); in format_lock_cookie()
3649 mutex_unlock(&rbd_dev->watch_mutex); in format_lock_cookie()
3656 rbd_dev->lock_state = RBD_LOCK_STATE_LOCKED; in __rbd_lock()
3657 strcpy(rbd_dev->lock_cookie, cookie); in __rbd_lock()
3659 queue_work(rbd_dev->task_wq, &rbd_dev->acquired_lock_work); in __rbd_lock()
3667 struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; in rbd_lock()
3672 rbd_dev->lock_cookie[0] != '\0'); in rbd_lock()
3675 ret = ceph_cls_lock(osdc, &rbd_dev->header_oid, &rbd_dev->header_oloc, in rbd_lock()
3678 if (ret && ret != -EEXIST) in rbd_lock()
3690 struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; in rbd_unlock()
3694 rbd_dev->lock_cookie[0] == '\0'); in rbd_unlock()
3696 ret = ceph_cls_unlock(osdc, &rbd_dev->header_oid, &rbd_dev->header_oloc, in rbd_unlock()
3697 RBD_LOCK_NAME, rbd_dev->lock_cookie); in rbd_unlock()
3698 if (ret && ret != -ENOENT) in rbd_unlock()
3702 rbd_dev->lock_state = RBD_LOCK_STATE_UNLOCKED; in rbd_unlock()
3703 rbd_dev->lock_cookie[0] = '\0'; in rbd_unlock()
3705 queue_work(rbd_dev->task_wq, &rbd_dev->released_lock_work); in rbd_unlock()
3713 struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; in __rbd_notify_op_lock()
3719 dout("%s rbd_dev %p notify_op %d\n", __func__, rbd_dev, notify_op); in __rbd_notify_op_lock()
3722 ceph_start_encoding(&p, 2, 1, buf_size - CEPH_ENCODING_START_BLK_LEN); in __rbd_notify_op_lock()
3727 return ceph_osdc_notify(osdc, &rbd_dev->header_oid, in __rbd_notify_op_lock()
3728 &rbd_dev->header_oloc, buf, buf_size, in __rbd_notify_op_lock()
3761 dout("%s rbd_dev %p\n", __func__, rbd_dev); in rbd_request_lock()
3765 if (ret && ret != -ETIMEDOUT) { in rbd_request_lock()
3776 while (n--) { in rbd_request_lock()
3790 ret = -EIO; in rbd_request_lock()
3810 ret = -ETIMEDOUT; in rbd_request_lock()
3818 ret = -EINVAL; in rbd_request_lock()
3830 dout("%s rbd_dev %p result %d\n", __func__, rbd_dev, result); in wake_lock_waiters()
3831 lockdep_assert_held_write(&rbd_dev->lock_rwsem); in wake_lock_waiters()
3833 cancel_delayed_work(&rbd_dev->lock_dwork); in wake_lock_waiters()
3834 if (!completion_done(&rbd_dev->acquire_wait)) { in wake_lock_waiters()
3835 rbd_assert(list_empty(&rbd_dev->acquiring_list) && in wake_lock_waiters()
3836 list_empty(&rbd_dev->running_list)); in wake_lock_waiters()
3837 rbd_dev->acquire_err = result; in wake_lock_waiters()
3838 complete_all(&rbd_dev->acquire_wait); in wake_lock_waiters()
3842 while (!list_empty(&rbd_dev->acquiring_list)) { in wake_lock_waiters()
3843 img_req = list_first_entry(&rbd_dev->acquiring_list, in wake_lock_waiters()
3845 mutex_lock(&img_req->state_mutex); in wake_lock_waiters()
3846 rbd_assert(img_req->state == RBD_IMG_EXCLUSIVE_LOCK); in wake_lock_waiters()
3848 list_move_tail(&img_req->lock_item, in wake_lock_waiters()
3849 &rbd_dev->running_list); in wake_lock_waiters()
3851 list_del_init(&img_req->lock_item); in wake_lock_waiters()
3853 mutex_unlock(&img_req->state_mutex); in wake_lock_waiters()
3860 return lhs->id.name.type == rhs->id.name.type && in locker_equal()
3861 lhs->id.name.num == rhs->id.name.num && in locker_equal()
3862 !strcmp(lhs->id.cookie, rhs->id.cookie) && in locker_equal()
3863 ceph_addr_equal_no_type(&lhs->info.addr, &rhs->info.addr); in locker_equal()
3874 struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; in get_lock_owner_info()
3882 ret = ceph_cls_lock_info(osdc, &rbd_dev->header_oid, in get_lock_owner_info()
3883 &rbd_dev->header_oloc, RBD_LOCK_NAME, in get_lock_owner_info()
3891 dout("%s rbd_dev %p no lockers detected\n", __func__, rbd_dev); in get_lock_owner_info()
3920 dout("%s rbd_dev %p got locker %s%llu@%pISpc/%u handle %llu\n", in get_lock_owner_info()
3932 return ERR_PTR(-EBUSY); in get_lock_owner_info()
3938 struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; in find_watcher()
3945 ret = ceph_osdc_list_watchers(osdc, &rbd_dev->header_oid, in find_watcher()
3946 &rbd_dev->header_oloc, &watchers, in find_watcher()
3953 sscanf(locker->id.cookie, RBD_LOCK_COOKIE_PREFIX " %llu", &cookie); in find_watcher()
3956 * Ignore addr->type while comparing. This mimics in find_watcher()
3960 &locker->info.addr) && in find_watcher()
3967 dout("%s rbd_dev %p found cid %llu-%llu\n", __func__, in find_watcher()
3975 dout("%s rbd_dev %p no watchers\n", __func__, rbd_dev); in find_watcher()
3987 struct ceph_client *client = rbd_dev->rbd_client->client; in rbd_try_lock()
3997 if (ret != -EBUSY) { in rbd_try_lock()
4027 ENTITY_NAME(locker->id.name)); in rbd_try_lock()
4029 ret = ceph_monc_blocklist_add(&client->monc, in rbd_try_lock()
4030 &locker->info.addr); in rbd_try_lock()
4033 ENTITY_NAME(locker->id.name), ret); in rbd_try_lock()
4037 ret = ceph_cls_break_lock(&client->osdc, &rbd_dev->header_oid, in rbd_try_lock()
4038 &rbd_dev->header_oloc, RBD_LOCK_NAME, in rbd_try_lock()
4039 locker->id.cookie, &locker->id.name); in rbd_try_lock()
4040 if (ret && ret != -ENOENT) { in rbd_try_lock()
4065 if (rbd_dev->header.features & RBD_FEATURE_OBJECT_MAP) { in rbd_post_acquire_action()
4076 * 0 - lock acquired
4077 * 1 - caller should call rbd_request_lock()
4078 * <0 - error
4084 down_read(&rbd_dev->lock_rwsem); in rbd_try_acquire_lock()
4085 dout("%s rbd_dev %p read lock_state %d\n", __func__, rbd_dev, in rbd_try_acquire_lock()
4086 rbd_dev->lock_state); in rbd_try_acquire_lock()
4088 up_read(&rbd_dev->lock_rwsem); in rbd_try_acquire_lock()
4092 up_read(&rbd_dev->lock_rwsem); in rbd_try_acquire_lock()
4093 down_write(&rbd_dev->lock_rwsem); in rbd_try_acquire_lock()
4094 dout("%s rbd_dev %p write lock_state %d\n", __func__, rbd_dev, in rbd_try_acquire_lock()
4095 rbd_dev->lock_state); in rbd_try_acquire_lock()
4097 up_write(&rbd_dev->lock_rwsem); in rbd_try_acquire_lock()
4107 up_write(&rbd_dev->lock_rwsem); in rbd_try_acquire_lock()
4111 rbd_assert(rbd_dev->lock_state == RBD_LOCK_STATE_LOCKED); in rbd_try_acquire_lock()
4112 rbd_assert(list_empty(&rbd_dev->running_list)); in rbd_try_acquire_lock()
4116 rbd_warn(rbd_dev, "post-acquire action failed: %d", ret); in rbd_try_acquire_lock()
4127 up_write(&rbd_dev->lock_rwsem); in rbd_try_acquire_lock()
4137 dout("%s rbd_dev %p\n", __func__, rbd_dev); in rbd_acquire_lock()
4141 dout("%s rbd_dev %p ret %d - done\n", __func__, rbd_dev, ret); in rbd_acquire_lock()
4146 if (ret == -ETIMEDOUT) { in rbd_acquire_lock()
4148 } else if (ret == -EROFS) { in rbd_acquire_lock()
4150 down_write(&rbd_dev->lock_rwsem); in rbd_acquire_lock()
4152 up_write(&rbd_dev->lock_rwsem); in rbd_acquire_lock()
4155 mod_delayed_work(rbd_dev->task_wq, &rbd_dev->lock_dwork, in rbd_acquire_lock()
4162 dout("%s rbd_dev %p requeuing lock_dwork\n", __func__, in rbd_acquire_lock()
4164 mod_delayed_work(rbd_dev->task_wq, &rbd_dev->lock_dwork, in rbd_acquire_lock()
4165 msecs_to_jiffies(2 * RBD_NOTIFY_TIMEOUT * MSEC_PER_SEC)); in rbd_acquire_lock()
4171 dout("%s rbd_dev %p\n", __func__, rbd_dev); in rbd_quiesce_lock()
4172 lockdep_assert_held_write(&rbd_dev->lock_rwsem); in rbd_quiesce_lock()
4174 if (rbd_dev->lock_state != RBD_LOCK_STATE_LOCKED) in rbd_quiesce_lock()
4178 * Ensure that all in-flight IO is flushed. in rbd_quiesce_lock()
4180 rbd_dev->lock_state = RBD_LOCK_STATE_QUIESCING; in rbd_quiesce_lock()
4181 rbd_assert(!completion_done(&rbd_dev->quiescing_wait)); in rbd_quiesce_lock()
4182 if (list_empty(&rbd_dev->running_list)) in rbd_quiesce_lock()
4185 up_write(&rbd_dev->lock_rwsem); in rbd_quiesce_lock()
4186 wait_for_completion(&rbd_dev->quiescing_wait); in rbd_quiesce_lock()
4188 down_write(&rbd_dev->lock_rwsem); in rbd_quiesce_lock()
4189 if (rbd_dev->lock_state != RBD_LOCK_STATE_QUIESCING) in rbd_quiesce_lock()
4192 rbd_assert(list_empty(&rbd_dev->running_list)); in rbd_quiesce_lock()
4198 if (rbd_dev->header.features & RBD_FEATURE_OBJECT_MAP) in rbd_pre_release_action()
4204 rbd_assert(list_empty(&rbd_dev->running_list)); in __rbd_release_lock()
4221 * Give others a chance to grab the lock - we would re-acquire in rbd_release_lock()
4227 cancel_delayed_work(&rbd_dev->lock_dwork); in rbd_release_lock()
4235 down_write(&rbd_dev->lock_rwsem); in rbd_release_lock_work()
4237 up_write(&rbd_dev->lock_rwsem); in rbd_release_lock_work()
4244 dout("%s rbd_dev %p\n", __func__, rbd_dev); in maybe_kick_acquire()
4248 spin_lock(&rbd_dev->lock_lists_lock); in maybe_kick_acquire()
4249 have_requests = !list_empty(&rbd_dev->acquiring_list); in maybe_kick_acquire()
4250 spin_unlock(&rbd_dev->lock_lists_lock); in maybe_kick_acquire()
4251 if (have_requests || delayed_work_pending(&rbd_dev->lock_dwork)) { in maybe_kick_acquire()
4252 dout("%s rbd_dev %p kicking lock_dwork\n", __func__, rbd_dev); in maybe_kick_acquire()
4253 mod_delayed_work(rbd_dev->task_wq, &rbd_dev->lock_dwork, 0); in maybe_kick_acquire()
4262 if (struct_v >= 2) { in rbd_handle_acquired_lock()
4267 dout("%s rbd_dev %p cid %llu-%llu\n", __func__, rbd_dev, cid.gid, in rbd_handle_acquired_lock()
4270 down_write(&rbd_dev->lock_rwsem); in rbd_handle_acquired_lock()
4271 if (rbd_cid_equal(&cid, &rbd_dev->owner_cid)) { in rbd_handle_acquired_lock()
4272 dout("%s rbd_dev %p cid %llu-%llu == owner_cid\n", in rbd_handle_acquired_lock()
4277 downgrade_write(&rbd_dev->lock_rwsem); in rbd_handle_acquired_lock()
4279 down_read(&rbd_dev->lock_rwsem); in rbd_handle_acquired_lock()
4283 up_read(&rbd_dev->lock_rwsem); in rbd_handle_acquired_lock()
4291 if (struct_v >= 2) { in rbd_handle_released_lock()
4296 dout("%s rbd_dev %p cid %llu-%llu\n", __func__, rbd_dev, cid.gid, in rbd_handle_released_lock()
4299 down_write(&rbd_dev->lock_rwsem); in rbd_handle_released_lock()
4300 if (!rbd_cid_equal(&cid, &rbd_dev->owner_cid)) { in rbd_handle_released_lock()
4301 dout("%s rbd_dev %p cid %llu-%llu != owner_cid %llu-%llu\n", in rbd_handle_released_lock()
4303 rbd_dev->owner_cid.gid, rbd_dev->owner_cid.handle); in rbd_handle_released_lock()
4307 downgrade_write(&rbd_dev->lock_rwsem); in rbd_handle_released_lock()
4309 down_read(&rbd_dev->lock_rwsem); in rbd_handle_released_lock()
4313 up_read(&rbd_dev->lock_rwsem); in rbd_handle_released_lock()
4327 if (struct_v >= 2) { in rbd_handle_request_lock()
4332 dout("%s rbd_dev %p cid %llu-%llu\n", __func__, rbd_dev, cid.gid, in rbd_handle_request_lock()
4337 down_read(&rbd_dev->lock_rwsem); in rbd_handle_request_lock()
4339 if (rbd_dev->lock_state == RBD_LOCK_STATE_LOCKED && in rbd_handle_request_lock()
4340 rbd_cid_equal(&rbd_dev->owner_cid, &rbd_empty_cid)) in rbd_handle_request_lock()
4349 if (rbd_dev->lock_state == RBD_LOCK_STATE_LOCKED) { in rbd_handle_request_lock()
4350 if (!rbd_dev->opts->exclusive) { in rbd_handle_request_lock()
4351 dout("%s rbd_dev %p queueing unlock_work\n", in rbd_handle_request_lock()
4353 queue_work(rbd_dev->task_wq, in rbd_handle_request_lock()
4354 &rbd_dev->unlock_work); in rbd_handle_request_lock()
4357 result = -EROFS; in rbd_handle_request_lock()
4363 up_read(&rbd_dev->lock_rwsem); in rbd_handle_request_lock()
4370 struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; in __rbd_acknowledge_notify()
4380 buf_size - CEPH_ENCODING_START_BLK_LEN); in __rbd_acknowledge_notify()
4386 ret = ceph_osdc_notify_ack(osdc, &rbd_dev->header_oid, in __rbd_acknowledge_notify()
4387 &rbd_dev->header_oloc, notify_id, cookie, in __rbd_acknowledge_notify()
4396 dout("%s rbd_dev %p\n", __func__, rbd_dev); in rbd_acknowledge_notify()
4403 dout("%s rbd_dev %p result %d\n", __func__, rbd_dev, result); in rbd_acknowledge_notify_result()
4418 dout("%s rbd_dev %p cookie %llu notify_id %llu data_len %zu\n", in rbd_watch_cb()
4436 dout("%s rbd_dev %p notify_op %u\n", __func__, rbd_dev, notify_op); in rbd_watch_cb()
4461 default: in rbd_watch_cb()
4464 cookie, -EOPNOTSUPP); in rbd_watch_cb()
4479 down_write(&rbd_dev->lock_rwsem); in rbd_watch_errcb()
4481 up_write(&rbd_dev->lock_rwsem); in rbd_watch_errcb()
4483 mutex_lock(&rbd_dev->watch_mutex); in rbd_watch_errcb()
4484 if (rbd_dev->watch_state == RBD_WATCH_STATE_REGISTERED) { in rbd_watch_errcb()
4486 rbd_dev->watch_state = RBD_WATCH_STATE_ERROR; in rbd_watch_errcb()
4488 queue_delayed_work(rbd_dev->task_wq, &rbd_dev->watch_dwork, 0); in rbd_watch_errcb()
4490 mutex_unlock(&rbd_dev->watch_mutex); in rbd_watch_errcb()
4498 struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; in __rbd_register_watch()
4501 rbd_assert(!rbd_dev->watch_handle); in __rbd_register_watch()
4502 dout("%s rbd_dev %p\n", __func__, rbd_dev); in __rbd_register_watch()
4504 handle = ceph_osdc_watch(osdc, &rbd_dev->header_oid, in __rbd_register_watch()
4505 &rbd_dev->header_oloc, rbd_watch_cb, in __rbd_register_watch()
4510 rbd_dev->watch_handle = handle; in __rbd_register_watch()
4519 struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; in __rbd_unregister_watch()
4522 rbd_assert(rbd_dev->watch_handle); in __rbd_unregister_watch()
4523 dout("%s rbd_dev %p\n", __func__, rbd_dev); in __rbd_unregister_watch()
4525 ret = ceph_osdc_unwatch(osdc, rbd_dev->watch_handle); in __rbd_unregister_watch()
4529 rbd_dev->watch_handle = NULL; in __rbd_unregister_watch()
4536 mutex_lock(&rbd_dev->watch_mutex); in rbd_register_watch()
4537 rbd_assert(rbd_dev->watch_state == RBD_WATCH_STATE_UNREGISTERED); in rbd_register_watch()
4542 rbd_dev->watch_state = RBD_WATCH_STATE_REGISTERED; in rbd_register_watch()
4543 rbd_dev->watch_cookie = rbd_dev->watch_handle->linger_id; in rbd_register_watch()
4546 mutex_unlock(&rbd_dev->watch_mutex); in rbd_register_watch()
4552 dout("%s rbd_dev %p\n", __func__, rbd_dev); in cancel_tasks_sync()
4554 cancel_work_sync(&rbd_dev->acquired_lock_work); in cancel_tasks_sync()
4555 cancel_work_sync(&rbd_dev->released_lock_work); in cancel_tasks_sync()
4556 cancel_delayed_work_sync(&rbd_dev->lock_dwork); in cancel_tasks_sync()
4557 cancel_work_sync(&rbd_dev->unlock_work); in cancel_tasks_sync()
4568 mutex_lock(&rbd_dev->watch_mutex); in rbd_unregister_watch()
4569 if (rbd_dev->watch_state == RBD_WATCH_STATE_REGISTERED) in rbd_unregister_watch()
4571 rbd_dev->watch_state = RBD_WATCH_STATE_UNREGISTERED; in rbd_unregister_watch()
4572 mutex_unlock(&rbd_dev->watch_mutex); in rbd_unregister_watch()
4574 cancel_delayed_work_sync(&rbd_dev->watch_dwork); in rbd_unregister_watch()
4575 ceph_osdc_flush_notifies(&rbd_dev->rbd_client->client->osdc); in rbd_unregister_watch()
4583 struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; in rbd_reacquire_lock()
4591 ret = ceph_cls_set_cookie(osdc, &rbd_dev->header_oid, in rbd_reacquire_lock()
4592 &rbd_dev->header_oloc, RBD_LOCK_NAME, in rbd_reacquire_lock()
4593 CEPH_CLS_LOCK_EXCLUSIVE, rbd_dev->lock_cookie, in rbd_reacquire_lock()
4596 if (ret != -EOPNOTSUPP) in rbd_reacquire_lock()
4600 if (rbd_dev->opts->exclusive) in rbd_reacquire_lock()
4609 queue_delayed_work(rbd_dev->task_wq, &rbd_dev->lock_dwork, 0); in rbd_reacquire_lock()
4622 dout("%s rbd_dev %p\n", __func__, rbd_dev); in rbd_reregister_watch()
4624 mutex_lock(&rbd_dev->watch_mutex); in rbd_reregister_watch()
4625 if (rbd_dev->watch_state != RBD_WATCH_STATE_ERROR) { in rbd_reregister_watch()
4626 mutex_unlock(&rbd_dev->watch_mutex); in rbd_reregister_watch()
4633 if (ret != -EBLOCKLISTED && ret != -ENOENT) { in rbd_reregister_watch()
4634 queue_delayed_work(rbd_dev->task_wq, in rbd_reregister_watch()
4635 &rbd_dev->watch_dwork, in rbd_reregister_watch()
4637 mutex_unlock(&rbd_dev->watch_mutex); in rbd_reregister_watch()
4641 mutex_unlock(&rbd_dev->watch_mutex); in rbd_reregister_watch()
4642 down_write(&rbd_dev->lock_rwsem); in rbd_reregister_watch()
4644 up_write(&rbd_dev->lock_rwsem); in rbd_reregister_watch()
4648 rbd_dev->watch_state = RBD_WATCH_STATE_REGISTERED; in rbd_reregister_watch()
4649 rbd_dev->watch_cookie = rbd_dev->watch_handle->linger_id; in rbd_reregister_watch()
4650 mutex_unlock(&rbd_dev->watch_mutex); in rbd_reregister_watch()
4652 down_write(&rbd_dev->lock_rwsem); in rbd_reregister_watch()
4653 if (rbd_dev->lock_state == RBD_LOCK_STATE_LOCKED) in rbd_reregister_watch()
4655 up_write(&rbd_dev->lock_rwsem); in rbd_reregister_watch()
4675 struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; in rbd_obj_method_sync()
4683 * also supply outbound data--parameters for the object in rbd_obj_method_sync()
4689 return -E2BIG; in rbd_obj_method_sync()
4693 return -ENOMEM; in rbd_obj_method_sync()
4702 return -ENOMEM; in rbd_obj_method_sync()
4723 struct rbd_device *rbd_dev = img_request->rbd_dev; in rbd_queue_workfn()
4724 enum obj_operation_type op_type = img_request->op_type; in rbd_queue_workfn()
4731 /* Ignore/skip any zero-length requests */ in rbd_queue_workfn()
4733 dout("%s: zero-length request\n", __func__); in rbd_queue_workfn()
4740 down_read(&rbd_dev->header_rwsem); in rbd_queue_workfn()
4741 mapping_size = rbd_dev->mapping.size; in rbd_queue_workfn()
4743 up_read(&rbd_dev->header_rwsem); in rbd_queue_workfn()
4748 result = -EIO; in rbd_queue_workfn()
4752 dout("%s rbd_dev %p img_req %p %s %llu~%llu\n", __func__, rbd_dev, in rbd_queue_workfn()
4759 rq->bio); in rbd_queue_workfn()
4777 struct rbd_device *rbd_dev = hctx->queue->queuedata; in rbd_queue_rq()
4778 struct rbd_img_request *img_req = blk_mq_rq_to_pdu(bd->rq); in rbd_queue_rq()
4781 switch (req_op(bd->rq)) { in rbd_queue_rq()
4794 default: in rbd_queue_rq()
4795 rbd_warn(rbd_dev, "unknown req_op %d", req_op(bd->rq)); in rbd_queue_rq()
4803 rbd_warn(rbd_dev, "%s on read-only mapping", in rbd_queue_rq()
4804 obj_op_name(img_req->op_type)); in rbd_queue_rq()
4810 INIT_WORK(&img_req->work, rbd_queue_workfn); in rbd_queue_rq()
4811 queue_work(rbd_wq, &img_req->work); in rbd_queue_rq()
4817 put_disk(rbd_dev->disk); in rbd_free_disk()
4818 blk_mq_free_tag_set(&rbd_dev->tag_set); in rbd_free_disk()
4819 rbd_dev->disk = NULL; in rbd_free_disk()
4828 struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; in rbd_obj_read_sync()
4836 return -ENOMEM; in rbd_obj_read_sync()
4838 ceph_oid_copy(&req->r_base_oid, oid); in rbd_obj_read_sync()
4839 ceph_oloc_copy(&req->r_base_oloc, oloc); in rbd_obj_read_sync()
4840 req->r_flags = CEPH_OSD_FLAG_READ; in rbd_obj_read_sync()
4868 * return, the rbd_dev->header field will contain up-to-date
4882 * The complete header will include an array of its 64-bit in rbd_dev_v1_header_info()
4884 * a contiguous block of NUL-terminated strings. Note that in rbd_dev_v1_header_info()
4886 * it in, in which case we re-read it. in rbd_dev_v1_header_info()
4898 return -ENOMEM; in rbd_dev_v1_header_info()
4900 ret = rbd_obj_read_sync(rbd_dev, &rbd_dev->header_oid, in rbd_dev_v1_header_info()
4901 &rbd_dev->header_oloc, ondisk, size); in rbd_dev_v1_header_info()
4905 ret = -ENXIO; in rbd_dev_v1_header_info()
4911 ret = -ENXIO; in rbd_dev_v1_header_info()
4916 names_size = le64_to_cpu(ondisk->snap_names_len); in rbd_dev_v1_header_info()
4918 snap_count = le32_to_cpu(ondisk->snap_count); in rbd_dev_v1_header_info()
4933 * If EXISTS is not set, rbd_dev->disk may be NULL, so don't in rbd_dev_update_size()
4937 if (test_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags) && in rbd_dev_update_size()
4938 !test_bit(RBD_DEV_FLAG_REMOVING, &rbd_dev->flags)) { in rbd_dev_update_size()
4939 size = (sector_t)rbd_dev->mapping.size / SECTOR_SIZE; in rbd_dev_update_size()
4940 dout("setting size to %llu sectors", (unsigned long long)size); in rbd_dev_update_size()
4941 set_capacity_and_notify(rbd_dev->disk, size); in rbd_dev_update_size()
4953 rbd_dev->layout.object_size * rbd_dev->layout.stripe_count; in rbd_init_disk()
4957 .io_min = rbd_dev->opts->alloc_size, in rbd_init_disk()
4963 memset(&rbd_dev->tag_set, 0, sizeof(rbd_dev->tag_set)); in rbd_init_disk()
4964 rbd_dev->tag_set.ops = &rbd_mq_ops; in rbd_init_disk()
4965 rbd_dev->tag_set.queue_depth = rbd_dev->opts->queue_depth; in rbd_init_disk()
4966 rbd_dev->tag_set.numa_node = NUMA_NO_NODE; in rbd_init_disk()
4967 rbd_dev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; in rbd_init_disk()
4968 rbd_dev->tag_set.nr_hw_queues = num_present_cpus(); in rbd_init_disk()
4969 rbd_dev->tag_set.cmd_size = sizeof(struct rbd_img_request); in rbd_init_disk()
4971 err = blk_mq_alloc_tag_set(&rbd_dev->tag_set); in rbd_init_disk()
4975 if (rbd_dev->opts->trim) { in rbd_init_disk()
4976 lim.discard_granularity = rbd_dev->opts->alloc_size; in rbd_init_disk()
4981 if (!ceph_test_opt(rbd_dev->rbd_client->client, NOCRC)) in rbd_init_disk()
4984 disk = blk_mq_alloc_disk(&rbd_dev->tag_set, &lim, rbd_dev); in rbd_init_disk()
4990 snprintf(disk->disk_name, sizeof(disk->disk_name), RBD_DRV_NAME "%d", in rbd_init_disk()
4991 rbd_dev->dev_id); in rbd_init_disk()
4992 disk->major = rbd_dev->major; in rbd_init_disk()
4993 disk->first_minor = rbd_dev->minor; in rbd_init_disk()
4995 disk->minors = (1 << RBD_SINGLE_MAJOR_PART_SHIFT); in rbd_init_disk()
4997 disk->minors = RBD_MINORS_PER_MAJOR; in rbd_init_disk()
4998 disk->fops = &rbd_bd_ops; in rbd_init_disk()
4999 disk->private_data = rbd_dev; in rbd_init_disk()
5000 rbd_dev->disk = disk; in rbd_init_disk()
5004 blk_mq_free_tag_set(&rbd_dev->tag_set); in rbd_init_disk()
5023 (unsigned long long)rbd_dev->mapping.size); in rbd_size_show()
5031 return sprintf(buf, "0x%016llx\n", rbd_dev->header.features); in rbd_features_show()
5039 if (rbd_dev->major) in rbd_major_show()
5040 return sprintf(buf, "%d\n", rbd_dev->major); in rbd_major_show()
5050 return sprintf(buf, "%d\n", rbd_dev->minor); in rbd_minor_show()
5058 ceph_client_addr(rbd_dev->rbd_client->client); in rbd_client_addr_show()
5060 return sprintf(buf, "%pISpc/%u\n", &client_addr->in_addr, in rbd_client_addr_show()
5061 le32_to_cpu(client_addr->nonce)); in rbd_client_addr_show()
5070 ceph_client_gid(rbd_dev->rbd_client->client)); in rbd_client_id_show()
5078 return sprintf(buf, "%pU\n", &rbd_dev->rbd_client->client->fsid); in rbd_cluster_fsid_show()
5087 return -EPERM; in rbd_config_info_show()
5089 return sprintf(buf, "%s\n", rbd_dev->config_info); in rbd_config_info_show()
5097 return sprintf(buf, "%s\n", rbd_dev->spec->pool_name); in rbd_pool_show()
5106 (unsigned long long) rbd_dev->spec->pool_id); in rbd_pool_id_show()
5114 return sprintf(buf, "%s\n", rbd_dev->spec->pool_ns ?: ""); in rbd_pool_ns_show()
5122 if (rbd_dev->spec->image_name) in rbd_name_show()
5123 return sprintf(buf, "%s\n", rbd_dev->spec->image_name); in rbd_name_show()
5133 return sprintf(buf, "%s\n", rbd_dev->spec->image_id); in rbd_image_id_show()
5137 * Shows the name of the currently-mapped snapshot (or
5146 return sprintf(buf, "%s\n", rbd_dev->spec->snap_name); in rbd_snap_show()
5154 return sprintf(buf, "%llu\n", rbd_dev->spec->snap_id); in rbd_snap_id_show()
5169 if (!rbd_dev->parent) in rbd_parent_show()
5172 for ( ; rbd_dev->parent; rbd_dev = rbd_dev->parent) { in rbd_parent_show()
5173 struct rbd_spec *spec = rbd_dev->parent_spec; in rbd_parent_show()
5182 spec->pool_id, spec->pool_name, in rbd_parent_show()
5183 spec->pool_ns ?: "", in rbd_parent_show()
5184 spec->image_id, spec->image_name ?: "(unknown)", in rbd_parent_show()
5185 spec->snap_id, spec->snap_name, in rbd_parent_show()
5186 rbd_dev->parent_overlap); in rbd_parent_show()
5201 return -EPERM; in rbd_image_refresh()
5268 kref_get(&spec->kref); in rbd_spec_get()
5277 kref_put(&spec->kref, rbd_spec_free); in rbd_spec_put()
5288 spec->pool_id = CEPH_NOPOOL; in rbd_spec_alloc()
5289 spec->snap_id = CEPH_NOSNAP; in rbd_spec_alloc()
5290 kref_init(&spec->kref); in rbd_spec_alloc()
5299 kfree(spec->pool_name); in rbd_spec_free()
5300 kfree(spec->pool_ns); in rbd_spec_free()
5301 kfree(spec->image_id); in rbd_spec_free()
5302 kfree(spec->image_name); in rbd_spec_free()
5303 kfree(spec->snap_name); in rbd_spec_free()
5309 WARN_ON(rbd_dev->watch_state != RBD_WATCH_STATE_UNREGISTERED); in rbd_dev_free()
5310 WARN_ON(rbd_dev->lock_state != RBD_LOCK_STATE_UNLOCKED); in rbd_dev_free()
5312 ceph_oid_destroy(&rbd_dev->header_oid); in rbd_dev_free()
5313 ceph_oloc_destroy(&rbd_dev->header_oloc); in rbd_dev_free()
5314 kfree(rbd_dev->config_info); in rbd_dev_free()
5316 rbd_put_client(rbd_dev->rbd_client); in rbd_dev_free()
5317 rbd_spec_put(rbd_dev->spec); in rbd_dev_free()
5318 kfree(rbd_dev->opts); in rbd_dev_free()
5325 bool need_put = !!rbd_dev->opts; in rbd_dev_release()
5328 destroy_workqueue(rbd_dev->task_wq); in rbd_dev_release()
5329 ida_free(&rbd_dev_id_ida, rbd_dev->dev_id); in rbd_dev_release()
5337 * doing something similar to dm (dm-builtin.c) is overkill. in rbd_dev_release()
5351 spin_lock_init(&rbd_dev->lock); in __rbd_dev_create()
5352 INIT_LIST_HEAD(&rbd_dev->node); in __rbd_dev_create()
5353 init_rwsem(&rbd_dev->header_rwsem); in __rbd_dev_create()
5355 rbd_dev->header.data_pool_id = CEPH_NOPOOL; in __rbd_dev_create()
5356 ceph_oid_init(&rbd_dev->header_oid); in __rbd_dev_create()
5357 rbd_dev->header_oloc.pool = spec->pool_id; in __rbd_dev_create()
5358 if (spec->pool_ns) { in __rbd_dev_create()
5359 WARN_ON(!*spec->pool_ns); in __rbd_dev_create()
5360 rbd_dev->header_oloc.pool_ns = in __rbd_dev_create()
5361 ceph_find_or_create_string(spec->pool_ns, in __rbd_dev_create()
5362 strlen(spec->pool_ns)); in __rbd_dev_create()
5365 mutex_init(&rbd_dev->watch_mutex); in __rbd_dev_create()
5366 rbd_dev->watch_state = RBD_WATCH_STATE_UNREGISTERED; in __rbd_dev_create()
5367 INIT_DELAYED_WORK(&rbd_dev->watch_dwork, rbd_reregister_watch); in __rbd_dev_create()
5369 init_rwsem(&rbd_dev->lock_rwsem); in __rbd_dev_create()
5370 rbd_dev->lock_state = RBD_LOCK_STATE_UNLOCKED; in __rbd_dev_create()
5371 INIT_WORK(&rbd_dev->acquired_lock_work, rbd_notify_acquired_lock); in __rbd_dev_create()
5372 INIT_WORK(&rbd_dev->released_lock_work, rbd_notify_released_lock); in __rbd_dev_create()
5373 INIT_DELAYED_WORK(&rbd_dev->lock_dwork, rbd_acquire_lock); in __rbd_dev_create()
5374 INIT_WORK(&rbd_dev->unlock_work, rbd_release_lock_work); in __rbd_dev_create()
5375 spin_lock_init(&rbd_dev->lock_lists_lock); in __rbd_dev_create()
5376 INIT_LIST_HEAD(&rbd_dev->acquiring_list); in __rbd_dev_create()
5377 INIT_LIST_HEAD(&rbd_dev->running_list); in __rbd_dev_create()
5378 init_completion(&rbd_dev->acquire_wait); in __rbd_dev_create()
5379 init_completion(&rbd_dev->quiescing_wait); in __rbd_dev_create()
5381 spin_lock_init(&rbd_dev->object_map_lock); in __rbd_dev_create()
5383 rbd_dev->dev.bus = &rbd_bus_type; in __rbd_dev_create()
5384 rbd_dev->dev.type = &rbd_device_type; in __rbd_dev_create()
5385 rbd_dev->dev.parent = &rbd_root_dev; in __rbd_dev_create()
5386 device_initialize(&rbd_dev->dev); in __rbd_dev_create()
5405 rbd_dev->dev_id = ida_alloc_max(&rbd_dev_id_ida, in rbd_dev_create()
5406 minor_to_rbd_dev_id(1 << MINORBITS) - 1, in rbd_dev_create()
5408 if (rbd_dev->dev_id < 0) in rbd_dev_create()
5411 sprintf(rbd_dev->name, RBD_DRV_NAME "%d", rbd_dev->dev_id); in rbd_dev_create()
5412 rbd_dev->task_wq = alloc_ordered_workqueue("%s-tasks", WQ_MEM_RECLAIM, in rbd_dev_create()
5413 rbd_dev->name); in rbd_dev_create()
5414 if (!rbd_dev->task_wq) in rbd_dev_create()
5420 rbd_dev->rbd_client = rbdc; in rbd_dev_create()
5421 rbd_dev->spec = spec; in rbd_dev_create()
5422 rbd_dev->opts = opts; in rbd_dev_create()
5424 dout("%s rbd_dev %p dev_id %d\n", __func__, rbd_dev, rbd_dev->dev_id); in rbd_dev_create()
5428 ida_free(&rbd_dev_id_ida, rbd_dev->dev_id); in rbd_dev_create()
5437 put_device(&rbd_dev->dev); in rbd_dev_destroy()
5455 ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid, in _rbd_dev_v2_snap_size()
5456 &rbd_dev->header_oloc, "get_size", in _rbd_dev_v2_snap_size()
5459 dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); in _rbd_dev_v2_snap_size()
5463 return -ERANGE; in _rbd_dev_v2_snap_size()
5467 dout(" order %u", (unsigned int)*order); in _rbd_dev_v2_snap_size()
5471 dout(" snap_id 0x%016llx snap_size = %llu\n", in _rbd_dev_v2_snap_size()
5491 return -ENOMEM; in rbd_dev_v2_object_prefix()
5493 ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid, in rbd_dev_v2_object_prefix()
5494 &rbd_dev->header_oloc, "get_object_prefix", in rbd_dev_v2_object_prefix()
5496 dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); in rbd_dev_v2_object_prefix()
5510 dout(" object_prefix = %s\n", object_prefix); in rbd_dev_v2_object_prefix()
5534 ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid, in _rbd_dev_v2_snap_features()
5535 &rbd_dev->header_oloc, "get_features", in _rbd_dev_v2_snap_features()
5538 dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); in _rbd_dev_v2_snap_features()
5542 return -ERANGE; in _rbd_dev_v2_snap_features()
5548 return -ENXIO; in _rbd_dev_v2_snap_features()
5553 dout(" snap_id 0x%016llx features = 0x%016llx incompat = 0x%016llx\n", in _rbd_dev_v2_snap_features()
5563 * object map, store them in rbd_dev->object_map_flags.
5570 __le64 snapid = cpu_to_le64(rbd_dev->spec->snap_id); in rbd_dev_v2_get_flags()
5574 ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid, in rbd_dev_v2_get_flags()
5575 &rbd_dev->header_oloc, "get_flags", in rbd_dev_v2_get_flags()
5581 return -EBADMSG; in rbd_dev_v2_get_flags()
5583 rbd_dev->object_map_flags = le64_to_cpu(flags); in rbd_dev_v2_get_flags()
5599 kfree(pii->pool_ns); in rbd_parent_info_cleanup()
5600 kfree(pii->image_id); in rbd_parent_info_cleanup()
5620 ceph_decode_64_safe(p, end, pii->pool_id, e_inval); in decode_parent_image_spec()
5621 pii->pool_ns = ceph_extract_encoded_string(p, end, NULL, GFP_KERNEL); in decode_parent_image_spec()
5622 if (IS_ERR(pii->pool_ns)) { in decode_parent_image_spec()
5623 ret = PTR_ERR(pii->pool_ns); in decode_parent_image_spec()
5624 pii->pool_ns = NULL; in decode_parent_image_spec()
5627 pii->image_id = ceph_extract_encoded_string(p, end, NULL, GFP_KERNEL); in decode_parent_image_spec()
5628 if (IS_ERR(pii->image_id)) { in decode_parent_image_spec()
5629 ret = PTR_ERR(pii->image_id); in decode_parent_image_spec()
5630 pii->image_id = NULL; in decode_parent_image_spec()
5633 ceph_decode_64_safe(p, end, pii->snap_id, e_inval); in decode_parent_image_spec()
5637 return -EINVAL; in decode_parent_image_spec()
5645 struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; in __get_parent_info()
5650 ret = ceph_osdc_call(osdc, &rbd_dev->header_oid, &rbd_dev->header_oloc, in __get_parent_info()
5654 return ret == -EOPNOTSUPP ? 1 : ret; in __get_parent_info()
5662 ret = ceph_osdc_call(osdc, &rbd_dev->header_oid, &rbd_dev->header_oloc, in __get_parent_info()
5670 ceph_decode_8_safe(&p, end, pii->has_overlap, e_inval); in __get_parent_info()
5671 if (pii->has_overlap) in __get_parent_info()
5672 ceph_decode_64_safe(&p, end, pii->overlap, e_inval); in __get_parent_info()
5674 dout("%s pool_id %llu pool_ns %s image_id %s snap_id %llu has_overlap %d overlap %llu\n", in __get_parent_info()
5675 __func__, pii->pool_id, pii->pool_ns, pii->image_id, pii->snap_id, in __get_parent_info()
5676 pii->has_overlap, pii->overlap); in __get_parent_info()
5680 return -EINVAL; in __get_parent_info()
5691 struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; in __get_parent_info_legacy()
5696 ret = ceph_osdc_call(osdc, &rbd_dev->header_oid, &rbd_dev->header_oloc, in __get_parent_info_legacy()
5704 ceph_decode_64_safe(&p, end, pii->pool_id, e_inval); in __get_parent_info_legacy()
5705 pii->image_id = ceph_extract_encoded_string(&p, end, NULL, GFP_KERNEL); in __get_parent_info_legacy()
5706 if (IS_ERR(pii->image_id)) { in __get_parent_info_legacy()
5707 ret = PTR_ERR(pii->image_id); in __get_parent_info_legacy()
5708 pii->image_id = NULL; in __get_parent_info_legacy()
5711 ceph_decode_64_safe(&p, end, pii->snap_id, e_inval); in __get_parent_info_legacy()
5712 pii->has_overlap = true; in __get_parent_info_legacy()
5713 ceph_decode_64_safe(&p, end, pii->overlap, e_inval); in __get_parent_info_legacy()
5715 dout("%s pool_id %llu pool_ns %s image_id %s snap_id %llu has_overlap %d overlap %llu\n", in __get_parent_info_legacy()
5716 __func__, pii->pool_id, pii->pool_ns, pii->image_id, pii->snap_id, in __get_parent_info_legacy()
5717 pii->has_overlap, pii->overlap); in __get_parent_info_legacy()
5721 return -EINVAL; in __get_parent_info_legacy()
5733 return -ENOMEM; in rbd_dev_v2_parent_info()
5738 return -ENOMEM; in rbd_dev_v2_parent_info()
5742 ceph_encode_64(&p, rbd_dev->spec->snap_id); in rbd_dev_v2_parent_info()
5761 return -ENOMEM; in rbd_dev_setup_parent()
5772 ret = -EIO; in rbd_dev_setup_parent()
5783 parent_spec->pool_id = pii.pool_id; in rbd_dev_setup_parent()
5785 parent_spec->pool_ns = pii.pool_ns; in rbd_dev_setup_parent()
5788 parent_spec->image_id = pii.image_id; in rbd_dev_setup_parent()
5790 parent_spec->snap_id = pii.snap_id; in rbd_dev_setup_parent()
5792 rbd_assert(!rbd_dev->parent_spec); in rbd_dev_setup_parent()
5793 rbd_dev->parent_spec = parent_spec; in rbd_dev_setup_parent()
5802 rbd_dev->parent_overlap = pii.overlap; in rbd_dev_setup_parent()
5822 ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid, in rbd_dev_v2_striping_info()
5823 &rbd_dev->header_oloc, "get_stripe_unit_count", in rbd_dev_v2_striping_info()
5825 dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); in rbd_dev_v2_striping_info()
5829 return -ERANGE; in rbd_dev_v2_striping_info()
5833 dout(" stripe_unit = %llu stripe_count = %llu\n", *stripe_unit, in rbd_dev_v2_striping_info()
5844 ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid, in rbd_dev_v2_data_pool()
5845 &rbd_dev->header_oloc, "get_data_pool", in rbd_dev_v2_data_pool()
5848 dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); in rbd_dev_v2_data_pool()
5852 return -EBADMSG; in rbd_dev_v2_data_pool()
5855 dout(" data_pool_id = %lld\n", *data_pool_id); in rbd_dev_v2_data_pool()
5874 rbd_assert(!rbd_dev->spec->image_name); in rbd_dev_image_name()
5876 len = strlen(rbd_dev->spec->image_id); in rbd_dev_image_name()
5884 ceph_encode_string(&p, end, rbd_dev->spec->image_id, (u32)len); in rbd_dev_image_name()
5892 ret = rbd_obj_method_sync(rbd_dev, &oid, &rbd_dev->header_oloc, in rbd_dev_image_name()
5904 dout("%s: name is %s len is %zd\n", __func__, image_name, len); in rbd_dev_image_name()
5914 struct ceph_snap_context *snapc = rbd_dev->header.snapc; in rbd_v1_snap_id_by_name()
5920 snap_name = rbd_dev->header.snap_names; in rbd_v1_snap_id_by_name()
5921 while (which < snapc->num_snaps) { in rbd_v1_snap_id_by_name()
5923 return snapc->snaps[which]; in rbd_v1_snap_id_by_name()
5932 struct ceph_snap_context *snapc = rbd_dev->header.snapc; in rbd_v2_snap_id_by_name()
5937 for (which = 0; !found && which < snapc->num_snaps; which++) { in rbd_v2_snap_id_by_name()
5940 snap_id = snapc->snaps[which]; in rbd_v2_snap_id_by_name()
5943 /* ignore no-longer existing snapshots */ in rbd_v2_snap_id_by_name()
5944 if (PTR_ERR(snap_name) == -ENOENT) in rbd_v2_snap_id_by_name()
5961 if (rbd_dev->image_format == 1) in rbd_snap_id_by_name()
5972 struct rbd_spec *spec = rbd_dev->spec; in rbd_spec_fill_snap_id()
5974 rbd_assert(spec->pool_id != CEPH_NOPOOL && spec->pool_name); in rbd_spec_fill_snap_id()
5975 rbd_assert(spec->image_id && spec->image_name); in rbd_spec_fill_snap_id()
5976 rbd_assert(spec->snap_name); in rbd_spec_fill_snap_id()
5978 if (strcmp(spec->snap_name, RBD_SNAP_HEAD_NAME)) { in rbd_spec_fill_snap_id()
5981 snap_id = rbd_snap_id_by_name(rbd_dev, spec->snap_name); in rbd_spec_fill_snap_id()
5983 return -ENOENT; in rbd_spec_fill_snap_id()
5985 spec->snap_id = snap_id; in rbd_spec_fill_snap_id()
5987 spec->snap_id = CEPH_NOSNAP; in rbd_spec_fill_snap_id()
6001 struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; in rbd_spec_fill_names()
6002 struct rbd_spec *spec = rbd_dev->spec; in rbd_spec_fill_names()
6008 rbd_assert(spec->pool_id != CEPH_NOPOOL); in rbd_spec_fill_names()
6009 rbd_assert(spec->image_id); in rbd_spec_fill_names()
6010 rbd_assert(spec->snap_id != CEPH_NOSNAP); in rbd_spec_fill_names()
6014 pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, spec->pool_id); in rbd_spec_fill_names()
6016 rbd_warn(rbd_dev, "no pool with id %llu", spec->pool_id); in rbd_spec_fill_names()
6017 return -EIO; in rbd_spec_fill_names()
6021 return -ENOMEM; in rbd_spec_fill_names()
6031 snap_name = rbd_snap_name(rbd_dev, spec->snap_id); in rbd_spec_fill_names()
6037 spec->pool_name = pool_name; in rbd_spec_fill_names()
6038 spec->image_name = image_name; in rbd_spec_fill_names()
6039 spec->snap_name = snap_name; in rbd_spec_fill_names()
6072 return -ENOMEM; in rbd_dev_v2_snap_context()
6074 ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid, in rbd_dev_v2_snap_context()
6075 &rbd_dev->header_oloc, "get_snapcontext", in rbd_dev_v2_snap_context()
6077 dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); in rbd_dev_v2_snap_context()
6083 ret = -ERANGE; in rbd_dev_v2_snap_context()
6093 if (snap_count > (SIZE_MAX - sizeof (struct ceph_snap_context)) in rbd_dev_v2_snap_context()
6095 ret = -EINVAL; in rbd_dev_v2_snap_context()
6104 ret = -ENOMEM; in rbd_dev_v2_snap_context()
6107 snapc->seq = seq; in rbd_dev_v2_snap_context()
6109 snapc->snaps[i] = ceph_decode_64(&p); in rbd_dev_v2_snap_context()
6112 dout(" snap context seq = %llu, snap_count = %u\n", in rbd_dev_v2_snap_context()
6134 return ERR_PTR(-ENOMEM); in rbd_dev_v2_snap_name()
6137 ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid, in rbd_dev_v2_snap_name()
6138 &rbd_dev->header_oloc, "get_snapshot_name", in rbd_dev_v2_snap_name()
6140 dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); in rbd_dev_v2_snap_name()
6152 dout(" snap_id 0x%016llx snap_name = %s\n", in rbd_dev_v2_snap_name()
6167 first_time ? &header->obj_order : NULL, in rbd_dev_v2_header_info()
6168 &header->image_size); in rbd_dev_v2_header_info()
6178 ret = rbd_dev_v2_snap_context(rbd_dev, &header->snapc); in rbd_dev_v2_header_info()
6189 rbd_assert(rbd_image_format_valid(rbd_dev->image_format)); in rbd_dev_header_info()
6190 rbd_assert(!header->object_prefix && !header->snapc); in rbd_dev_header_info()
6192 if (rbd_dev->image_format == 1) in rbd_dev_header_info()
6200 * first found non-space character (if any). Returns the length of
6201 * the token (string of non-white space characters) found. Note
6221 * that a duplicate buffer is created even for a zero-length token.
6223 * Returns a pointer to the newly-allocated duplicate, or a null
6225 * the lenp argument is a non-null pointer, the length of the token
6254 struct rbd_options *opt = pctx->opts; in rbd_parse_param()
6259 ret = ceph_parse_param(param, pctx->copts, NULL); in rbd_parse_param()
6260 if (ret != -ENOPARAM) in rbd_parse_param()
6264 dout("%s fs_parse '%s' token %d\n", __func__, param->key, token); in rbd_parse_param()
6266 if (token == -ENOPARAM) in rbd_parse_param()
6268 param->key); in rbd_parse_param()
6276 opt->queue_depth = result.uint_32; in rbd_parse_param()
6282 return inval_plog(&log, "alloc_size must be a power of 2"); in rbd_parse_param()
6283 opt->alloc_size = result.uint_32; in rbd_parse_param()
6289 opt->lock_timeout = msecs_to_jiffies(result.uint_32 * 1000); in rbd_parse_param()
6292 kfree(pctx->spec->pool_ns); in rbd_parse_param()
6293 pctx->spec->pool_ns = param->string; in rbd_parse_param()
6294 param->string = NULL; in rbd_parse_param()
6299 opt->alloc_hint_flags &= in rbd_parse_param()
6304 opt->alloc_hint_flags |= in rbd_parse_param()
6306 opt->alloc_hint_flags &= in rbd_parse_param()
6310 opt->alloc_hint_flags |= in rbd_parse_param()
6312 opt->alloc_hint_flags &= in rbd_parse_param()
6315 default: in rbd_parse_param()
6320 opt->read_only = true; in rbd_parse_param()
6323 opt->read_only = false; in rbd_parse_param()
6326 opt->lock_on_read = true; in rbd_parse_param()
6329 opt->exclusive = true; in rbd_parse_param()
6332 opt->trim = false; in rbd_parse_param()
6334 default: in rbd_parse_param()
6341 return inval_plog(&log, "%s out of range", param->key); in rbd_parse_param()
6353 dout("%s '%s'\n", __func__, options); in rbd_parse_options()
6371 return -ENOMEM; in rbd_parse_options()
6389 * and the data written is passed here via a NUL-terminated buffer.
6393 * the other parameters which return dynamically-allocated
6411 * A comma-separated list of one or more monitor addresses.
6416 * A comma-separated list of ceph and/or rbd options.
6425 * provided. Snapshot mappings are always read-only.
6445 return -EINVAL; in rbd_add_parse_args()
6451 ret = -EINVAL; in rbd_add_parse_args()
6454 return -ENOMEM; in rbd_add_parse_args()
6464 pctx.spec->pool_name = dup_token(&buf, NULL); in rbd_add_parse_args()
6465 if (!pctx.spec->pool_name) in rbd_add_parse_args()
6467 if (!*pctx.spec->pool_name) { in rbd_add_parse_args()
6472 pctx.spec->image_name = dup_token(&buf, NULL); in rbd_add_parse_args()
6473 if (!pctx.spec->image_name) in rbd_add_parse_args()
6475 if (!*pctx.spec->image_name) { in rbd_add_parse_args()
6481 * Snapshot name is optional; default is to use "-" in rbd_add_parse_args()
6487 len = sizeof (RBD_SNAP_HEAD_NAME) - 1; in rbd_add_parse_args()
6489 ret = -ENAMETOOLONG; in rbd_add_parse_args()
6496 pctx.spec->snap_name = snap_name; in rbd_add_parse_args()
6508 pctx.opts->read_only = RBD_READ_ONLY_DEFAULT; in rbd_add_parse_args()
6509 pctx.opts->queue_depth = RBD_QUEUE_DEPTH_DEFAULT; in rbd_add_parse_args()
6510 pctx.opts->alloc_size = RBD_ALLOC_SIZE_DEFAULT; in rbd_add_parse_args()
6511 pctx.opts->lock_timeout = RBD_LOCK_TIMEOUT_DEFAULT; in rbd_add_parse_args()
6512 pctx.opts->lock_on_read = RBD_LOCK_ON_READ_DEFAULT; in rbd_add_parse_args()
6513 pctx.opts->exclusive = RBD_EXCLUSIVE_DEFAULT; in rbd_add_parse_args()
6514 pctx.opts->trim = RBD_TRIM_DEFAULT; in rbd_add_parse_args()
6532 ret = -ENOMEM; in rbd_add_parse_args()
6543 down_write(&rbd_dev->lock_rwsem); in rbd_dev_image_unlock()
6546 up_write(&rbd_dev->lock_rwsem); in rbd_dev_image_unlock()
6558 if (!(rbd_dev->header.features & RBD_FEATURE_EXCLUSIVE_LOCK)) { in rbd_add_acquire_lock()
6559 if (!rbd_dev->opts->exclusive && !rbd_dev->opts->lock_on_read) in rbd_add_acquire_lock()
6562 rbd_warn(rbd_dev, "exclusive-lock feature is not enabled"); in rbd_add_acquire_lock()
6563 return -EINVAL; in rbd_add_acquire_lock()
6570 queue_delayed_work(rbd_dev->task_wq, &rbd_dev->lock_dwork, 0); in rbd_add_acquire_lock()
6571 ret = wait_for_completion_killable_timeout(&rbd_dev->acquire_wait, in rbd_add_acquire_lock()
6572 ceph_timeout_jiffies(rbd_dev->opts->lock_timeout)); in rbd_add_acquire_lock()
6574 ret = rbd_dev->acquire_err; in rbd_add_acquire_lock()
6576 cancel_delayed_work_sync(&rbd_dev->lock_dwork); in rbd_add_acquire_lock()
6578 ret = -ETIMEDOUT; in rbd_add_acquire_lock()
6589 * An rbd format 2 image has a unique identifier, distinct from the
6616 if (rbd_dev->spec->image_id) { in rbd_dev_image_id()
6617 rbd_dev->image_format = *rbd_dev->spec->image_id ? 2 : 1; in rbd_dev_image_id()
6623 * First, see if the format 2 image id file exists, and if in rbd_dev_image_id()
6627 rbd_dev->spec->image_name); in rbd_dev_image_id()
6631 dout("rbd id object name is %s\n", oid.name); in rbd_dev_image_id()
6637 ret = -ENOMEM; in rbd_dev_image_id()
6643 ret = rbd_obj_method_sync(rbd_dev, &oid, &rbd_dev->header_oloc, in rbd_dev_image_id()
6646 dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); in rbd_dev_image_id()
6647 if (ret == -ENOENT) { in rbd_dev_image_id()
6649 ret = image_id ? 0 : -ENOMEM; in rbd_dev_image_id()
6651 rbd_dev->image_format = 1; in rbd_dev_image_id()
6659 rbd_dev->image_format = 2; in rbd_dev_image_id()
6663 rbd_dev->spec->image_id = image_id; in rbd_dev_image_id()
6664 dout("image_id is %s\n", image_id); in rbd_dev_image_id()
6684 rbd_image_header_cleanup(&rbd_dev->header); in rbd_dev_unprobe()
6692 ret = rbd_dev_v2_object_prefix(rbd_dev, &header->object_prefix); in rbd_dev_v2_header_onetime()
6701 rbd_is_ro(rbd_dev), &header->features); in rbd_dev_v2_header_onetime()
6707 if (header->features & RBD_FEATURE_STRIPINGV2) { in rbd_dev_v2_header_onetime()
6708 ret = rbd_dev_v2_striping_info(rbd_dev, &header->stripe_unit, in rbd_dev_v2_header_onetime()
6709 &header->stripe_count); in rbd_dev_v2_header_onetime()
6714 if (header->features & RBD_FEATURE_DATA_POOL) { in rbd_dev_v2_header_onetime()
6715 ret = rbd_dev_v2_data_pool(rbd_dev, &header->data_pool_id); in rbd_dev_v2_header_onetime()
6724 * @depth is rbd_dev_image_probe() -> rbd_dev_probe_parent() ->
6733 if (!rbd_dev->parent_spec) in rbd_dev_probe_parent()
6738 ret = -EINVAL; in rbd_dev_probe_parent()
6742 parent = __rbd_dev_create(rbd_dev->parent_spec); in rbd_dev_probe_parent()
6744 ret = -ENOMEM; in rbd_dev_probe_parent()
6752 parent->rbd_client = __rbd_get_client(rbd_dev->rbd_client); in rbd_dev_probe_parent()
6753 parent->spec = rbd_spec_get(rbd_dev->parent_spec); in rbd_dev_probe_parent()
6755 __set_bit(RBD_DEV_FLAG_READONLY, &parent->flags); in rbd_dev_probe_parent()
6761 rbd_dev->parent = parent; in rbd_dev_probe_parent()
6762 atomic_set(&rbd_dev->parent_ref, 1); in rbd_dev_probe_parent()
6773 clear_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags); in rbd_dev_device_release()
6776 unregister_blkdev(rbd_dev->major, rbd_dev->name); in rbd_dev_device_release()
6780 * rbd_dev->header_rwsem must be locked for write and will be unlocked
6790 ret = register_blkdev(0, rbd_dev->name); in rbd_dev_device_setup()
6794 rbd_dev->major = ret; in rbd_dev_device_setup()
6795 rbd_dev->minor = 0; in rbd_dev_device_setup()
6797 rbd_dev->major = rbd_major; in rbd_dev_device_setup()
6798 rbd_dev->minor = rbd_dev_id_to_minor(rbd_dev->dev_id); in rbd_dev_device_setup()
6807 set_capacity(rbd_dev->disk, rbd_dev->mapping.size / SECTOR_SIZE); in rbd_dev_device_setup()
6808 set_disk_ro(rbd_dev->disk, rbd_is_ro(rbd_dev)); in rbd_dev_device_setup()
6810 ret = dev_set_name(&rbd_dev->dev, "%d", rbd_dev->dev_id); in rbd_dev_device_setup()
6814 set_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags); in rbd_dev_device_setup()
6815 up_write(&rbd_dev->header_rwsem); in rbd_dev_device_setup()
6822 unregister_blkdev(rbd_dev->major, rbd_dev->name); in rbd_dev_device_setup()
6824 up_write(&rbd_dev->header_rwsem); in rbd_dev_device_setup()
6830 struct rbd_spec *spec = rbd_dev->spec; in rbd_dev_header_name()
6835 rbd_assert(rbd_image_format_valid(rbd_dev->image_format)); in rbd_dev_header_name()
6836 if (rbd_dev->image_format == 1) in rbd_dev_header_name()
6837 ret = ceph_oid_aprintf(&rbd_dev->header_oid, GFP_KERNEL, "%s%s", in rbd_dev_header_name()
6838 spec->image_name, RBD_SUFFIX); in rbd_dev_header_name()
6840 ret = ceph_oid_aprintf(&rbd_dev->header_oid, GFP_KERNEL, "%s%s", in rbd_dev_header_name()
6841 RBD_HEADER_PREFIX, spec->image_id); in rbd_dev_header_name()
6850 rbd_dev->spec->pool_name, in rbd_print_dne()
6851 rbd_dev->spec->pool_ns ?: "", in rbd_print_dne()
6852 rbd_dev->spec->pool_ns ? "/" : "", in rbd_print_dne()
6853 rbd_dev->spec->image_name); in rbd_print_dne()
6856 rbd_dev->spec->pool_name, in rbd_print_dne()
6857 rbd_dev->spec->pool_ns ?: "", in rbd_print_dne()
6858 rbd_dev->spec->pool_ns ? "/" : "", in rbd_print_dne()
6859 rbd_dev->spec->image_name, in rbd_print_dne()
6860 rbd_dev->spec->snap_name); in rbd_print_dne()
6870 rbd_dev->image_format = 0; in rbd_dev_image_release()
6871 kfree(rbd_dev->spec->image_id); in rbd_dev_image_release()
6872 rbd_dev->spec->image_id = NULL; in rbd_dev_image_release()
6891 * error, rbd_dev->spec->image_id will be filled in with in rbd_dev_image_probe()
6892 * a dynamically-allocated string, and rbd_dev->image_format in rbd_dev_image_probe()
6893 * will be set to either 1 or 2. in rbd_dev_image_probe()
6906 if (ret == -ENOENT) in rbd_dev_image_probe()
6913 down_write(&rbd_dev->header_rwsem); in rbd_dev_image_probe()
6915 ret = rbd_dev_header_info(rbd_dev, &rbd_dev->header, true); in rbd_dev_image_probe()
6917 if (ret == -ENOENT && !need_watch) in rbd_dev_image_probe()
6926 * id, image name and id, and snap name - need to fill snap id. in rbd_dev_image_probe()
6928 * and snap ids - need to fill in names for those ids. in rbd_dev_image_probe()
6935 if (ret == -ENOENT) in rbd_dev_image_probe()
6945 (rbd_dev->header.features & RBD_FEATURE_OBJECT_MAP)) { in rbd_dev_image_probe()
6951 if (rbd_dev->header.features & RBD_FEATURE_LAYERING) { in rbd_dev_image_probe()
6961 dout("discovered format %u image, header name is %s\n", in rbd_dev_image_probe()
6962 rbd_dev->image_format, rbd_dev->header_oid.name); in rbd_dev_image_probe()
6967 up_write(&rbd_dev->header_rwsem); in rbd_dev_image_probe()
6972 rbd_dev->image_format = 0; in rbd_dev_image_probe()
6973 kfree(rbd_dev->spec->image_id); in rbd_dev_image_probe()
6974 rbd_dev->spec->image_id = NULL; in rbd_dev_image_probe()
6981 rbd_assert(rbd_image_format_valid(rbd_dev->image_format)); in rbd_dev_update_header()
6982 rbd_assert(rbd_dev->header.object_prefix); /* !first_time */ in rbd_dev_update_header()
6984 if (rbd_dev->header.image_size != header->image_size) { in rbd_dev_update_header()
6985 rbd_dev->header.image_size = header->image_size; in rbd_dev_update_header()
6988 rbd_dev->mapping.size = header->image_size; in rbd_dev_update_header()
6993 ceph_put_snap_context(rbd_dev->header.snapc); in rbd_dev_update_header()
6994 rbd_dev->header.snapc = header->snapc; in rbd_dev_update_header()
6995 header->snapc = NULL; in rbd_dev_update_header()
6997 if (rbd_dev->image_format == 1) { in rbd_dev_update_header()
6998 kfree(rbd_dev->header.snap_names); in rbd_dev_update_header()
6999 rbd_dev->header.snap_names = header->snap_names; in rbd_dev_update_header()
7000 header->snap_names = NULL; in rbd_dev_update_header()
7002 kfree(rbd_dev->header.snap_sizes); in rbd_dev_update_header()
7003 rbd_dev->header.snap_sizes = header->snap_sizes; in rbd_dev_update_header()
7004 header->snap_sizes = NULL; in rbd_dev_update_header()
7011 if (pii->pool_id == CEPH_NOPOOL || !pii->has_overlap) { in rbd_dev_update_parent()
7025 if (rbd_dev->parent_overlap) { in rbd_dev_update_parent()
7026 rbd_dev->parent_overlap = 0; in rbd_dev_update_parent()
7029 rbd_dev->disk->disk_name); in rbd_dev_update_parent()
7032 rbd_assert(rbd_dev->parent_spec); in rbd_dev_update_parent()
7038 if (!pii->overlap && rbd_dev->parent_overlap) in rbd_dev_update_parent()
7041 rbd_dev->parent_overlap = pii->overlap; in rbd_dev_update_parent()
7051 dout("%s rbd_dev %p\n", __func__, rbd_dev); in rbd_dev_refresh()
7061 if (rbd_dev->parent) { in rbd_dev_refresh()
7067 down_write(&rbd_dev->header_rwsem); in rbd_dev_refresh()
7069 if (rbd_dev->parent) in rbd_dev_refresh()
7071 up_write(&rbd_dev->header_rwsem); in rbd_dev_refresh()
7089 return -EPERM; in do_rbd_add()
7092 return -ENODEV; in do_rbd_add()
7106 rc = ceph_pg_poolid_by_name(rbdc->client->osdc.osdmap, spec->pool_name); in do_rbd_add()
7108 if (rc == -ENOENT) in do_rbd_add()
7109 pr_info("pool %s does not exist\n", spec->pool_name); in do_rbd_add()
7112 spec->pool_id = (u64)rc; in do_rbd_add()
7116 rc = -ENOMEM; in do_rbd_add()
7123 /* if we are mapping a snapshot it will be a read-only mapping */ in do_rbd_add()
7124 if (rbd_dev->opts->read_only || in do_rbd_add()
7125 strcmp(rbd_dev->spec->snap_name, RBD_SNAP_HEAD_NAME)) in do_rbd_add()
7126 __set_bit(RBD_DEV_FLAG_READONLY, &rbd_dev->flags); in do_rbd_add()
7128 rbd_dev->config_info = kstrdup(buf, GFP_KERNEL); in do_rbd_add()
7129 if (!rbd_dev->config_info) { in do_rbd_add()
7130 rc = -ENOMEM; in do_rbd_add()
7138 if (rbd_dev->opts->alloc_size > rbd_dev->layout.object_size) { in do_rbd_add()
7140 rbd_dev->layout.object_size); in do_rbd_add()
7141 rbd_dev->opts->alloc_size = rbd_dev->layout.object_size; in do_rbd_add()
7154 rc = device_add(&rbd_dev->dev); in do_rbd_add()
7158 rc = device_add_disk(&rbd_dev->dev, rbd_dev->disk, NULL); in do_rbd_add()
7163 list_add_tail(&rbd_dev->node, &rbd_dev_list); in do_rbd_add()
7166 pr_info("%s: capacity %llu features 0x%llx\n", rbd_dev->disk->disk_name, in do_rbd_add()
7167 (unsigned long long)get_capacity(rbd_dev->disk) << SECTOR_SHIFT, in do_rbd_add()
7168 rbd_dev->header.features); in do_rbd_add()
7194 return -EINVAL; in add_store()
7207 while (rbd_dev->parent) { in rbd_dev_remove_parent()
7209 struct rbd_device *second = first->parent; in rbd_dev_remove_parent()
7216 while (second && (third = second->parent)) { in rbd_dev_remove_parent()
7223 first->parent = NULL; in rbd_dev_remove_parent()
7224 first->parent_overlap = 0; in rbd_dev_remove_parent()
7226 rbd_assert(first->parent_spec); in rbd_dev_remove_parent()
7227 rbd_spec_put(first->parent_spec); in rbd_dev_remove_parent()
7228 first->parent_spec = NULL; in rbd_dev_remove_parent()
7241 return -EPERM; in do_rbd_remove()
7243 dev_id = -1; in do_rbd_remove()
7248 return -EINVAL; in do_rbd_remove()
7255 return -EINVAL; in do_rbd_remove()
7259 ret = -ENOENT; in do_rbd_remove()
7262 if (rbd_dev->dev_id == dev_id) { in do_rbd_remove()
7268 spin_lock_irq(&rbd_dev->lock); in do_rbd_remove()
7269 if (rbd_dev->open_count && !force) in do_rbd_remove()
7270 ret = -EBUSY; in do_rbd_remove()
7272 &rbd_dev->flags)) in do_rbd_remove()
7273 ret = -EINPROGRESS; in do_rbd_remove()
7274 spin_unlock_irq(&rbd_dev->lock); in do_rbd_remove()
7285 blk_mq_freeze_queue(rbd_dev->disk->queue); in do_rbd_remove()
7286 blk_mark_disk_dead(rbd_dev->disk); in do_rbd_remove()
7289 del_gendisk(rbd_dev->disk); in do_rbd_remove()
7291 list_del_init(&rbd_dev->node); in do_rbd_remove()
7293 device_del(&rbd_dev->dev); in do_rbd_remove()
7305 return -EINVAL; in remove_store()
7348 return -ENOMEM; in rbd_slab_init()
7360 return -ENOMEM; in rbd_slab_init()
7380 return -EINVAL; in rbd_init()
7389 * rbd devices * queue depth, so leave @max_active at default. in rbd_init()
7393 rc = -ENOMEM; in rbd_init()