summaryrefslogtreecommitdiff
path: root/drivers/block/rbd.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2015-07-02 21:35:00 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2015-07-02 21:35:00 +0300
commit0c76c6ba246043bbc5c0f9620a0645ae78217421 (patch)
tree644a4db58706c4e97478951f0a3a0087ddf26e5e /drivers/block/rbd.c
parent8688d9540cc6e17df4cba71615e27f04e0378fe6 (diff)
parent5a60e87603c4c533492c515b7f62578189b03c9c (diff)
downloadlinux-0c76c6ba246043bbc5c0f9620a0645ae78217421.tar.xz
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client
Pull Ceph updates from Sage Weil: "We have a pile of bug fixes from Ilya, including a few patches that sync up the CRUSH code with the latest from userspace. There is also a long series from Zheng that fixes various issues with snapshots, inline data, and directory fsync, some simplification and improvement in the cap release code, and a rework of the caching of directory contents. To top it off there are a few small fixes and cleanups from Benoit and Hong" * 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client: (40 commits) rbd: use GFP_NOIO in rbd_obj_request_create() crush: fix a bug in tree bucket decode libceph: Fix ceph_tcp_sendpage()'s more boolean usage libceph: Remove spurious kunmap() of the zero page rbd: queue_depth map option rbd: store rbd_options in rbd_device rbd: terminate rbd_opts_tokens with Opt_err ceph: fix ceph_writepages_start() rbd: bump queue_max_segments ceph: rework dcache readdir crush: sync up with userspace crush: fix crash from invalid 'take' argument ceph: switch some GFP_NOFS memory allocation to GFP_KERNEL ceph: pre-allocate data structure that tracks caps flushing ceph: re-send flushing caps (which are revoked) in reconnect stage ceph: send TID of the oldest pending caps flush to MDS ceph: track pending caps flushing globally ceph: track pending caps flushing accurately libceph: fix wrong name "Ceph filesystem for Linux" ceph: fix directory fsync ...
Diffstat (limited to 'drivers/block/rbd.c')
-rw-r--r--drivers/block/rbd.c111
1 files changed, 69 insertions, 42 deletions
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index ec6c5c6e1ac9..d94529d5c8e9 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -346,6 +346,7 @@ struct rbd_device {
struct rbd_image_header header;
unsigned long flags; /* possibly lock protected */
struct rbd_spec *spec;
+ struct rbd_options *opts;
char *header_name;
@@ -724,34 +725,36 @@ static struct rbd_client *rbd_client_find(struct ceph_options *ceph_opts)
}
/*
- * mount options
+ * (Per device) rbd map options
*/
enum {
+ Opt_queue_depth,
Opt_last_int,
/* int args above */
Opt_last_string,
/* string args above */
Opt_read_only,
Opt_read_write,
- /* Boolean args above */
- Opt_last_bool,
+ Opt_err
};
static match_table_t rbd_opts_tokens = {
+ {Opt_queue_depth, "queue_depth=%d"},
/* int args above */
/* string args above */
{Opt_read_only, "read_only"},
{Opt_read_only, "ro"}, /* Alternate spelling */
{Opt_read_write, "read_write"},
{Opt_read_write, "rw"}, /* Alternate spelling */
- /* Boolean args above */
- {-1, NULL}
+ {Opt_err, NULL}
};
struct rbd_options {
+ int queue_depth;
bool read_only;
};
+#define RBD_QUEUE_DEPTH_DEFAULT BLKDEV_MAX_RQ
#define RBD_READ_ONLY_DEFAULT false
static int parse_rbd_opts_token(char *c, void *private)
@@ -761,27 +764,27 @@ static int parse_rbd_opts_token(char *c, void *private)
int token, intval, ret;
token = match_token(c, rbd_opts_tokens, argstr);
- if (token < 0)
- return -EINVAL;
-
if (token < Opt_last_int) {
ret = match_int(&argstr[0], &intval);
if (ret < 0) {
- pr_err("bad mount option arg (not int) "
- "at '%s'\n", c);
+ pr_err("bad mount option arg (not int) at '%s'\n", c);
return ret;
}
dout("got int token %d val %d\n", token, intval);
} else if (token > Opt_last_int && token < Opt_last_string) {
- dout("got string token %d val %s\n", token,
- argstr[0].from);
- } else if (token > Opt_last_string && token < Opt_last_bool) {
- dout("got Boolean token %d\n", token);
+ dout("got string token %d val %s\n", token, argstr[0].from);
} else {
dout("got token %d\n", token);
}
switch (token) {
+ case Opt_queue_depth:
+ if (intval < 1) {
+ pr_err("queue_depth out of range\n");
+ return -EINVAL;
+ }
+ rbd_opts->queue_depth = intval;
+ break;
case Opt_read_only:
rbd_opts->read_only = true;
break;
@@ -789,9 +792,10 @@ static int parse_rbd_opts_token(char *c, void *private)
rbd_opts->read_only = false;
break;
default:
- rbd_assert(false);
- break;
+ /* libceph prints "bad option" msg */
+ return -EINVAL;
}
+
return 0;
}
@@ -1563,22 +1567,39 @@ static void rbd_obj_request_end(struct rbd_obj_request *obj_request)
/*
* Wait for an object request to complete. If interrupted, cancel the
* underlying osd request.
+ *
+ * @timeout: in jiffies, 0 means "wait forever"
*/
-static int rbd_obj_request_wait(struct rbd_obj_request *obj_request)
+static int __rbd_obj_request_wait(struct rbd_obj_request *obj_request,
+ unsigned long timeout)
{
- int ret;
+ long ret;
dout("%s %p\n", __func__, obj_request);
-
- ret = wait_for_completion_interruptible(&obj_request->completion);
- if (ret < 0) {
- dout("%s %p interrupted\n", __func__, obj_request);
+ ret = wait_for_completion_interruptible_timeout(
+ &obj_request->completion,
+ ceph_timeout_jiffies(timeout));
+ if (ret <= 0) {
+ if (ret == 0)
+ ret = -ETIMEDOUT;
rbd_obj_request_end(obj_request);
- return ret;
+ } else {
+ ret = 0;
}
- dout("%s %p done\n", __func__, obj_request);
- return 0;
+ dout("%s %p ret %d\n", __func__, obj_request, (int)ret);
+ return ret;
+}
+
+static int rbd_obj_request_wait(struct rbd_obj_request *obj_request)
+{
+ return __rbd_obj_request_wait(obj_request, 0);
+}
+
+static int rbd_obj_request_wait_timeout(struct rbd_obj_request *obj_request,
+ unsigned long timeout)
+{
+ return __rbd_obj_request_wait(obj_request, timeout);
}
static void rbd_img_request_complete(struct rbd_img_request *img_request)
@@ -2001,11 +2022,11 @@ static struct rbd_obj_request *rbd_obj_request_create(const char *object_name,
rbd_assert(obj_request_type_valid(type));
size = strlen(object_name) + 1;
- name = kmalloc(size, GFP_KERNEL);
+ name = kmalloc(size, GFP_NOIO);
if (!name)
return NULL;
- obj_request = kmem_cache_zalloc(rbd_obj_request_cache, GFP_KERNEL);
+ obj_request = kmem_cache_zalloc(rbd_obj_request_cache, GFP_NOIO);
if (!obj_request) {
kfree(name);
return NULL;
@@ -2376,7 +2397,7 @@ static void rbd_img_obj_request_fill(struct rbd_obj_request *obj_request,
}
if (opcode == CEPH_OSD_OP_DELETE)
- osd_req_op_init(osd_request, num_ops, opcode);
+ osd_req_op_init(osd_request, num_ops, opcode, 0);
else
osd_req_op_extent_init(osd_request, num_ops, opcode,
offset, length, 0, 0);
@@ -2848,7 +2869,7 @@ static int rbd_img_obj_exists_submit(struct rbd_obj_request *obj_request)
goto out;
stat_request->callback = rbd_img_obj_exists_callback;
- osd_req_op_init(stat_request->osd_req, 0, CEPH_OSD_OP_STAT);
+ osd_req_op_init(stat_request->osd_req, 0, CEPH_OSD_OP_STAT, 0);
osd_req_op_raw_data_in_pages(stat_request->osd_req, 0, pages, size, 0,
false, false);
rbd_osd_req_format_read(stat_request);
@@ -3122,6 +3143,7 @@ static struct rbd_obj_request *rbd_obj_watch_request_helper(
bool watch)
{
struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
+ struct ceph_options *opts = osdc->client->options;
struct rbd_obj_request *obj_request;
int ret;
@@ -3148,7 +3170,7 @@ static struct rbd_obj_request *rbd_obj_watch_request_helper(
if (ret)
goto out;
- ret = rbd_obj_request_wait(obj_request);
+ ret = rbd_obj_request_wait_timeout(obj_request, opts->mount_timeout);
if (ret)
goto out;
@@ -3750,10 +3772,9 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
memset(&rbd_dev->tag_set, 0, sizeof(rbd_dev->tag_set));
rbd_dev->tag_set.ops = &rbd_mq_ops;
- rbd_dev->tag_set.queue_depth = BLKDEV_MAX_RQ;
+ rbd_dev->tag_set.queue_depth = rbd_dev->opts->queue_depth;
rbd_dev->tag_set.numa_node = NUMA_NO_NODE;
- rbd_dev->tag_set.flags =
- BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE;
+ rbd_dev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE;
rbd_dev->tag_set.nr_hw_queues = 1;
rbd_dev->tag_set.cmd_size = sizeof(struct work_struct);
@@ -3773,6 +3794,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
/* set io sizes to object size */
segment_size = rbd_obj_bytes(&rbd_dev->header);
blk_queue_max_hw_sectors(q, segment_size / SECTOR_SIZE);
+ blk_queue_max_segments(q, segment_size / SECTOR_SIZE);
blk_queue_max_segment_size(q, segment_size);
blk_queue_io_min(q, segment_size);
blk_queue_io_opt(q, segment_size);
@@ -4044,7 +4066,8 @@ static void rbd_spec_free(struct kref *kref)
}
static struct rbd_device *rbd_dev_create(struct rbd_client *rbdc,
- struct rbd_spec *spec)
+ struct rbd_spec *spec,
+ struct rbd_options *opts)
{
struct rbd_device *rbd_dev;
@@ -4058,8 +4081,9 @@ static struct rbd_device *rbd_dev_create(struct rbd_client *rbdc,
INIT_LIST_HEAD(&rbd_dev->node);
init_rwsem(&rbd_dev->header_rwsem);
- rbd_dev->spec = spec;
rbd_dev->rbd_client = rbdc;
+ rbd_dev->spec = spec;
+ rbd_dev->opts = opts;
/* Initialize the layout used for all rbd requests */
@@ -4075,6 +4099,7 @@ static void rbd_dev_destroy(struct rbd_device *rbd_dev)
{
rbd_put_client(rbd_dev->rbd_client);
rbd_spec_put(rbd_dev->spec);
+ kfree(rbd_dev->opts);
kfree(rbd_dev);
}
@@ -4933,6 +4958,7 @@ static int rbd_add_parse_args(const char *buf,
goto out_mem;
rbd_opts->read_only = RBD_READ_ONLY_DEFAULT;
+ rbd_opts->queue_depth = RBD_QUEUE_DEPTH_DEFAULT;
copts = ceph_parse_options(options, mon_addrs,
mon_addrs + mon_addrs_size - 1,
@@ -4963,8 +4989,8 @@ out_err:
*/
static int rbd_add_get_pool_id(struct rbd_client *rbdc, const char *pool_name)
{
+ struct ceph_options *opts = rbdc->client->options;
u64 newest_epoch;
- unsigned long timeout = rbdc->client->options->mount_timeout * HZ;
int tries = 0;
int ret;
@@ -4979,7 +5005,8 @@ again:
if (rbdc->client->osdc.osdmap->epoch < newest_epoch) {
ceph_monc_request_next_osdmap(&rbdc->client->monc);
(void) ceph_monc_wait_osdmap(&rbdc->client->monc,
- newest_epoch, timeout);
+ newest_epoch,
+ opts->mount_timeout);
goto again;
} else {
/* the osdmap we have is new enough */
@@ -5148,7 +5175,7 @@ static int rbd_dev_probe_parent(struct rbd_device *rbd_dev)
rbdc = __rbd_get_client(rbd_dev->rbd_client);
ret = -ENOMEM;
- parent = rbd_dev_create(rbdc, parent_spec);
+ parent = rbd_dev_create(rbdc, parent_spec, NULL);
if (!parent)
goto out_err;
@@ -5394,9 +5421,6 @@ static ssize_t do_rbd_add(struct bus_type *bus,
rc = rbd_add_parse_args(buf, &ceph_opts, &rbd_opts, &spec);
if (rc < 0)
goto err_out_module;
- read_only = rbd_opts->read_only;
- kfree(rbd_opts);
- rbd_opts = NULL; /* done with this */
rbdc = rbd_get_client(ceph_opts);
if (IS_ERR(rbdc)) {
@@ -5422,11 +5446,12 @@ static ssize_t do_rbd_add(struct bus_type *bus,
goto err_out_client;
}
- rbd_dev = rbd_dev_create(rbdc, spec);
+ rbd_dev = rbd_dev_create(rbdc, spec, rbd_opts);
if (!rbd_dev)
goto err_out_client;
rbdc = NULL; /* rbd_dev now owns this */
spec = NULL; /* rbd_dev now owns this */
+ rbd_opts = NULL; /* rbd_dev now owns this */
rc = rbd_dev_image_probe(rbd_dev, true);
if (rc < 0)
@@ -5434,6 +5459,7 @@ static ssize_t do_rbd_add(struct bus_type *bus,
/* If we are mapping a snapshot it must be marked read-only */
+ read_only = rbd_dev->opts->read_only;
if (rbd_dev->spec->snap_id != CEPH_NOSNAP)
read_only = true;
rbd_dev->mapping.read_only = read_only;
@@ -5458,6 +5484,7 @@ err_out_client:
rbd_put_client(rbdc);
err_out_args:
rbd_spec_put(spec);
+ kfree(rbd_opts);
err_out_module:
module_put(THIS_MODULE);