summaryrefslogtreecommitdiff
path: root/fs/nfs/blocklayout
diff options
context:
space:
mode:
Diffstat (limited to 'fs/nfs/blocklayout')
-rw-r--r--fs/nfs/blocklayout/blocklayout.c42
-rw-r--r--fs/nfs/blocklayout/blocklayout.h11
-rw-r--r--fs/nfs/blocklayout/dev.c174
-rw-r--r--fs/nfs/blocklayout/extent_tree.c20
-rw-r--r--fs/nfs/blocklayout/rpc_pipefs.c2
5 files changed, 172 insertions, 77 deletions
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 6be13e0ec170..5d6edafbed20 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -149,8 +149,8 @@ do_add_page_to_bio(struct bio *bio, int npg, enum req_op op, sector_t isect,
/* limit length to what the device mapping allows */
end = disk_addr + *len;
- if (end >= map->start + map->len)
- *len = map->start + map->len - disk_addr;
+ if (end >= map->disk_offset + map->len)
+ *len = map->disk_offset + map->len - disk_addr;
retry:
if (!bio) {
@@ -564,25 +564,45 @@ bl_find_get_deviceid(struct nfs_server *server,
gfp_t gfp_mask)
{
struct nfs4_deviceid_node *node;
- unsigned long start, end;
+ int err = -ENODEV;
retry:
node = nfs4_find_get_deviceid(server, id, cred, gfp_mask);
if (!node)
return ERR_PTR(-ENODEV);
- if (test_bit(NFS_DEVICEID_UNAVAILABLE, &node->flags) == 0)
- return node;
+ /*
+ * Devices that are marked unavailable are left in the cache with a
+ * timeout to avoid sending GETDEVINFO after every LAYOUTGET, or
+ * constantly attempting to register the device. Once marked as
+ * unavailable they must be deleted and never reused.
+ */
+ if (test_bit(NFS_DEVICEID_UNAVAILABLE, &node->flags)) {
+ unsigned long end = jiffies;
+ unsigned long start = end - PNFS_DEVICE_RETRY_TIMEOUT;
+
+ if (!time_in_range(node->timestamp_unavailable, start, end)) {
+ /* Uncork subsequent GETDEVINFO operations for this device */
+ nfs4_delete_deviceid(node->ld, node->nfs_client, id);
+ goto retry;
+ }
+ goto out_put;
+ }
- end = jiffies;
- start = end - PNFS_DEVICE_RETRY_TIMEOUT;
- if (!time_in_range(node->timestamp_unavailable, start, end)) {
- nfs4_delete_deviceid(node->ld, node->nfs_client, id);
- goto retry;
+ if (!bl_register_dev(container_of(node, struct pnfs_block_dev, node))) {
+ /*
+ * If we cannot register, treat this device as transient:
+ * Make a negative cache entry for the device
+ */
+ nfs4_mark_deviceid_unavailable(node);
+ goto out_put;
}
+ return node;
+
+out_put:
nfs4_put_deviceid_node(node);
- return ERR_PTR(-ENODEV);
+ return ERR_PTR(err);
}
static int
diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h
index 716bc75e9ed2..6da40ca19570 100644
--- a/fs/nfs/blocklayout/blocklayout.h
+++ b/fs/nfs/blocklayout/blocklayout.h
@@ -104,20 +104,26 @@ struct pnfs_block_dev {
u64 start;
u64 len;
+ enum pnfs_block_volume_type type;
u32 nr_children;
struct pnfs_block_dev *children;
u64 chunk_size;
- struct block_device *bdev;
+ struct file *bdev_file;
u64 disk_offset;
+ unsigned long flags;
u64 pr_key;
- bool pr_registered;
bool (*map)(struct pnfs_block_dev *dev, u64 offset,
struct pnfs_block_dev_map *map);
};
+/* pnfs_block_dev flag bits */
+enum {
+ PNFS_BDEV_REGISTERED = 0,
+};
+
/* sector_t fields are all in 512-byte sectors */
struct pnfs_block_extent {
union {
@@ -172,6 +178,7 @@ struct bl_msg_hdr {
#define BL_DEVICE_REQUEST_ERR 0x2 /* User level process fails */
/* dev.c */
+bool bl_register_dev(struct pnfs_block_dev *d);
struct nfs4_deviceid_node *bl_alloc_deviceid_node(struct nfs_server *server,
struct pnfs_device *pdev, gfp_t gfp_mask);
void bl_free_deviceid_node(struct nfs4_deviceid_node *d);
diff --git a/fs/nfs/blocklayout/dev.c b/fs/nfs/blocklayout/dev.c
index 65cbb5607a5f..44306ac22353 100644
--- a/fs/nfs/blocklayout/dev.c
+++ b/fs/nfs/blocklayout/dev.c
@@ -10,12 +10,81 @@
#include <linux/pr.h>
#include "blocklayout.h"
+#include "../nfs4trace.h"
#define NFSDBG_FACILITY NFSDBG_PNFS_LD
+static void bl_unregister_scsi(struct pnfs_block_dev *dev)
+{
+ struct block_device *bdev = file_bdev(dev->bdev_file);
+ const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops;
+ int status;
+
+ status = ops->pr_register(bdev, dev->pr_key, 0, false);
+ if (status)
+ trace_bl_pr_key_unreg_err(bdev, dev->pr_key, status);
+ else
+ trace_bl_pr_key_unreg(bdev, dev->pr_key);
+}
+
+static bool bl_register_scsi(struct pnfs_block_dev *dev)
+{
+ struct block_device *bdev = file_bdev(dev->bdev_file);
+ const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops;
+ int status;
+
+ if (test_and_set_bit(PNFS_BDEV_REGISTERED, &dev->flags))
+ return true;
+
+ status = ops->pr_register(bdev, 0, dev->pr_key, true);
+ if (status) {
+ trace_bl_pr_key_reg_err(bdev, dev->pr_key, status);
+ return false;
+ }
+ trace_bl_pr_key_reg(bdev, dev->pr_key);
+ return true;
+}
+
+static void bl_unregister_dev(struct pnfs_block_dev *dev)
+{
+ u32 i;
+
+ if (dev->nr_children) {
+ for (i = 0; i < dev->nr_children; i++)
+ bl_unregister_dev(&dev->children[i]);
+ return;
+ }
+
+ if (dev->type == PNFS_BLOCK_VOLUME_SCSI &&
+ test_and_clear_bit(PNFS_BDEV_REGISTERED, &dev->flags))
+ bl_unregister_scsi(dev);
+}
+
+bool bl_register_dev(struct pnfs_block_dev *dev)
+{
+ u32 i;
+
+ if (dev->nr_children) {
+ for (i = 0; i < dev->nr_children; i++) {
+ if (!bl_register_dev(&dev->children[i])) {
+ while (i > 0)
+ bl_unregister_dev(&dev->children[--i]);
+ return false;
+ }
+ }
+ return true;
+ }
+
+ if (dev->type == PNFS_BLOCK_VOLUME_SCSI)
+ return bl_register_scsi(dev);
+ return true;
+}
+
static void
bl_free_device(struct pnfs_block_dev *dev)
{
+ bl_unregister_dev(dev);
+
if (dev->nr_children) {
int i;
@@ -23,19 +92,8 @@ bl_free_device(struct pnfs_block_dev *dev)
bl_free_device(&dev->children[i]);
kfree(dev->children);
} else {
- if (dev->pr_registered) {
- const struct pr_ops *ops =
- dev->bdev->bd_disk->fops->pr_ops;
- int error;
-
- error = ops->pr_register(dev->bdev, dev->pr_key, 0,
- false);
- if (error)
- pr_err("failed to unregister PR key.\n");
- }
-
- if (dev->bdev)
- blkdev_put(dev->bdev, NULL);
+ if (dev->bdev_file)
+ fput(dev->bdev_file);
}
}
@@ -169,7 +227,7 @@ static bool bl_map_simple(struct pnfs_block_dev *dev, u64 offset,
map->start = dev->start;
map->len = dev->len;
map->disk_offset = dev->disk_offset;
- map->bdev = dev->bdev;
+ map->bdev = file_bdev(dev->bdev_file);
return true;
}
@@ -199,10 +257,11 @@ static bool bl_map_stripe(struct pnfs_block_dev *dev, u64 offset,
struct pnfs_block_dev *child;
u64 chunk;
u32 chunk_idx;
+ u64 disk_chunk;
u64 disk_offset;
chunk = div_u64(offset, dev->chunk_size);
- div_u64_rem(chunk, dev->nr_children, &chunk_idx);
+ disk_chunk = div_u64_rem(chunk, dev->nr_children, &chunk_idx);
if (chunk_idx >= dev->nr_children) {
dprintk("%s: invalid chunk idx %d (%lld/%lld)\n",
@@ -215,7 +274,7 @@ static bool bl_map_stripe(struct pnfs_block_dev *dev, u64 offset,
offset = chunk * dev->chunk_size;
/* disk offset of the stripe */
- disk_offset = div_u64(offset, dev->nr_children);
+ disk_offset = disk_chunk * dev->chunk_size;
child = &dev->children[chunk_idx];
child->map(child, disk_offset, map);
@@ -236,28 +295,26 @@ bl_parse_simple(struct nfs_server *server, struct pnfs_block_dev *d,
struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
{
struct pnfs_block_volume *v = &volumes[idx];
- struct block_device *bdev;
+ struct file *bdev_file;
dev_t dev;
dev = bl_resolve_deviceid(server, v, gfp_mask);
if (!dev)
return -EIO;
- bdev = blkdev_get_by_dev(dev, BLK_OPEN_READ | BLK_OPEN_WRITE, NULL,
- NULL);
- if (IS_ERR(bdev)) {
+ bdev_file = bdev_file_open_by_dev(dev, BLK_OPEN_READ | BLK_OPEN_WRITE,
+ NULL, NULL);
+ if (IS_ERR(bdev_file)) {
printk(KERN_WARNING "pNFS: failed to open device %d:%d (%ld)\n",
- MAJOR(dev), MINOR(dev), PTR_ERR(bdev));
- return PTR_ERR(bdev);
+ MAJOR(dev), MINOR(dev), PTR_ERR(bdev_file));
+ return PTR_ERR(bdev_file);
}
- d->bdev = bdev;
-
-
- d->len = bdev_nr_bytes(d->bdev);
+ d->bdev_file = bdev_file;
+ d->len = bdev_nr_bytes(file_bdev(bdev_file));
d->map = bl_map_simple;
printk(KERN_INFO "pNFS: using block device %s\n",
- d->bdev->bd_disk->disk_name);
+ file_bdev(bdev_file)->bd_disk->disk_name);
return 0;
}
@@ -302,10 +359,10 @@ bl_validate_designator(struct pnfs_block_volume *v)
}
}
-static struct block_device *
+static struct file *
bl_open_path(struct pnfs_block_volume *v, const char *prefix)
{
- struct block_device *bdev;
+ struct file *bdev_file;
const char *devname;
devname = kasprintf(GFP_KERNEL, "/dev/disk/by-id/%s%*phN",
@@ -313,15 +370,15 @@ bl_open_path(struct pnfs_block_volume *v, const char *prefix)
if (!devname)
return ERR_PTR(-ENOMEM);
- bdev = blkdev_get_by_path(devname, BLK_OPEN_READ | BLK_OPEN_WRITE, NULL,
- NULL);
- if (IS_ERR(bdev)) {
- pr_warn("pNFS: failed to open device %s (%ld)\n",
- devname, PTR_ERR(bdev));
+ bdev_file = bdev_file_open_by_path(devname, BLK_OPEN_READ | BLK_OPEN_WRITE,
+ NULL, NULL);
+ if (IS_ERR(bdev_file)) {
+ dprintk("failed to open device %s (%ld)\n",
+ devname, PTR_ERR(bdev_file));
}
kfree(devname);
- return bdev;
+ return bdev_file;
}
static int
@@ -331,6 +388,7 @@ bl_parse_scsi(struct nfs_server *server, struct pnfs_block_dev *d,
struct pnfs_block_volume *v = &volumes[idx];
struct block_device *bdev;
const struct pr_ops *ops;
+ struct file *bdev_file;
int error;
if (!bl_validate_designator(v))
@@ -342,40 +400,38 @@ bl_parse_scsi(struct nfs_server *server, struct pnfs_block_dev *d,
* On other distributions like Debian, the default SCSI by-id path will
* point to the dm-multipath device if one exists.
*/
- bdev = bl_open_path(v, "dm-uuid-mpath-0x");
- if (IS_ERR(bdev))
- bdev = bl_open_path(v, "wwn-0x");
- if (IS_ERR(bdev))
- return PTR_ERR(bdev);
- d->bdev = bdev;
-
- d->len = bdev_nr_bytes(d->bdev);
+ bdev_file = bl_open_path(v, "dm-uuid-mpath-0x");
+ if (IS_ERR(bdev_file))
+ bdev_file = bl_open_path(v, "wwn-0x");
+ if (IS_ERR(bdev_file))
+ bdev_file = bl_open_path(v, "nvme-eui.");
+ if (IS_ERR(bdev_file)) {
+ pr_warn("pNFS: no device found for volume %*phN\n",
+ v->scsi.designator_len, v->scsi.designator);
+ return PTR_ERR(bdev_file);
+ }
+ d->bdev_file = bdev_file;
+ bdev = file_bdev(bdev_file);
+
+ d->len = bdev_nr_bytes(bdev);
d->map = bl_map_simple;
d->pr_key = v->scsi.pr_key;
- pr_info("pNFS: using block device %s (reservation key 0x%llx)\n",
- d->bdev->bd_disk->disk_name, d->pr_key);
+ if (d->len == 0)
+ return -ENODEV;
- ops = d->bdev->bd_disk->fops->pr_ops;
+ ops = bdev->bd_disk->fops->pr_ops;
if (!ops) {
pr_err("pNFS: block device %s does not support reservations.",
- d->bdev->bd_disk->disk_name);
+ bdev->bd_disk->disk_name);
error = -EINVAL;
goto out_blkdev_put;
}
- error = ops->pr_register(d->bdev, 0, d->pr_key, true);
- if (error) {
- pr_err("pNFS: failed to register key for block device %s.",
- d->bdev->bd_disk->disk_name);
- goto out_blkdev_put;
- }
-
- d->pr_registered = true;
return 0;
out_blkdev_put:
- blkdev_put(d->bdev, NULL);
+ fput(d->bdev_file);
return error;
}
@@ -457,7 +513,9 @@ static int
bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d,
struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
{
- switch (volumes[idx].type) {
+ d->type = volumes[idx].type;
+
+ switch (d->type) {
case PNFS_BLOCK_VOLUME_SIMPLE:
return bl_parse_simple(server, d, volumes, idx, gfp_mask);
case PNFS_BLOCK_VOLUME_SLICE:
@@ -469,7 +527,7 @@ bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d,
case PNFS_BLOCK_VOLUME_SCSI:
return bl_parse_scsi(server, d, volumes, idx, gfp_mask);
default:
- dprintk("unsupported volume type: %d\n", volumes[idx].type);
+ dprintk("unsupported volume type: %d\n", d->type);
return -EIO;
}
}
diff --git a/fs/nfs/blocklayout/extent_tree.c b/fs/nfs/blocklayout/extent_tree.c
index 8f7cff7a4293..0add0f329816 100644
--- a/fs/nfs/blocklayout/extent_tree.c
+++ b/fs/nfs/blocklayout/extent_tree.c
@@ -552,6 +552,15 @@ static int ext_tree_encode_commit(struct pnfs_block_layout *bl, __be32 *p,
return ret;
}
+/**
+ * ext_tree_prepare_commit - encode extents that need to be committed
+ * @arg: layout commit data
+ *
+ * Return values:
+ * %0: Success, all required extents are encoded
+ * %-ENOSPC: Some extents are encoded, but not all, due to RPC size limit
+ * %-ENOMEM: Out of memory, extents not encoded
+ */
int
ext_tree_prepare_commit(struct nfs4_layoutcommit_args *arg)
{
@@ -568,12 +577,12 @@ ext_tree_prepare_commit(struct nfs4_layoutcommit_args *arg)
start_p = page_address(arg->layoutupdate_page);
arg->layoutupdate_pages = &arg->layoutupdate_page;
-retry:
- ret = ext_tree_encode_commit(bl, start_p + 1, buffer_size, &count, &arg->lastbytewritten);
+ ret = ext_tree_encode_commit(bl, start_p + 1, buffer_size,
+ &count, &arg->lastbytewritten);
if (unlikely(ret)) {
ext_tree_free_commitdata(arg, buffer_size);
- buffer_size = ext_tree_layoutupdate_size(bl, count);
+ buffer_size = NFS_SERVER(arg->inode)->wsize;
count = 0;
arg->layoutupdate_pages =
@@ -588,7 +597,8 @@ retry:
return -ENOMEM;
}
- goto retry;
+ ret = ext_tree_encode_commit(bl, start_p + 1, buffer_size,
+ &count, &arg->lastbytewritten);
}
*start_p = cpu_to_be32(count);
@@ -608,7 +618,7 @@ retry:
}
dprintk("%s found %zu ranges\n", __func__, count);
- return 0;
+ return ret;
}
void
diff --git a/fs/nfs/blocklayout/rpc_pipefs.c b/fs/nfs/blocklayout/rpc_pipefs.c
index 6c977288cc28..d8d50a88de04 100644
--- a/fs/nfs/blocklayout/rpc_pipefs.c
+++ b/fs/nfs/blocklayout/rpc_pipefs.c
@@ -75,7 +75,7 @@ bl_resolve_deviceid(struct nfs_server *server, struct pnfs_block_volume *b,
msg->len = sizeof(*bl_msg) + b->simple.len;
msg->data = kzalloc(msg->len, gfp_mask);
if (!msg->data)
- goto out_free_data;
+ goto out_unlock;
bl_msg = msg->data;
bl_msg->type = BL_DEVICE_MOUNT;