diff options
Diffstat (limited to 'fs/nfs/blocklayout')
-rw-r--r-- | fs/nfs/blocklayout/blocklayout.c | 42 | ||||
-rw-r--r-- | fs/nfs/blocklayout/blocklayout.h | 11 | ||||
-rw-r--r-- | fs/nfs/blocklayout/dev.c | 174 | ||||
-rw-r--r-- | fs/nfs/blocklayout/extent_tree.c | 20 | ||||
-rw-r--r-- | fs/nfs/blocklayout/rpc_pipefs.c | 2 |
5 files changed, 172 insertions, 77 deletions
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 6be13e0ec170..5d6edafbed20 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -149,8 +149,8 @@ do_add_page_to_bio(struct bio *bio, int npg, enum req_op op, sector_t isect, /* limit length to what the device mapping allows */ end = disk_addr + *len; - if (end >= map->start + map->len) - *len = map->start + map->len - disk_addr; + if (end >= map->disk_offset + map->len) + *len = map->disk_offset + map->len - disk_addr; retry: if (!bio) { @@ -564,25 +564,45 @@ bl_find_get_deviceid(struct nfs_server *server, gfp_t gfp_mask) { struct nfs4_deviceid_node *node; - unsigned long start, end; + int err = -ENODEV; retry: node = nfs4_find_get_deviceid(server, id, cred, gfp_mask); if (!node) return ERR_PTR(-ENODEV); - if (test_bit(NFS_DEVICEID_UNAVAILABLE, &node->flags) == 0) - return node; + /* + * Devices that are marked unavailable are left in the cache with a + * timeout to avoid sending GETDEVINFO after every LAYOUTGET, or + * constantly attempting to register the device. Once marked as + * unavailable they must be deleted and never reused. + */ + if (test_bit(NFS_DEVICEID_UNAVAILABLE, &node->flags)) { + unsigned long end = jiffies; + unsigned long start = end - PNFS_DEVICE_RETRY_TIMEOUT; + + if (!time_in_range(node->timestamp_unavailable, start, end)) { + /* Uncork subsequent GETDEVINFO operations for this device */ + nfs4_delete_deviceid(node->ld, node->nfs_client, id); + goto retry; + } + goto out_put; + } - end = jiffies; - start = end - PNFS_DEVICE_RETRY_TIMEOUT; - if (!time_in_range(node->timestamp_unavailable, start, end)) { - nfs4_delete_deviceid(node->ld, node->nfs_client, id); - goto retry; + if (!bl_register_dev(container_of(node, struct pnfs_block_dev, node))) { + /* + * If we cannot register, treat this device as transient: + * Make a negative cache entry for the device + */ + nfs4_mark_deviceid_unavailable(node); + goto out_put; } + return node; + +out_put: nfs4_put_deviceid_node(node); - return ERR_PTR(-ENODEV); + return ERR_PTR(err); } static int diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h index 716bc75e9ed2..6da40ca19570 100644 --- a/fs/nfs/blocklayout/blocklayout.h +++ b/fs/nfs/blocklayout/blocklayout.h @@ -104,20 +104,26 @@ struct pnfs_block_dev { u64 start; u64 len; + enum pnfs_block_volume_type type; u32 nr_children; struct pnfs_block_dev *children; u64 chunk_size; - struct block_device *bdev; + struct file *bdev_file; u64 disk_offset; + unsigned long flags; u64 pr_key; - bool pr_registered; bool (*map)(struct pnfs_block_dev *dev, u64 offset, struct pnfs_block_dev_map *map); }; +/* pnfs_block_dev flag bits */ +enum { + PNFS_BDEV_REGISTERED = 0, +}; + /* sector_t fields are all in 512-byte sectors */ struct pnfs_block_extent { union { @@ -172,6 +178,7 @@ struct bl_msg_hdr { #define BL_DEVICE_REQUEST_ERR 0x2 /* User level process fails */ /* dev.c */ +bool bl_register_dev(struct pnfs_block_dev *d); struct nfs4_deviceid_node *bl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, gfp_t gfp_mask); void bl_free_deviceid_node(struct nfs4_deviceid_node *d); diff --git a/fs/nfs/blocklayout/dev.c b/fs/nfs/blocklayout/dev.c index 65cbb5607a5f..44306ac22353 100644 --- a/fs/nfs/blocklayout/dev.c +++ b/fs/nfs/blocklayout/dev.c @@ -10,12 +10,81 @@ #include <linux/pr.h> #include "blocklayout.h" +#include "../nfs4trace.h" #define NFSDBG_FACILITY NFSDBG_PNFS_LD +static void bl_unregister_scsi(struct pnfs_block_dev *dev) +{ + struct block_device *bdev = file_bdev(dev->bdev_file); + const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; + int status; + + status = ops->pr_register(bdev, dev->pr_key, 0, false); + if (status) + trace_bl_pr_key_unreg_err(bdev, dev->pr_key, status); + else + trace_bl_pr_key_unreg(bdev, dev->pr_key); +} + +static bool bl_register_scsi(struct pnfs_block_dev *dev) +{ + struct block_device *bdev = file_bdev(dev->bdev_file); + const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; + int status; + + if (test_and_set_bit(PNFS_BDEV_REGISTERED, &dev->flags)) + return true; + + status = ops->pr_register(bdev, 0, dev->pr_key, true); + if (status) { + trace_bl_pr_key_reg_err(bdev, dev->pr_key, status); + return false; + } + trace_bl_pr_key_reg(bdev, dev->pr_key); + return true; +} + +static void bl_unregister_dev(struct pnfs_block_dev *dev) +{ + u32 i; + + if (dev->nr_children) { + for (i = 0; i < dev->nr_children; i++) + bl_unregister_dev(&dev->children[i]); + return; + } + + if (dev->type == PNFS_BLOCK_VOLUME_SCSI && + test_and_clear_bit(PNFS_BDEV_REGISTERED, &dev->flags)) + bl_unregister_scsi(dev); +} + +bool bl_register_dev(struct pnfs_block_dev *dev) +{ + u32 i; + + if (dev->nr_children) { + for (i = 0; i < dev->nr_children; i++) { + if (!bl_register_dev(&dev->children[i])) { + while (i > 0) + bl_unregister_dev(&dev->children[--i]); + return false; + } + } + return true; + } + + if (dev->type == PNFS_BLOCK_VOLUME_SCSI) + return bl_register_scsi(dev); + return true; +} + static void bl_free_device(struct pnfs_block_dev *dev) { + bl_unregister_dev(dev); + if (dev->nr_children) { int i; @@ -23,19 +92,8 @@ bl_free_device(struct pnfs_block_dev *dev) bl_free_device(&dev->children[i]); kfree(dev->children); } else { - if (dev->pr_registered) { - const struct pr_ops *ops = - dev->bdev->bd_disk->fops->pr_ops; - int error; - - error = ops->pr_register(dev->bdev, dev->pr_key, 0, - false); - if (error) - pr_err("failed to unregister PR key.\n"); - } - - if (dev->bdev) - blkdev_put(dev->bdev, NULL); + if (dev->bdev_file) + fput(dev->bdev_file); } } @@ -169,7 +227,7 @@ static bool bl_map_simple(struct pnfs_block_dev *dev, u64 offset, map->start = dev->start; map->len = dev->len; map->disk_offset = dev->disk_offset; - map->bdev = dev->bdev; + map->bdev = file_bdev(dev->bdev_file); return true; } @@ -199,10 +257,11 @@ static bool bl_map_stripe(struct pnfs_block_dev *dev, u64 offset, struct pnfs_block_dev *child; u64 chunk; u32 chunk_idx; + u64 disk_chunk; u64 disk_offset; chunk = div_u64(offset, dev->chunk_size); - div_u64_rem(chunk, dev->nr_children, &chunk_idx); + disk_chunk = div_u64_rem(chunk, dev->nr_children, &chunk_idx); if (chunk_idx >= dev->nr_children) { dprintk("%s: invalid chunk idx %d (%lld/%lld)\n", @@ -215,7 +274,7 @@ static bool bl_map_stripe(struct pnfs_block_dev *dev, u64 offset, offset = chunk * dev->chunk_size; /* disk offset of the stripe */ - disk_offset = div_u64(offset, dev->nr_children); + disk_offset = disk_chunk * dev->chunk_size; child = &dev->children[chunk_idx]; child->map(child, disk_offset, map); @@ -236,28 +295,26 @@ bl_parse_simple(struct nfs_server *server, struct pnfs_block_dev *d, struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask) { struct pnfs_block_volume *v = &volumes[idx]; - struct block_device *bdev; + struct file *bdev_file; dev_t dev; dev = bl_resolve_deviceid(server, v, gfp_mask); if (!dev) return -EIO; - bdev = blkdev_get_by_dev(dev, BLK_OPEN_READ | BLK_OPEN_WRITE, NULL, - NULL); - if (IS_ERR(bdev)) { + bdev_file = bdev_file_open_by_dev(dev, BLK_OPEN_READ | BLK_OPEN_WRITE, + NULL, NULL); + if (IS_ERR(bdev_file)) { printk(KERN_WARNING "pNFS: failed to open device %d:%d (%ld)\n", - MAJOR(dev), MINOR(dev), PTR_ERR(bdev)); - return PTR_ERR(bdev); + MAJOR(dev), MINOR(dev), PTR_ERR(bdev_file)); + return PTR_ERR(bdev_file); } - d->bdev = bdev; - - - d->len = bdev_nr_bytes(d->bdev); + d->bdev_file = bdev_file; + d->len = bdev_nr_bytes(file_bdev(bdev_file)); d->map = bl_map_simple; printk(KERN_INFO "pNFS: using block device %s\n", - d->bdev->bd_disk->disk_name); + file_bdev(bdev_file)->bd_disk->disk_name); return 0; } @@ -302,10 +359,10 @@ bl_validate_designator(struct pnfs_block_volume *v) } } -static struct block_device * +static struct file * bl_open_path(struct pnfs_block_volume *v, const char *prefix) { - struct block_device *bdev; + struct file *bdev_file; const char *devname; devname = kasprintf(GFP_KERNEL, "/dev/disk/by-id/%s%*phN", @@ -313,15 +370,15 @@ bl_open_path(struct pnfs_block_volume *v, const char *prefix) if (!devname) return ERR_PTR(-ENOMEM); - bdev = blkdev_get_by_path(devname, BLK_OPEN_READ | BLK_OPEN_WRITE, NULL, - NULL); - if (IS_ERR(bdev)) { - pr_warn("pNFS: failed to open device %s (%ld)\n", - devname, PTR_ERR(bdev)); + bdev_file = bdev_file_open_by_path(devname, BLK_OPEN_READ | BLK_OPEN_WRITE, + NULL, NULL); + if (IS_ERR(bdev_file)) { + dprintk("failed to open device %s (%ld)\n", + devname, PTR_ERR(bdev_file)); } kfree(devname); - return bdev; + return bdev_file; } static int @@ -331,6 +388,7 @@ bl_parse_scsi(struct nfs_server *server, struct pnfs_block_dev *d, struct pnfs_block_volume *v = &volumes[idx]; struct block_device *bdev; const struct pr_ops *ops; + struct file *bdev_file; int error; if (!bl_validate_designator(v)) @@ -342,40 +400,38 @@ bl_parse_scsi(struct nfs_server *server, struct pnfs_block_dev *d, * On other distributions like Debian, the default SCSI by-id path will * point to the dm-multipath device if one exists. */ - bdev = bl_open_path(v, "dm-uuid-mpath-0x"); - if (IS_ERR(bdev)) - bdev = bl_open_path(v, "wwn-0x"); - if (IS_ERR(bdev)) - return PTR_ERR(bdev); - d->bdev = bdev; - - d->len = bdev_nr_bytes(d->bdev); + bdev_file = bl_open_path(v, "dm-uuid-mpath-0x"); + if (IS_ERR(bdev_file)) + bdev_file = bl_open_path(v, "wwn-0x"); + if (IS_ERR(bdev_file)) + bdev_file = bl_open_path(v, "nvme-eui."); + if (IS_ERR(bdev_file)) { + pr_warn("pNFS: no device found for volume %*phN\n", + v->scsi.designator_len, v->scsi.designator); + return PTR_ERR(bdev_file); + } + d->bdev_file = bdev_file; + bdev = file_bdev(bdev_file); + + d->len = bdev_nr_bytes(bdev); d->map = bl_map_simple; d->pr_key = v->scsi.pr_key; - pr_info("pNFS: using block device %s (reservation key 0x%llx)\n", - d->bdev->bd_disk->disk_name, d->pr_key); + if (d->len == 0) + return -ENODEV; - ops = d->bdev->bd_disk->fops->pr_ops; + ops = bdev->bd_disk->fops->pr_ops; if (!ops) { pr_err("pNFS: block device %s does not support reservations.", - d->bdev->bd_disk->disk_name); + bdev->bd_disk->disk_name); error = -EINVAL; goto out_blkdev_put; } - error = ops->pr_register(d->bdev, 0, d->pr_key, true); - if (error) { - pr_err("pNFS: failed to register key for block device %s.", - d->bdev->bd_disk->disk_name); - goto out_blkdev_put; - } - - d->pr_registered = true; return 0; out_blkdev_put: - blkdev_put(d->bdev, NULL); + fput(d->bdev_file); return error; } @@ -457,7 +513,9 @@ static int bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d, struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask) { - switch (volumes[idx].type) { + d->type = volumes[idx].type; + + switch (d->type) { case PNFS_BLOCK_VOLUME_SIMPLE: return bl_parse_simple(server, d, volumes, idx, gfp_mask); case PNFS_BLOCK_VOLUME_SLICE: @@ -469,7 +527,7 @@ bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d, case PNFS_BLOCK_VOLUME_SCSI: return bl_parse_scsi(server, d, volumes, idx, gfp_mask); default: - dprintk("unsupported volume type: %d\n", volumes[idx].type); + dprintk("unsupported volume type: %d\n", d->type); return -EIO; } } diff --git a/fs/nfs/blocklayout/extent_tree.c b/fs/nfs/blocklayout/extent_tree.c index 8f7cff7a4293..0add0f329816 100644 --- a/fs/nfs/blocklayout/extent_tree.c +++ b/fs/nfs/blocklayout/extent_tree.c @@ -552,6 +552,15 @@ static int ext_tree_encode_commit(struct pnfs_block_layout *bl, __be32 *p, return ret; } +/** + * ext_tree_prepare_commit - encode extents that need to be committed + * @arg: layout commit data + * + * Return values: + * %0: Success, all required extents are encoded + * %-ENOSPC: Some extents are encoded, but not all, due to RPC size limit + * %-ENOMEM: Out of memory, extents not encoded + */ int ext_tree_prepare_commit(struct nfs4_layoutcommit_args *arg) { @@ -568,12 +577,12 @@ ext_tree_prepare_commit(struct nfs4_layoutcommit_args *arg) start_p = page_address(arg->layoutupdate_page); arg->layoutupdate_pages = &arg->layoutupdate_page; -retry: - ret = ext_tree_encode_commit(bl, start_p + 1, buffer_size, &count, &arg->lastbytewritten); + ret = ext_tree_encode_commit(bl, start_p + 1, buffer_size, + &count, &arg->lastbytewritten); if (unlikely(ret)) { ext_tree_free_commitdata(arg, buffer_size); - buffer_size = ext_tree_layoutupdate_size(bl, count); + buffer_size = NFS_SERVER(arg->inode)->wsize; count = 0; arg->layoutupdate_pages = @@ -588,7 +597,8 @@ retry: return -ENOMEM; } - goto retry; + ret = ext_tree_encode_commit(bl, start_p + 1, buffer_size, + &count, &arg->lastbytewritten); } *start_p = cpu_to_be32(count); @@ -608,7 +618,7 @@ retry: } dprintk("%s found %zu ranges\n", __func__, count); - return 0; + return ret; } void diff --git a/fs/nfs/blocklayout/rpc_pipefs.c b/fs/nfs/blocklayout/rpc_pipefs.c index 6c977288cc28..d8d50a88de04 100644 --- a/fs/nfs/blocklayout/rpc_pipefs.c +++ b/fs/nfs/blocklayout/rpc_pipefs.c @@ -75,7 +75,7 @@ bl_resolve_deviceid(struct nfs_server *server, struct pnfs_block_volume *b, msg->len = sizeof(*bl_msg) + b->simple.len; msg->data = kzalloc(msg->len, gfp_mask); if (!msg->data) - goto out_free_data; + goto out_unlock; bl_msg = msg->data; bl_msg->type = BL_DEVICE_MOUNT; |