diff options
Diffstat (limited to 'drivers/block')
34 files changed, 2140 insertions, 180 deletions
diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c index 4e2c367fec11..1f286ab461d3 100644 --- a/drivers/block/DAC960.c +++ b/drivers/block/DAC960.c @@ -36,7 +36,7 @@ #include <linux/ioport.h> #include <linux/mm.h> #include <linux/slab.h> -#include <linux/smp_lock.h> +#include <linux/mutex.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/reboot.h> @@ -54,6 +54,7 @@ #define DAC960_GAM_MINOR 252 +static DEFINE_MUTEX(DAC960_mutex); static DAC960_Controller_T *DAC960_Controllers[DAC960_MaxControllers]; static int DAC960_ControllerCount; static struct proc_dir_entry *DAC960_ProcDirectoryEntry; @@ -81,7 +82,7 @@ static int DAC960_open(struct block_device *bdev, fmode_t mode) int drive_nr = (long)disk->private_data; int ret = -ENXIO; - lock_kernel(); + mutex_lock(&DAC960_mutex); if (p->FirmwareType == DAC960_V1_Controller) { if (p->V1.LogicalDriveInformation[drive_nr]. LogicalDriveState == DAC960_V1_LogicalDrive_Offline) @@ -99,7 +100,7 @@ static int DAC960_open(struct block_device *bdev, fmode_t mode) goto out; ret = 0; out: - unlock_kernel(); + mutex_unlock(&DAC960_mutex); return ret; } @@ -6625,7 +6626,7 @@ static long DAC960_gam_ioctl(struct file *file, unsigned int Request, long ErrorCode = 0; if (!capable(CAP_SYS_ADMIN)) return -EACCES; - lock_kernel(); + mutex_lock(&DAC960_mutex); switch (Request) { case DAC960_IOCTL_GET_CONTROLLER_COUNT: @@ -7056,13 +7057,14 @@ static long DAC960_gam_ioctl(struct file *file, unsigned int Request, default: ErrorCode = -ENOTTY; } - unlock_kernel(); + mutex_unlock(&DAC960_mutex); return ErrorCode; } static const struct file_operations DAC960_gam_fops = { .owner = THIS_MODULE, - .unlocked_ioctl = DAC960_gam_ioctl + .unlocked_ioctl = DAC960_gam_ioctl, + .llseek = noop_llseek, }; static struct miscdevice DAC960_gam_dev = { diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index de277689da61..4b9359a6f6ca 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -488,4 +488,21 @@ config BLK_DEV_HD If unsure, say N. +config BLK_DEV_RBD + tristate "Rados block device (RBD)" + depends on INET && EXPERIMENTAL && BLOCK + select CEPH_LIB + select LIBCRC32C + select CRYPTO_AES + select CRYPTO + default n + help + Say Y here if you want include the Rados block device, which stripes + a block device over objects stored in the Ceph distributed object + store. + + More information at http://ceph.newdream.net/. + + If unsure, say N. + endif # BLK_DEV diff --git a/drivers/block/Makefile b/drivers/block/Makefile index aff5ac925c34..d7f463d6312d 100644 --- a/drivers/block/Makefile +++ b/drivers/block/Makefile @@ -37,5 +37,6 @@ obj-$(CONFIG_BLK_DEV_HD) += hd.o obj-$(CONFIG_XEN_BLKDEV_FRONTEND) += xen-blkfront.o obj-$(CONFIG_BLK_DEV_DRBD) += drbd/ +obj-$(CONFIG_BLK_DEV_RBD) += rbd.o swim_mod-objs := swim.o swim_asm.o diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c index 327ed27dfe1a..a1725e6488d3 100644 --- a/drivers/block/amiflop.c +++ b/drivers/block/amiflop.c @@ -60,7 +60,7 @@ #include <linux/hdreg.h> #include <linux/delay.h> #include <linux/init.h> -#include <linux/smp_lock.h> +#include <linux/mutex.h> #include <linux/amifdreg.h> #include <linux/amifd.h> #include <linux/buffer_head.h> @@ -109,6 +109,7 @@ #define FD_HD_3 0x55555555 /* high-density 3.5" (1760K) drive */ #define FD_DD_5 0xaaaaaaaa /* double-density 5.25" (440K) drive */ +static DEFINE_MUTEX(amiflop_mutex); static unsigned long int fd_def_df0 = FD_DD_3; /* default for df0 if it doesn't identify */ module_param(fd_def_df0, ulong, 0); @@ -1541,9 +1542,9 @@ static int fd_ioctl(struct block_device *bdev, fmode_t mode, { int ret; - lock_kernel(); + mutex_lock(&amiflop_mutex); ret = fd_locked_ioctl(bdev, mode, cmd, param); - unlock_kernel(); + mutex_unlock(&amiflop_mutex); return ret; } @@ -1590,11 +1591,11 @@ static int floppy_open(struct block_device *bdev, fmode_t mode) int old_dev; unsigned long flags; - lock_kernel(); + mutex_lock(&amiflop_mutex); old_dev = fd_device[drive]; if (fd_ref[drive] && old_dev != system) { - unlock_kernel(); + mutex_unlock(&amiflop_mutex); return -EBUSY; } @@ -1610,7 +1611,7 @@ static int floppy_open(struct block_device *bdev, fmode_t mode) rel_fdc(); if (wrprot) { - unlock_kernel(); + mutex_unlock(&amiflop_mutex); return -EROFS; } } @@ -1629,7 +1630,7 @@ static int floppy_open(struct block_device *bdev, fmode_t mode) printk(KERN_INFO "fd%d: accessing %s-disk with %s-layout\n",drive, unit[drive].type->name, data_types[system].name); - unlock_kernel(); + mutex_unlock(&amiflop_mutex); return 0; } @@ -1638,7 +1639,7 @@ static int floppy_release(struct gendisk *disk, fmode_t mode) struct amiga_floppy_struct *p = disk->private_data; int drive = p - unit; - lock_kernel(); + mutex_lock(&amiflop_mutex); if (unit[drive].dirty == 1) { del_timer (flush_track_timer + drive); non_int_flush_track (drive); @@ -1652,7 +1653,7 @@ static int floppy_release(struct gendisk *disk, fmode_t mode) /* the mod_use counter is handled this way */ floppy_off (drive | 0x40000000); #endif - unlock_kernel(); + mutex_unlock(&amiflop_mutex); return 0; } diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c index a946929735a5..f21c237a9e5e 100644 --- a/drivers/block/aoe/aoeblk.c +++ b/drivers/block/aoe/aoeblk.c @@ -12,9 +12,10 @@ #include <linux/slab.h> #include <linux/genhd.h> #include <linux/netdevice.h> -#include <linux/smp_lock.h> +#include <linux/mutex.h> #include "aoe.h" +static DEFINE_MUTEX(aoeblk_mutex); static struct kmem_cache *buf_pool_cache; static ssize_t aoedisk_show_state(struct device *dev, @@ -125,16 +126,16 @@ aoeblk_open(struct block_device *bdev, fmode_t mode) struct aoedev *d = bdev->bd_disk->private_data; ulong flags; - lock_kernel(); + mutex_lock(&aoeblk_mutex); spin_lock_irqsave(&d->lock, flags); if (d->flags & DEVFL_UP) { d->nopen++; spin_unlock_irqrestore(&d->lock, flags); - unlock_kernel(); + mutex_unlock(&aoeblk_mutex); return 0; } spin_unlock_irqrestore(&d->lock, flags); - unlock_kernel(); + mutex_unlock(&aoeblk_mutex); return -ENODEV; } diff --git a/drivers/block/aoe/aoechr.c b/drivers/block/aoe/aoechr.c index 4a1b9e7464aa..146296ca4965 100644 --- a/drivers/block/aoe/aoechr.c +++ b/drivers/block/aoe/aoechr.c @@ -9,7 +9,7 @@ #include <linux/completion.h> #include <linux/delay.h> #include <linux/slab.h> -#include <linux/smp_lock.h> +#include <linux/mutex.h> #include <linux/skbuff.h> #include "aoe.h" @@ -37,6 +37,7 @@ struct ErrMsg { char *msg; }; +static DEFINE_MUTEX(aoechr_mutex); static struct ErrMsg emsgs[NMSG]; static int emsgs_head_idx, emsgs_tail_idx; static struct completion emsgs_comp; @@ -183,16 +184,16 @@ aoechr_open(struct inode *inode, struct file *filp) { int n, i; - lock_kernel(); + mutex_lock(&aoechr_mutex); n = iminor(inode); filp->private_data = (void *) (unsigned long) n; for (i = 0; i < ARRAY_SIZE(chardevs); ++i) if (chardevs[i].minor == n) { - unlock_kernel(); + mutex_unlock(&aoechr_mutex); return 0; } - unlock_kernel(); + mutex_unlock(&aoechr_mutex); return -EINVAL; } @@ -265,6 +266,7 @@ static const struct file_operations aoe_fops = { .open = aoechr_open, .release = aoechr_rel, .owner = THIS_MODULE, + .llseek = noop_llseek, }; static char *aoe_devnode(struct device *dev, mode_t *mode) diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c index 0f4eec442e5d..4e4cc6c828cb 100644 --- a/drivers/block/ataflop.c +++ b/drivers/block/ataflop.c @@ -67,7 +67,7 @@ #include <linux/delay.h> #include <linux/init.h> #include <linux/blkdev.h> -#include <linux/smp_lock.h> +#include <linux/mutex.h> #include <asm/atafd.h> #include <asm/atafdreg.h> @@ -79,6 +79,7 @@ #undef DEBUG +static DEFINE_MUTEX(ataflop_mutex); static struct request *fd_request; static int fdc_queue; @@ -1694,9 +1695,9 @@ static int fd_ioctl(struct block_device *bdev, fmode_t mode, { int ret; - lock_kernel(); + mutex_lock(&ataflop_mutex); ret = fd_locked_ioctl(bdev, mode, cmd, arg); - unlock_kernel(); + mutex_unlock(&ataflop_mutex); return ret; } @@ -1877,9 +1878,9 @@ static int floppy_unlocked_open(struct block_device *bdev, fmode_t mode) { int ret; - lock_kernel(); + mutex_lock(&ataflop_mutex); ret = floppy_open(bdev, mode); - unlock_kernel(); + mutex_unlock(&ataflop_mutex); return ret; } @@ -1887,14 +1888,14 @@ static int floppy_unlocked_open(struct block_device *bdev, fmode_t mode) static int floppy_release(struct gendisk *disk, fmode_t mode) { struct atari_floppy_struct *p = disk->private_data; - lock_kernel(); + mutex_lock(&ataflop_mutex); if (p->ref < 0) p->ref = 0; else if (!p->ref--) { printk(KERN_ERR "floppy_release with fd_ref == 0"); p->ref = 0; } - unlock_kernel(); + mutex_unlock(&ataflop_mutex); return 0; } diff --git a/drivers/block/brd.c b/drivers/block/brd.c index 1c7f63792ff8..82bfd5bb4a97 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -15,7 +15,7 @@ #include <linux/blkdev.h> #include <linux/bio.h> #include <linux/highmem.h> -#include <linux/smp_lock.h> +#include <linux/mutex.h> #include <linux/radix-tree.h> #include <linux/buffer_head.h> /* invalidate_bh_lrus() */ #include <linux/slab.h> @@ -55,6 +55,7 @@ struct brd_device { /* * Look up and return a brd's page for a given sector. */ +static DEFINE_MUTEX(brd_mutex); static struct page *brd_lookup_page(struct brd_device *brd, sector_t sector) { pgoff_t idx; @@ -402,7 +403,7 @@ static int brd_ioctl(struct block_device *bdev, fmode_t mode, * ram device BLKFLSBUF has special semantics, we want to actually * release and destroy the ramdisk data. */ - lock_kernel(); + mutex_lock(&brd_mutex); mutex_lock(&bdev->bd_mutex); error = -EBUSY; if (bdev->bd_openers <= 1) { @@ -419,7 +420,7 @@ static int brd_ioctl(struct block_device *bdev, fmode_t mode, error = 0; } mutex_unlock(&bdev->bd_mutex); - unlock_kernel(); + mutex_unlock(&brd_mutex); return error; } diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index 41f9acb8ecd7..f09e6df15aa7 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -26,7 +26,6 @@ #include <linux/pci.h> #include <linux/kernel.h> #include <linux/slab.h> -#include <linux/smp_lock.h> #include <linux/delay.h> #include <linux/major.h> #include <linux/fs.h> @@ -66,6 +65,7 @@ MODULE_SUPPORTED_DEVICE("HP Smart Array Controllers"); MODULE_VERSION("3.6.26"); MODULE_LICENSE("GPL"); +static DEFINE_MUTEX(cciss_mutex); static int cciss_allow_hpsa; module_param(cciss_allow_hpsa, int, S_IRUGO|S_IWUSR); MODULE_PARM_DESC(cciss_allow_hpsa, @@ -299,6 +299,8 @@ static void enqueue_cmd_and_start_io(ctlr_info_t *h, spin_lock_irqsave(&h->lock, flags); addQ(&h->reqQ, c); h->Qdepth++; + if (h->Qdepth > h->maxQsinceinit) + h->maxQsinceinit = h->Qdepth; start_io(h); spin_unlock_irqrestore(&h->lock, flags); } @@ -1059,9 +1061,9 @@ static int cciss_unlocked_open(struct block_device *bdev, fmode_t mode) { int ret; - lock_kernel(); + mutex_lock(&cciss_mutex); ret = cciss_open(bdev, mode); - unlock_kernel(); + mutex_unlock(&cciss_mutex); return ret; } @@ -1074,13 +1076,13 @@ static int cciss_release(struct gendisk *disk, fmode_t mode) ctlr_info_t *h; drive_info_struct *drv; - lock_kernel(); + mutex_lock(&cciss_mutex); h = get_host(disk); drv = get_drv(disk); dev_dbg(&h->pdev->dev, "cciss_release %s\n", disk->disk_name); drv->usage_count--; h->usage_count--; - unlock_kernel(); + mutex_unlock(&cciss_mutex); return 0; } @@ -1088,9 +1090,9 @@ static int do_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, unsigned long arg) { int ret; - lock_kernel(); + mutex_lock(&cciss_mutex); ret = cciss_ioctl(bdev, mode, cmd, arg); - unlock_kernel(); + mutex_unlock(&cciss_mutex); return ret; } @@ -4503,6 +4505,12 @@ static __devinit int cciss_kdump_hard_reset_controller(struct pci_dev *pdev) misc_fw_support = readl(&cfgtable->misc_fw_support); use_doorbell = misc_fw_support & MISC_FW_DOORBELL_RESET; + /* The doorbell reset seems to cause lockups on some Smart + * Arrays (e.g. P410, P410i, maybe others). Until this is + * fixed or at least isolated, avoid the doorbell reset. + */ + use_doorbell = 0; + rc = cciss_controller_hard_reset(pdev, vaddr, use_doorbell); if (rc) goto unmap_cfgtable; @@ -4696,6 +4704,9 @@ static int __devinit cciss_init_one(struct pci_dev *pdev, h->scatter_list = kmalloc(h->max_commands * sizeof(struct scatterlist *), GFP_KERNEL); + if (!h->scatter_list) + goto clean4; + for (k = 0; k < h->nr_cmds; k++) { h->scatter_list[k] = kmalloc(sizeof(struct scatterlist) * h->maxsgentries, @@ -4765,7 +4776,7 @@ static int __devinit cciss_init_one(struct pci_dev *pdev, clean4: kfree(h->cmd_pool_bits); /* Free up sg elements */ - for (k = 0; k < h->nr_cmds; k++) + for (k-- ; k >= 0; k--) kfree(h->scatter_list[k]); kfree(h->scatter_list); cciss_free_sg_chain_blocks(h->cmd_sg_list, h->nr_cmds); diff --git a/drivers/block/cpqarray.c b/drivers/block/cpqarray.c index d53b0291c44b..946dad4caef3 100644 --- a/drivers/block/cpqarray.c +++ b/drivers/block/cpqarray.c @@ -35,7 +35,7 @@ #include <linux/seq_file.h> #include <linux/init.h> #include <linux/hdreg.h> -#include <linux/smp_lock.h> +#include <linux/mutex.h> #include <linux/spinlock.h> #include <linux/blkdev.h> #include <linux/genhd.h> @@ -68,6 +68,7 @@ MODULE_LICENSE("GPL"); #define CPQARRAY_DMA_MASK 0xFFFFFFFF /* 32 bit DMA */ +static DEFINE_MUTEX(cpqarray_mutex); static int nr_ctlr; static ctlr_info_t *hba[MAX_CTLR]; @@ -845,9 +846,9 @@ static int ida_unlocked_open(struct block_device *bdev, fmode_t mode) { int ret; - lock_kernel(); + mutex_lock(&cpqarray_mutex); ret = ida_open(bdev, mode); - unlock_kernel(); + mutex_unlock(&cpqarray_mutex); return ret; } @@ -859,10 +860,10 @@ static int ida_release(struct gendisk *disk, fmode_t mode) { ctlr_info_t *host; - lock_kernel(); + mutex_lock(&cpqarray_mutex); host = get_host(disk); host->usage_count--; - unlock_kernel(); + mutex_unlock(&cpqarray_mutex); return 0; } @@ -1217,9 +1218,9 @@ static int ida_ioctl(struct block_device *bdev, fmode_t mode, { int ret; - lock_kernel(); + mutex_lock(&cpqarray_mutex); ret = ida_locked_ioctl(bdev, mode, cmd, param); - unlock_kernel(); + mutex_unlock(&cpqarray_mutex); return ret; } diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 8bfedc7164fa..c5dfe6486cf3 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -32,7 +32,7 @@ #include <asm/types.h> #include <net/sock.h> #include <linux/ctype.h> -#include <linux/smp_lock.h> +#include <linux/mutex.h> #include <linux/fs.h> #include <linux/file.h> #include <linux/proc_fs.h> @@ -64,6 +64,7 @@ struct after_state_chg_work { struct completion *done; }; +static DEFINE_MUTEX(drbd_main_mutex); int drbdd_init(struct drbd_thread *); int drbd_worker(struct drbd_thread *); int drbd_asender(struct drbd_thread *); @@ -2672,7 +2673,7 @@ static int drbd_open(struct block_device *bdev, fmode_t mode) unsigned long flags; int rv = 0; - lock_kernel(); + mutex_lock(&drbd_main_mutex); spin_lock_irqsave(&mdev->req_lock, flags); /* to have a stable mdev->state.role * and no race with updating open_cnt */ @@ -2687,7 +2688,7 @@ static int drbd_open(struct block_device *bdev, fmode_t mode) if (!rv) mdev->open_cnt++; spin_unlock_irqrestore(&mdev->req_lock, flags); - unlock_kernel(); + mutex_unlock(&drbd_main_mutex); return rv; } @@ -2695,9 +2696,9 @@ static int drbd_open(struct block_device *bdev, fmode_t mode) static int drbd_release(struct gendisk *gd, fmode_t mode) { struct drbd_conf *mdev = gd->private_data; - lock_kernel(); + mutex_lock(&drbd_main_mutex); mdev->open_cnt--; - unlock_kernel(); + mutex_unlock(&drbd_main_mutex); return 0; } diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 6ec922c623a1..760ae0df9251 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3071,7 +3071,6 @@ static int receive_sizes(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned * we still need to figure out whether we accept that. */ mdev->p_size = p_size; -#define min_not_zero(l, r) (l == 0) ? r : ((r == 0) ? l : min(l, r)) if (get_ldev(mdev)) { warn_if_differ_considerably(mdev, "lower level device sizes", p_size, drbd_get_max_capacity(mdev->ldev)); diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index aa42e7766c6a..767107cce982 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -178,7 +178,6 @@ static int print_unex = 1; #include <linux/slab.h> #include <linux/mm.h> #include <linux/bio.h> -#include <linux/smp_lock.h> #include <linux/string.h> #include <linux/jiffies.h> #include <linux/fcntl.h> @@ -199,6 +198,7 @@ static int print_unex = 1; * It's been recommended that take about 1/4 of the default speed * in some more extreme cases. */ +static DEFINE_MUTEX(floppy_mutex); static int slow_floppy; #include <asm/dma.h> @@ -3578,9 +3578,9 @@ static int fd_ioctl(struct block_device *bdev, fmode_t mode, { int ret; - lock_kernel(); + mutex_lock(&floppy_mutex); ret = fd_locked_ioctl(bdev, mode, cmd, param); - unlock_kernel(); + mutex_unlock(&floppy_mutex); return ret; } @@ -3641,7 +3641,7 @@ static int floppy_release(struct gendisk *disk, fmode_t mode) { int drive = (long)disk->private_data; - lock_kernel(); + mutex_lock(&floppy_mutex); mutex_lock(&open_lock); if (UDRS->fd_ref < 0) UDRS->fd_ref = 0; @@ -3652,7 +3652,7 @@ static int floppy_release(struct gendisk *disk, fmode_t mode) if (!UDRS->fd_ref) opened_bdev[drive] = NULL; mutex_unlock(&open_lock); - unlock_kernel(); + mutex_unlock(&floppy_mutex); return 0; } @@ -3670,7 +3670,7 @@ static int floppy_open(struct block_device *bdev, fmode_t mode) int res = -EBUSY; char *tmp; - lock_kernel(); + mutex_lock(&floppy_mutex); mutex_lock(&open_lock); old_dev = UDRS->fd_device; if (opened_bdev[drive] && opened_bdev[drive] != bdev) @@ -3747,7 +3747,7 @@ static int floppy_open(struct block_device *bdev, fmode_t mode) goto out; } mutex_unlock(&open_lock); - unlock_kernel(); + mutex_unlock(&floppy_mutex); return 0; out: if (UDRS->fd_ref < 0) @@ -3758,7 +3758,7 @@ out: opened_bdev[drive] = NULL; out2: mutex_unlock(&open_lock); - unlock_kernel(); + mutex_unlock(&floppy_mutex); return res; } diff --git a/drivers/block/loop.c b/drivers/block/loop.c index dc552308668e..de3083b0a4f5 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -67,7 +67,7 @@ #include <linux/compat.h> #include <linux/suspend.h> #include <linux/freezer.h> -#include <linux/smp_lock.h> +#include <linux/mutex.h> #include <linux/writeback.h> #include <linux/buffer_head.h> /* for invalidate_bdev() */ #include <linux/completion.h> @@ -78,6 +78,7 @@ #include <asm/uaccess.h> +static DEFINE_MUTEX(loop_mutex); static LIST_HEAD(loop_devices); static DEFINE_MUTEX(loop_devices_mutex); @@ -478,7 +479,7 @@ static int do_bio_filebacked(struct loop_device *lo, struct bio *bio) pos = ((loff_t) bio->bi_sector << 9) + lo->lo_offset; if (bio_rw(bio) == WRITE) { - bool barrier = (bio->bi_rw & REQ_HARDBARRIER); + bool barrier = !!(bio->bi_rw & REQ_HARDBARRIER); struct file *file = lo->lo_backing_file; if (barrier) { @@ -1510,11 +1511,11 @@ static int lo_open(struct block_device *bdev, fmode_t mode) { struct loop_device *lo = bdev->bd_disk->private_data; - lock_kernel(); + mutex_lock(&loop_mutex); mutex_lock(&lo->lo_ctl_mutex); lo->lo_refcnt++; mutex_unlock(&lo->lo_ctl_mutex); - unlock_kernel(); + mutex_unlock(&loop_mutex); return 0; } @@ -1524,7 +1525,7 @@ static int lo_release(struct gendisk *disk, fmode_t mode) struct loop_device *lo = disk->private_data; int err; - lock_kernel(); + mutex_lock(&loop_mutex); mutex_lock(&lo->lo_ctl_mutex); if (--lo->lo_refcnt) @@ -1549,7 +1550,7 @@ static int lo_release(struct gendisk *disk, fmode_t mode) out: mutex_unlock(&lo->lo_ctl_mutex); out_unlocked: - lock_kernel(); + mutex_unlock(&loop_mutex); return 0; } diff --git a/drivers/block/mg_disk.c b/drivers/block/mg_disk.c index b82c5ce5e9df..76fa3deaee84 100644 --- a/drivers/block/mg_disk.c +++ b/drivers/block/mg_disk.c @@ -974,8 +974,7 @@ static int mg_probe(struct platform_device *plat_dev) host->breq->queuedata = host; /* mflash is random device, thanx for the noop */ - elevator_exit(host->breq->elevator); - err = elevator_init(host->breq, "noop"); + err = elevator_change(host->breq, "noop"); if (err) { printk(KERN_ERR "%s:%d (elevator_init) fail\n", __func__, __LINE__); diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 0daa422aa281..a32fb41246f8 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -24,7 +24,7 @@ #include <linux/errno.h> #include <linux/file.h> #include <linux/ioctl.h> -#include <linux/smp_lock.h> +#include <linux/mutex.h> #include <linux/compiler.h> #include <linux/err.h> #include <linux/kernel.h> @@ -53,6 +53,7 @@ #define DBG_BLKDEV 0x0100 #define DBG_RX 0x0200 #define DBG_TX 0x0400 +static DEFINE_MUTEX(nbd_mutex); static unsigned int debugflags; #endif /* NDEBUG */ @@ -717,11 +718,11 @@ static int nbd_ioctl(struct block_device *bdev, fmode_t mode, dprintk(DBG_IOCTL, "%s: nbd_ioctl cmd=%s(0x%x) arg=%lu\n", lo->disk->disk_name, ioctl_cmd_to_ascii(cmd), cmd, arg); - lock_kernel(); + mutex_lock(&nbd_mutex); mutex_lock(&lo->tx_lock); error = __nbd_ioctl(bdev, lo, cmd, arg); mutex_unlock(&lo->tx_lock); - unlock_kernel(); + mutex_unlock(&nbd_mutex); return error; } diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c index 76f8565e1e8d..62cec6afd7ad 100644 --- a/drivers/block/paride/pcd.c +++ b/drivers/block/paride/pcd.c @@ -138,9 +138,10 @@ enum {D_PRT, D_PRO, D_UNI, D_MOD, D_SLV, D_DLY}; #include <linux/cdrom.h> #include <linux/spinlock.h> #include <linux/blkdev.h> -#include <linux/smp_lock.h> +#include <linux/mutex.h> #include <asm/uaccess.h> +static DEFINE_MUTEX(pcd_mutex); static DEFINE_SPINLOCK(pcd_lock); module_param(verbose, bool, 0644); @@ -227,9 +228,9 @@ static int pcd_block_open(struct block_device *bdev, fmode_t mode) struct pcd_unit *cd = bdev->bd_disk->private_data; int ret; - lock_kernel(); + mutex_lock(&pcd_mutex); ret = cdrom_open(&cd->info, bdev, mode); - unlock_kernel(); + mutex_unlock(&pcd_mutex); return ret; } @@ -237,9 +238,9 @@ static int pcd_block_open(struct block_device *bdev, fmode_t mode) static int pcd_block_release(struct gendisk *disk, fmode_t mode) { struct pcd_unit *cd = disk->private_data; - lock_kernel(); + mutex_lock(&pcd_mutex); cdrom_release(&cd->info, mode); - unlock_kernel(); + mutex_unlock(&pcd_mutex); return 0; } @@ -249,9 +250,9 @@ static int pcd_block_ioctl(struct block_device *bdev, fmode_t mode, struct pcd_unit *cd = bdev->bd_disk->private_data; int ret; - lock_kernel(); + mutex_lock(&pcd_mutex); ret = cdrom_ioctl(&cd->info, bdev, mode, cmd, arg); - unlock_kernel(); + mutex_unlock(&pcd_mutex); return ret; } diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c index 985f0d4f1d1e..c0ee1558b9bb 100644 --- a/drivers/block/paride/pd.c +++ b/drivers/block/paride/pd.c @@ -153,10 +153,11 @@ enum {D_PRT, D_PRO, D_UNI, D_MOD, D_GEO, D_SBY, D_DLY, D_SLV}; #include <linux/blkdev.h> #include <linux/blkpg.h> #include <linux/kernel.h> -#include <linux/smp_lock.h> +#include <linux/mutex.h> #include <asm/uaccess.h> #include <linux/workqueue.h> +static DEFINE_MUTEX(pd_mutex); static DEFINE_SPINLOCK(pd_lock); module_param(verbose, bool, 0); @@ -736,14 +737,14 @@ static int pd_open(struct block_device *bdev, fmode_t mode) { struct pd_unit *disk = bdev->bd_disk->private_data; - lock_kernel(); + mutex_lock(&pd_mutex); disk->access++; if (disk->removable) { pd_special_command(disk, pd_media_check); pd_special_command(disk, pd_door_lock); } - unlock_kernel(); + mutex_unlock(&pd_mutex); return 0; } @@ -771,10 +772,10 @@ static int pd_ioctl(struct block_device *bdev, fmode_t mode, switch (cmd) { case CDROMEJECT: - lock_kernel(); + mutex_lock(&pd_mutex); if (disk->access == 1) pd_special_command(disk, pd_eject); - unlock_kernel(); + mutex_unlock(&pd_mutex); return 0; default: return -EINVAL; @@ -785,10 +786,10 @@ static int pd_release(struct gendisk *p, fmode_t mode) { struct pd_unit *disk = p->private_data; - lock_kernel(); + mutex_lock(&pd_mutex); if (!--disk->access && disk->removable) pd_special_command(disk, pd_door_unlock); - unlock_kernel(); + mutex_unlock(&pd_mutex); return 0; } diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c index 4457b494882a..635f25dd9e10 100644 --- a/drivers/block/paride/pf.c +++ b/drivers/block/paride/pf.c @@ -152,9 +152,10 @@ enum {D_PRT, D_PRO, D_UNI, D_MOD, D_SLV, D_LUN, D_DLY}; #include <linux/spinlock.h> #include <linux/blkdev.h> #include <linux/blkpg.h> -#include <linux/smp_lock.h> +#include <linux/mutex.h> #include <asm/uaccess.h> +static DEFINE_MUTEX(pf_mutex); static DEFINE_SPINLOCK(pf_spin_lock); module_param(verbose, bool, 0644); @@ -302,7 +303,7 @@ static int pf_open(struct block_device *bdev, fmode_t mode) struct pf_unit *pf = bdev->bd_disk->private_data; int ret; - lock_kernel(); + mutex_lock(&pf_mutex); pf_identify(pf); ret = -ENODEV; @@ -318,7 +319,7 @@ static int pf_open(struct block_device *bdev, fmode_t mode) if (pf->removable) pf_lock(pf, 1); out: - unlock_kernel(); + mutex_unlock(&pf_mutex); return ret; } @@ -349,9 +350,9 @@ static int pf_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, u if (pf->access != 1) return -EBUSY; - lock_kernel(); + mutex_lock(&pf_mutex); pf_eject(pf); - unlock_kernel(); + mutex_unlock(&pf_mutex); return 0; } @@ -360,9 +361,9 @@ static int pf_release(struct gendisk *disk, fmode_t mode) { struct pf_unit *pf = disk->private_data; - lock_kernel(); + mutex_lock(&pf_mutex); if (pf->access <= 0) { - unlock_kernel(); + mutex_unlock(&pf_mutex); return -EINVAL; } @@ -371,7 +372,7 @@ static int pf_release(struct gendisk *disk, fmode_t mode) if (!pf->access && pf->removable) pf_lock(pf, 0); - unlock_kernel(); + mutex_unlock(&pf_mutex); return 0; } diff --git a/drivers/block/paride/pg.c b/drivers/block/paride/pg.c index c397b3ddba9b..6b9a2000d56a 100644 --- a/drivers/block/paride/pg.c +++ b/drivers/block/paride/pg.c @@ -162,7 +162,7 @@ enum {D_PRT, D_PRO, D_UNI, D_MOD, D_SLV, D_DLY}; #include <linux/pg.h> #include <linux/device.h> #include <linux/sched.h> /* current, TASK_* */ -#include <linux/smp_lock.h> +#include <linux/mutex.h> #include <linux/jiffies.h> #include <asm/uaccess.h> @@ -193,6 +193,7 @@ module_param_array(drive3, int, NULL, 0); #define ATAPI_IDENTIFY 0x12 +static DEFINE_MUTEX(pg_mutex); static int pg_open(struct inode *inode, struct file *file); static int pg_release(struct inode *inode, struct file *file); static ssize_t pg_read(struct file *filp, char __user *buf, @@ -234,6 +235,7 @@ static const struct file_operations pg_fops = { .write = pg_write, .open = pg_open, .release = pg_release, + .llseek = noop_llseek, }; static void pg_init_units(void) @@ -518,7 +520,7 @@ static int pg_open(struct inode *inode, struct file *file) struct pg *dev = &devices[unit]; int ret = 0; - lock_kernel(); + mutex_lock(&pg_mutex); if ((unit >= PG_UNITS) || (!dev->present)) { ret = -ENODEV; goto out; @@ -547,7 +549,7 @@ static int pg_open(struct inode *inode, struct file *file) file->private_data = dev; out: - unlock_kernel(); + mutex_unlock(&pg_mutex); return ret; } diff --git a/drivers/block/paride/pt.c b/drivers/block/paride/pt.c index bc5825fdeaab..7179f79d7468 100644 --- a/drivers/block/paride/pt.c +++ b/drivers/block/paride/pt.c @@ -146,7 +146,7 @@ static int (*drives[4])[6] = {&drive0, &drive1, &drive2, &drive3}; #include <linux/mtio.h> #include <linux/device.h> #include <linux/sched.h> /* current, TASK_*, schedule_timeout() */ -#include <linux/smp_lock.h> +#include <linux/mutex.h> #include <asm/uaccess.h> @@ -189,6 +189,7 @@ module_param_array(drive3, int, NULL, 0); #define ATAPI_MODE_SENSE 0x1a #define ATAPI_LOG_SENSE 0x4d +static DEFINE_MUTEX(pt_mutex); static int pt_open(struct inode *inode, struct file *file); static long pt_ioctl(struct file *file, unsigned int cmd, unsigned long arg); static int pt_release(struct inode *inode, struct file *file); @@ -239,6 +240,7 @@ static const struct file_operations pt_fops = { .unlocked_ioctl = pt_ioctl, .open = pt_open, .release = pt_release, + .llseek = noop_llseek, }; /* sysfs class support */ @@ -650,9 +652,9 @@ static int pt_open(struct inode *inode, struct file *file) struct pt_unit *tape = pt + unit; int err; - lock_kernel(); + mutex_lock(&pt_mutex); if (unit >= PT_UNITS || (!tape->present)) { - unlock_kernel(); + mutex_unlock(&pt_mutex); return -ENODEV; } @@ -681,12 +683,12 @@ static int pt_open(struct inode *inode, struct file *file) } file->private_data = tape; - unlock_kernel(); + mutex_unlock(&pt_mutex); return 0; out: atomic_inc(&tape->available); - unlock_kernel(); + mutex_unlock(&pt_mutex); return err; } @@ -704,15 +706,15 @@ static long pt_ioctl(struct file *file, unsigned int cmd, unsigned long arg) switch (mtop.mt_op) { case MTREW: - lock_kernel(); + mutex_lock(&pt_mutex); pt_rewind(tape); - unlock_kernel(); + mutex_unlock(&pt_mutex); return 0; case MTWEOF: - lock_kernel(); + mutex_lock(&pt_mutex); pt_write_fm(tape); - unlock_kernel(); + mutex_unlock(&pt_mutex); return 0; default: diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index b1cbeb59bb76..ef58fccadad3 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -57,7 +57,6 @@ #include <linux/seq_file.h> #include <linux/miscdevice.h> #include <linux/freezer.h> -#include <linux/smp_lock.h> #include <linux/mutex.h> #include <linux/slab.h> #include <scsi/scsi_cmnd.h> @@ -86,6 +85,7 @@ #define ZONE(sector, pd) (((sector) + (pd)->offset) & ~((pd)->settings.size - 1)) +static DEFINE_MUTEX(pktcdvd_mutex); static struct pktcdvd_device *pkt_devs[MAX_WRITERS]; static struct proc_dir_entry *pkt_proc; static int pktdev_major; @@ -2369,7 +2369,7 @@ static void pkt_release_dev(struct pktcdvd_device *pd, int flush) pkt_shrink_pktlist(pd); } -static struct pktcdvd_device *pkt_find_dev_from_minor(int dev_minor) +static struct pktcdvd_device *pkt_find_dev_from_minor(unsigned int dev_minor) { if (dev_minor >= MAX_WRITERS) return NULL; @@ -2383,7 +2383,7 @@ static int pkt_open(struct block_device *bdev, fmode_t mode) VPRINTK(DRIVER_NAME": entering open\n"); - lock_kernel(); + mutex_lock(&pktcdvd_mutex); mutex_lock(&ctl_mutex); pd = pkt_find_dev_from_minor(MINOR(bdev->bd_dev)); if (!pd) { @@ -2411,7 +2411,7 @@ static int pkt_open(struct block_device *bdev, fmode_t mode) } mutex_unlock(&ctl_mutex); - unlock_kernel(); + mutex_unlock(&pktcdvd_mutex); return 0; out_dec: @@ -2419,7 +2419,7 @@ out_dec: out: VPRINTK(DRIVER_NAME": failed open (%d)\n", ret); mutex_unlock(&ctl_mutex); - unlock_kernel(); + mutex_unlock(&pktcdvd_mutex); return ret; } @@ -2428,7 +2428,7 @@ static int pkt_close(struct gendisk *disk, fmode_t mode) struct pktcdvd_device *pd = disk->private_data; int ret = 0; - lock_kernel(); + mutex_lock(&pktcdvd_mutex); mutex_lock(&ctl_mutex); pd->refcnt--; BUG_ON(pd->refcnt < 0); @@ -2437,7 +2437,7 @@ static int pkt_close(struct gendisk *disk, fmode_t mode) pkt_release_dev(pd, flush); } mutex_unlock(&ctl_mutex); - unlock_kernel(); + mutex_unlock(&pktcdvd_mutex); return ret; } @@ -2773,7 +2773,7 @@ static int pkt_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, VPRINTK("pkt_ioctl: cmd %x, dev %d:%d\n", cmd, MAJOR(bdev->bd_dev), MINOR(bdev->bd_dev)); - lock_kernel(); + mutex_lock(&pktcdvd_mutex); switch (cmd) { case CDROMEJECT: /* @@ -2798,7 +2798,7 @@ static int pkt_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, VPRINTK(DRIVER_NAME": Unknown ioctl for %s (%x)\n", pd->name, cmd); ret = -ENOTTY; } - unlock_kernel(); + mutex_unlock(&pktcdvd_mutex); return ret; } @@ -3046,6 +3046,7 @@ static const struct file_operations pkt_ctl_fops = { .compat_ioctl = pkt_ctl_compat_ioctl, #endif .owner = THIS_MODULE, + .llseek = no_llseek, }; static struct miscdevice pkt_misc = { diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c index e9da874d0419..03688c2da319 100644 --- a/drivers/block/ps3disk.c +++ b/drivers/block/ps3disk.c @@ -113,7 +113,7 @@ static void ps3disk_scatter_gather(struct ps3_storage_device *dev, memcpy(buf, dev->bounce_buf+offset, size); offset += size; flush_kernel_dcache_page(bvec->bv_page); - bvec_kunmap_irq(bvec, &flags); + bvec_kunmap_irq(buf, &flags); i++; } } diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c new file mode 100644 index 000000000000..6ec9d53806c5 --- /dev/null +++ b/drivers/block/rbd.c @@ -0,0 +1,1841 @@ +/* + rbd.c -- Export ceph rados objects as a Linux block device + + + based on drivers/block/osdblk.c: + + Copyright 2009 Red Hat, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; see the file COPYING. If not, write to + the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + + + + Instructions for use + -------------------- + + 1) Map a Linux block device to an existing rbd image. + + Usage: <mon ip addr> <options> <pool name> <rbd image name> [snap name] + + $ echo "192.168.0.1 name=admin rbd foo" > /sys/class/rbd/add + + The snapshot name can be "-" or omitted to map the image read/write. + + 2) List all active blkdev<->object mappings. + + In this example, we have performed step #1 twice, creating two blkdevs, + mapped to two separate rados objects in the rados rbd pool + + $ cat /sys/class/rbd/list + #id major client_name pool name snap KB + 0 254 client4143 rbd foo - 1024000 + + The columns, in order, are: + - blkdev unique id + - blkdev assigned major + - rados client id + - rados pool name + - rados block device name + - mapped snapshot ("-" if none) + - device size in KB + + + 3) Create a snapshot. + + Usage: <blkdev id> <snapname> + + $ echo "0 mysnap" > /sys/class/rbd/snap_create + + + 4) Listing a snapshot. + + $ cat /sys/class/rbd/snaps_list + #id snap KB + 0 - 1024000 (*) + 0 foo 1024000 + + The columns, in order, are: + - blkdev unique id + - snapshot name, '-' means none (active read/write version) + - size of device at time of snapshot + - the (*) indicates this is the active version + + 5) Rollback to snapshot. + + Usage: <blkdev id> <snapname> + + $ echo "0 mysnap" > /sys/class/rbd/snap_rollback + + + 6) Mapping an image using snapshot. + + A snapshot mapping is read-only. This is being done by passing + snap=<snapname> to the options when adding a device. + + $ echo "192.168.0.1 name=admin,snap=mysnap rbd foo" > /sys/class/rbd/add + + + 7) Remove an active blkdev<->rbd image mapping. + + In this example, we remove the mapping with blkdev unique id 1. + + $ echo 1 > /sys/class/rbd/remove + + + NOTE: The actual creation and deletion of rados objects is outside the scope + of this driver. + + */ + +#include <linux/ceph/libceph.h> +#include <linux/ceph/osd_client.h> +#include <linux/ceph/mon_client.h> +#include <linux/ceph/decode.h> + +#include <linux/kernel.h> +#include <linux/device.h> +#include <linux/module.h> +#include <linux/fs.h> +#include <linux/blkdev.h> + +#include "rbd_types.h" + +#define DRV_NAME "rbd" +#define DRV_NAME_LONG "rbd (rados block device)" + +#define RBD_MINORS_PER_MAJOR 256 /* max minors per blkdev */ + +#define RBD_MAX_MD_NAME_LEN (96 + sizeof(RBD_SUFFIX)) +#define RBD_MAX_POOL_NAME_LEN 64 +#define RBD_MAX_SNAP_NAME_LEN 32 +#define RBD_MAX_OPT_LEN 1024 + +#define RBD_SNAP_HEAD_NAME "-" + +#define DEV_NAME_LEN 32 + +/* + * block device image metadata (in-memory version) + */ +struct rbd_image_header { + u64 image_size; + char block_name[32]; + __u8 obj_order; + __u8 crypt_type; + __u8 comp_type; + struct rw_semaphore snap_rwsem; + struct ceph_snap_context *snapc; + size_t snap_names_len; + u64 snap_seq; + u32 total_snaps; + + char *snap_names; + u64 *snap_sizes; +}; + +/* + * an instance of the client. multiple devices may share a client. + */ +struct rbd_client { + struct ceph_client *client; + struct kref kref; + struct list_head node; +}; + +/* + * a single io request + */ +struct rbd_request { + struct request *rq; /* blk layer request */ + struct bio *bio; /* cloned bio */ + struct page **pages; /* list of used pages */ + u64 len; +}; + +/* + * a single device + */ +struct rbd_device { + int id; /* blkdev unique id */ + + int major; /* blkdev assigned major */ + struct gendisk *disk; /* blkdev's gendisk and rq */ + struct request_queue *q; + + struct ceph_client *client; + struct rbd_client *rbd_client; + + char name[DEV_NAME_LEN]; /* blkdev name, e.g. rbd3 */ + + spinlock_t lock; /* queue lock */ + + struct rbd_image_header header; + char obj[RBD_MAX_OBJ_NAME_LEN]; /* rbd image name */ + int obj_len; + char obj_md_name[RBD_MAX_MD_NAME_LEN]; /* hdr nm. */ + char pool_name[RBD_MAX_POOL_NAME_LEN]; + int poolid; + + char snap_name[RBD_MAX_SNAP_NAME_LEN]; + u32 cur_snap; /* index+1 of current snapshot within snap context + 0 - for the head */ + int read_only; + + struct list_head node; +}; + +static spinlock_t node_lock; /* protects client get/put */ + +static struct class *class_rbd; /* /sys/class/rbd */ +static DEFINE_MUTEX(ctl_mutex); /* Serialize open/close/setup/teardown */ +static LIST_HEAD(rbd_dev_list); /* devices */ +static LIST_HEAD(rbd_client_list); /* clients */ + + +static int rbd_open(struct block_device *bdev, fmode_t mode) +{ + struct gendisk *disk = bdev->bd_disk; + struct rbd_device *rbd_dev = disk->private_data; + + set_device_ro(bdev, rbd_dev->read_only); + + if ((mode & FMODE_WRITE) && rbd_dev->read_only) + return -EROFS; + + return 0; +} + +static const struct block_device_operations rbd_bd_ops = { + .owner = THIS_MODULE, + .open = rbd_open, +}; + +/* + * Initialize an rbd client instance. + * We own *opt. + */ +static struct rbd_client *rbd_client_create(struct ceph_options *opt) +{ + struct rbd_client *rbdc; + int ret = -ENOMEM; + + dout("rbd_client_create\n"); + rbdc = kmalloc(sizeof(struct rbd_client), GFP_KERNEL); + if (!rbdc) + goto out_opt; + + kref_init(&rbdc->kref); + INIT_LIST_HEAD(&rbdc->node); + + rbdc->client = ceph_create_client(opt, rbdc); + if (IS_ERR(rbdc->client)) + goto out_rbdc; + opt = NULL; /* Now rbdc->client is responsible for opt */ + + ret = ceph_open_session(rbdc->client); + if (ret < 0) + goto out_err; + + spin_lock(&node_lock); + list_add_tail(&rbdc->node, &rbd_client_list); + spin_unlock(&node_lock); + + dout("rbd_client_create created %p\n", rbdc); + return rbdc; + +out_err: + ceph_destroy_client(rbdc->client); +out_rbdc: + kfree(rbdc); +out_opt: + if (opt) + ceph_destroy_options(opt); + return ERR_PTR(ret); +} + +/* + * Find a ceph client with specific addr and configuration. + */ +static struct rbd_client *__rbd_client_find(struct ceph_options *opt) +{ + struct rbd_client *client_node; + + if (opt->flags & CEPH_OPT_NOSHARE) + return NULL; + + list_for_each_entry(client_node, &rbd_client_list, node) + if (ceph_compare_options(opt, client_node->client) == 0) + return client_node; + return NULL; +} + +/* + * Get a ceph client with specific addr and configuration, if one does + * not exist create it. + */ +static int rbd_get_client(struct rbd_device *rbd_dev, const char *mon_addr, + char *options) +{ + struct rbd_client *rbdc; + struct ceph_options *opt; + int ret; + + ret = ceph_parse_options(&opt, options, mon_addr, + mon_addr + strlen(mon_addr), NULL, NULL); + if (ret < 0) + return ret; + + spin_lock(&node_lock); + rbdc = __rbd_client_find(opt); + if (rbdc) { + ceph_destroy_options(opt); + + /* using an existing client */ + kref_get(&rbdc->kref); + rbd_dev->rbd_client = rbdc; + rbd_dev->client = rbdc->client; + spin_unlock(&node_lock); + return 0; + } + spin_unlock(&node_lock); + + rbdc = rbd_client_create(opt); + if (IS_ERR(rbdc)) + return PTR_ERR(rbdc); + + rbd_dev->rbd_client = rbdc; + rbd_dev->client = rbdc->client; + return 0; +} + +/* + * Destroy ceph client + */ +static void rbd_client_release(struct kref *kref) +{ + struct rbd_client *rbdc = container_of(kref, struct rbd_client, kref); + + dout("rbd_release_client %p\n", rbdc); + spin_lock(&node_lock); + list_del(&rbdc->node); + spin_unlock(&node_lock); + + ceph_destroy_client(rbdc->client); + kfree(rbdc); +} + +/* + * Drop reference to ceph client node. If it's not referenced anymore, release + * it. + */ +static void rbd_put_client(struct rbd_device *rbd_dev) +{ + kref_put(&rbd_dev->rbd_client->kref, rbd_client_release); + rbd_dev->rbd_client = NULL; + rbd_dev->client = NULL; +} + + +/* + * Create a new header structure, translate header format from the on-disk + * header. + */ +static int rbd_header_from_disk(struct rbd_image_header *header, + struct rbd_image_header_ondisk *ondisk, + int allocated_snaps, + gfp_t gfp_flags) +{ + int i; + u32 snap_count = le32_to_cpu(ondisk->snap_count); + int ret = -ENOMEM; + + init_rwsem(&header->snap_rwsem); + + header->snap_names_len = le64_to_cpu(ondisk->snap_names_len); + header->snapc = kmalloc(sizeof(struct ceph_snap_context) + + snap_count * + sizeof(struct rbd_image_snap_ondisk), + gfp_flags); + if (!header->snapc) + return -ENOMEM; + if (snap_count) { + header->snap_names = kmalloc(header->snap_names_len, + GFP_KERNEL); + if (!header->snap_names) + goto err_snapc; + header->snap_sizes = kmalloc(snap_count * sizeof(u64), + GFP_KERNEL); + if (!header->snap_sizes) + goto err_names; + } else { + header->snap_names = NULL; + header->snap_sizes = NULL; + } + memcpy(header->block_name, ondisk->block_name, + sizeof(ondisk->block_name)); + + header->image_size = le64_to_cpu(ondisk->image_size); + header->obj_order = ondisk->options.order; + header->crypt_type = ondisk->options.crypt_type; + header->comp_type = ondisk->options.comp_type; + + atomic_set(&header->snapc->nref, 1); + header->snap_seq = le64_to_cpu(ondisk->snap_seq); + header->snapc->num_snaps = snap_count; + header->total_snaps = snap_count; + + if (snap_count && + allocated_snaps == snap_count) { + for (i = 0; i < snap_count; i++) { + header->snapc->snaps[i] = + le64_to_cpu(ondisk->snaps[i].id); + header->snap_sizes[i] = + le64_to_cpu(ondisk->snaps[i].image_size); + } + + /* copy snapshot names */ + memcpy(header->snap_names, &ondisk->snaps[i], + header->snap_names_len); + } + + return 0; + +err_names: + kfree(header->snap_names); +err_snapc: + kfree(header->snapc); + return ret; +} + +static int snap_index(struct rbd_image_header *header, int snap_num) +{ + return header->total_snaps - snap_num; +} + +static u64 cur_snap_id(struct rbd_device *rbd_dev) +{ + struct rbd_image_header *header = &rbd_dev->header; + + if (!rbd_dev->cur_snap) + return 0; + + return header->snapc->snaps[snap_index(header, rbd_dev->cur_snap)]; +} + +static int snap_by_name(struct rbd_image_header *header, const char *snap_name, + u64 *seq, u64 *size) +{ + int i; + char *p = header->snap_names; + + for (i = 0; i < header->total_snaps; i++, p += strlen(p) + 1) { + if (strcmp(snap_name, p) == 0) + break; + } + if (i == header->total_snaps) + return -ENOENT; + if (seq) + *seq = header->snapc->snaps[i]; + + if (size) + *size = header->snap_sizes[i]; + + return i; +} + +static int rbd_header_set_snap(struct rbd_device *dev, + const char *snap_name, + u64 *size) +{ + struct rbd_image_header *header = &dev->header; + struct ceph_snap_context *snapc = header->snapc; + int ret = -ENOENT; + + down_write(&header->snap_rwsem); + + if (!snap_name || + !*snap_name || + strcmp(snap_name, "-") == 0 || + strcmp(snap_name, RBD_SNAP_HEAD_NAME) == 0) { + if (header->total_snaps) + snapc->seq = header->snap_seq; + else + snapc->seq = 0; + dev->cur_snap = 0; + dev->read_only = 0; + if (size) + *size = header->image_size; + } else { + ret = snap_by_name(header, snap_name, &snapc->seq, size); + if (ret < 0) + goto done; + + dev->cur_snap = header->total_snaps - ret; + dev->read_only = 1; + } + + ret = 0; +done: + up_write(&header->snap_rwsem); + return ret; +} + +static void rbd_header_free(struct rbd_image_header *header) +{ + kfree(header->snapc); + kfree(header->snap_names); + kfree(header->snap_sizes); +} + +/* + * get the actual striped segment name, offset and length + */ +static u64 rbd_get_segment(struct rbd_image_header *header, + const char *block_name, + u64 ofs, u64 len, + char *seg_name, u64 *segofs) +{ + u64 seg = ofs >> header->obj_order; + + if (seg_name) + snprintf(seg_name, RBD_MAX_SEG_NAME_LEN, + "%s.%012llx", block_name, seg); + + ofs = ofs & ((1 << header->obj_order) - 1); + len = min_t(u64, len, (1 << header->obj_order) - ofs); + + if (segofs) + *segofs = ofs; + + return len; +} + +/* + * bio helpers + */ + +static void bio_chain_put(struct bio *chain) +{ + struct bio *tmp; + + while (chain) { + tmp = chain; + chain = chain->bi_next; + bio_put(tmp); + } +} + +/* + * zeros a bio chain, starting at specific offset + */ +static void zero_bio_chain(struct bio *chain, int start_ofs) +{ + struct bio_vec *bv; + unsigned long flags; + void *buf; + int i; + int pos = 0; + + while (chain) { + bio_for_each_segment(bv, chain, i) { + if (pos + bv->bv_len > start_ofs) { + int remainder = max(start_ofs - pos, 0); + buf = bvec_kmap_irq(bv, &flags); + memset(buf + remainder, 0, + bv->bv_len - remainder); + bvec_kunmap_irq(buf, &flags); + } + pos += bv->bv_len; + } + + chain = chain->bi_next; + } +} + +/* + * bio_chain_clone - clone a chain of bios up to a certain length. + * might return a bio_pair that will need to be released. + */ +static struct bio *bio_chain_clone(struct bio **old, struct bio **next, + struct bio_pair **bp, + int len, gfp_t gfpmask) +{ + struct bio *tmp, *old_chain = *old, *new_chain = NULL, *tail = NULL; + int total = 0; + + if (*bp) { + bio_pair_release(*bp); + *bp = NULL; + } + + while (old_chain && (total < len)) { + tmp = bio_kmalloc(gfpmask, old_chain->bi_max_vecs); + if (!tmp) + goto err_out; + + if (total + old_chain->bi_size > len) { + struct bio_pair *bp; + + /* + * this split can only happen with a single paged bio, + * split_bio will BUG_ON if this is not the case + */ + dout("bio_chain_clone split! total=%d remaining=%d" + "bi_size=%d\n", + (int)total, (int)len-total, + (int)old_chain->bi_size); + + /* split the bio. We'll release it either in the next + call, or it will have to be released outside */ + bp = bio_split(old_chain, (len - total) / 512ULL); + if (!bp) + goto err_out; + + __bio_clone(tmp, &bp->bio1); + + *next = &bp->bio2; + } else { + __bio_clone(tmp, old_chain); + *next = old_chain->bi_next; + } + + tmp->bi_bdev = NULL; + gfpmask &= ~__GFP_WAIT; + tmp->bi_next = NULL; + + if (!new_chain) { + new_chain = tail = tmp; + } else { + tail->bi_next = tmp; + tail = tmp; + } + old_chain = old_chain->bi_next; + + total += tmp->bi_size; + } + + BUG_ON(total < len); + + if (tail) + tail->bi_next = NULL; + + *old = old_chain; + + return new_chain; + +err_out: + dout("bio_chain_clone with err\n"); + bio_chain_put(new_chain); + return NULL; +} + +/* + * helpers for osd request op vectors. + */ +static int rbd_create_rw_ops(struct ceph_osd_req_op **ops, + int num_ops, + int opcode, + u32 payload_len) +{ + *ops = kzalloc(sizeof(struct ceph_osd_req_op) * (num_ops + 1), + GFP_NOIO); + if (!*ops) + return -ENOMEM; + (*ops)[0].op = opcode; + /* + * op extent offset and length will be set later on + * in calc_raw_layout() + */ + (*ops)[0].payload_len = payload_len; + return 0; +} + +static void rbd_destroy_ops(struct ceph_osd_req_op *ops) +{ + kfree(ops); +} + +/* + * Send ceph osd request + */ +static int rbd_do_request(struct request *rq, + struct rbd_device *dev, + struct ceph_snap_context *snapc, + u64 snapid, + const char *obj, u64 ofs, u64 len, + struct bio *bio, + struct page **pages, + int num_pages, + int flags, + struct ceph_osd_req_op *ops, + int num_reply, + void (*rbd_cb)(struct ceph_osd_request *req, + struct ceph_msg *msg)) +{ + struct ceph_osd_request *req; + struct ceph_file_layout *layout; + int ret; + u64 bno; + struct timespec mtime = CURRENT_TIME; + struct rbd_request *req_data; + struct ceph_osd_request_head *reqhead; + struct rbd_image_header *header = &dev->header; + + ret = -ENOMEM; + req_data = kzalloc(sizeof(*req_data), GFP_NOIO); + if (!req_data) + goto done; + + dout("rbd_do_request len=%lld ofs=%lld\n", len, ofs); + + down_read(&header->snap_rwsem); + + req = ceph_osdc_alloc_request(&dev->client->osdc, flags, + snapc, + ops, + false, + GFP_NOIO, pages, bio); + if (IS_ERR(req)) { + up_read(&header->snap_rwsem); + ret = PTR_ERR(req); + goto done_pages; + } + + req->r_callback = rbd_cb; + + req_data->rq = rq; + req_data->bio = bio; + req_data->pages = pages; + req_data->len = len; + + req->r_priv = req_data; + + reqhead = req->r_request->front.iov_base; + reqhead->snapid = cpu_to_le64(CEPH_NOSNAP); + + strncpy(req->r_oid, obj, sizeof(req->r_oid)); + req->r_oid_len = strlen(req->r_oid); + + layout = &req->r_file_layout; + memset(layout, 0, sizeof(*layout)); + layout->fl_stripe_unit = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER); + layout->fl_stripe_count = cpu_to_le32(1); + layout->fl_object_size = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER); + layout->fl_pg_preferred = cpu_to_le32(-1); + layout->fl_pg_pool = cpu_to_le32(dev->poolid); + ceph_calc_raw_layout(&dev->client->osdc, layout, snapid, + ofs, &len, &bno, req, ops); + + ceph_osdc_build_request(req, ofs, &len, + ops, + snapc, + &mtime, + req->r_oid, req->r_oid_len); + up_read(&header->snap_rwsem); + + ret = ceph_osdc_start_request(&dev->client->osdc, req, false); + if (ret < 0) + goto done_err; + + if (!rbd_cb) { + ret = ceph_osdc_wait_request(&dev->client->osdc, req); + ceph_osdc_put_request(req); + } + return ret; + +done_err: + bio_chain_put(req_data->bio); + ceph_osdc_put_request(req); +done_pages: + kfree(req_data); +done: + if (rq) + blk_end_request(rq, ret, len); + return ret; +} + +/* + * Ceph osd op callback + */ +static void rbd_req_cb(struct ceph_osd_request *req, struct ceph_msg *msg) +{ + struct rbd_request *req_data = req->r_priv; + struct ceph_osd_reply_head *replyhead; + struct ceph_osd_op *op; + __s32 rc; + u64 bytes; + int read_op; + + /* parse reply */ + replyhead = msg->front.iov_base; + WARN_ON(le32_to_cpu(replyhead->num_ops) == 0); + op = (void *)(replyhead + 1); + rc = le32_to_cpu(replyhead->result); + bytes = le64_to_cpu(op->extent.length); + read_op = (le32_to_cpu(op->op) == CEPH_OSD_OP_READ); + + dout("rbd_req_cb bytes=%lld readop=%d rc=%d\n", bytes, read_op, rc); + + if (rc == -ENOENT && read_op) { + zero_bio_chain(req_data->bio, 0); + rc = 0; + } else if (rc == 0 && read_op && bytes < req_data->len) { + zero_bio_chain(req_data->bio, bytes); + bytes = req_data->len; + } + + blk_end_request(req_data->rq, rc, bytes); + + if (req_data->bio) + bio_chain_put(req_data->bio); + + ceph_osdc_put_request(req); + kfree(req_data); +} + +/* + * Do a synchronous ceph osd operation + */ +static int rbd_req_sync_op(struct rbd_device *dev, + struct ceph_snap_context *snapc, + u64 snapid, + int opcode, + int flags, + struct ceph_osd_req_op *orig_ops, + int num_reply, + const char *obj, + u64 ofs, u64 len, + char *buf) +{ + int ret; + struct page **pages; + int num_pages; + struct ceph_osd_req_op *ops = orig_ops; + u32 payload_len; + + num_pages = calc_pages_for(ofs , len); + pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL); + if (IS_ERR(pages)) + return PTR_ERR(pages); + + if (!orig_ops) { + payload_len = (flags & CEPH_OSD_FLAG_WRITE ? len : 0); + ret = rbd_create_rw_ops(&ops, 1, opcode, payload_len); + if (ret < 0) + goto done; + + if ((flags & CEPH_OSD_FLAG_WRITE) && buf) { + ret = ceph_copy_to_page_vector(pages, buf, ofs, len); + if (ret < 0) + goto done_ops; + } + } + + ret = rbd_do_request(NULL, dev, snapc, snapid, + obj, ofs, len, NULL, + pages, num_pages, + flags, + ops, + 2, + NULL); + if (ret < 0) + goto done_ops; + + if ((flags & CEPH_OSD_FLAG_READ) && buf) + ret = ceph_copy_from_page_vector(pages, buf, ofs, ret); + +done_ops: + if (!orig_ops) + rbd_destroy_ops(ops); +done: + ceph_release_page_vector(pages, num_pages); + return ret; +} + +/* + * Do an asynchronous ceph osd operation + */ +static int rbd_do_op(struct request *rq, + struct rbd_device *rbd_dev , + struct ceph_snap_context *snapc, + u64 snapid, + int opcode, int flags, int num_reply, + u64 ofs, u64 len, + struct bio *bio) +{ + char *seg_name; + u64 seg_ofs; + u64 seg_len; + int ret; + struct ceph_osd_req_op *ops; + u32 payload_len; + + seg_name = kmalloc(RBD_MAX_SEG_NAME_LEN + 1, GFP_NOIO); + if (!seg_name) + return -ENOMEM; + + seg_len = rbd_get_segment(&rbd_dev->header, + rbd_dev->header.block_name, + ofs, len, + seg_name, &seg_ofs); + + payload_len = (flags & CEPH_OSD_FLAG_WRITE ? seg_len : 0); + + ret = rbd_create_rw_ops(&ops, 1, opcode, payload_len); + if (ret < 0) + goto done; + + /* we've taken care of segment sizes earlier when we + cloned the bios. We should never have a segment + truncated at this point */ + BUG_ON(seg_len < len); + + ret = rbd_do_request(rq, rbd_dev, snapc, snapid, + seg_name, seg_ofs, seg_len, + bio, + NULL, 0, + flags, + ops, + num_reply, + rbd_req_cb); +done: + kfree(seg_name); + return ret; +} + +/* + * Request async osd write + */ +static int rbd_req_write(struct request *rq, + struct rbd_device *rbd_dev, + struct ceph_snap_context *snapc, + u64 ofs, u64 len, + struct bio *bio) +{ + return rbd_do_op(rq, rbd_dev, snapc, CEPH_NOSNAP, + CEPH_OSD_OP_WRITE, + CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, + 2, + ofs, len, bio); +} + +/* + * Request async osd read + */ +static int rbd_req_read(struct request *rq, + struct rbd_device *rbd_dev, + u64 snapid, + u64 ofs, u64 len, + struct bio *bio) +{ + return rbd_do_op(rq, rbd_dev, NULL, + (snapid ? snapid : CEPH_NOSNAP), + CEPH_OSD_OP_READ, + CEPH_OSD_FLAG_READ, + 2, + ofs, len, bio); +} + +/* + * Request sync osd read + */ +static int rbd_req_sync_read(struct rbd_device *dev, + struct ceph_snap_context *snapc, + u64 snapid, + const char *obj, + u64 ofs, u64 len, + char *buf) +{ + return rbd_req_sync_op(dev, NULL, + (snapid ? snapid : CEPH_NOSNAP), + CEPH_OSD_OP_READ, + CEPH_OSD_FLAG_READ, + NULL, + 1, obj, ofs, len, buf); +} + +/* + * Request sync osd read + */ +static int rbd_req_sync_rollback_obj(struct rbd_device *dev, + u64 snapid, + const char *obj) +{ + struct ceph_osd_req_op *ops; + int ret = rbd_create_rw_ops(&ops, 1, CEPH_OSD_OP_ROLLBACK, 0); + if (ret < 0) + return ret; + + ops[0].snap.snapid = snapid; + + ret = rbd_req_sync_op(dev, NULL, + CEPH_NOSNAP, + 0, + CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, + ops, + 1, obj, 0, 0, NULL); + + rbd_destroy_ops(ops); + + if (ret < 0) + return ret; + + return ret; +} + +/* + * Request sync osd read + */ +static int rbd_req_sync_exec(struct rbd_device *dev, + const char *obj, + const char *cls, + const char *method, + const char *data, + int len) +{ + struct ceph_osd_req_op *ops; + int cls_len = strlen(cls); + int method_len = strlen(method); + int ret = rbd_create_rw_ops(&ops, 1, CEPH_OSD_OP_CALL, + cls_len + method_len + len); + if (ret < 0) + return ret; + + ops[0].cls.class_name = cls; + ops[0].cls.class_len = (__u8)cls_len; + ops[0].cls.method_name = method; + ops[0].cls.method_len = (__u8)method_len; + ops[0].cls.argc = 0; + ops[0].cls.indata = data; + ops[0].cls.indata_len = len; + + ret = rbd_req_sync_op(dev, NULL, + CEPH_NOSNAP, + 0, + CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, + ops, + 1, obj, 0, 0, NULL); + + rbd_destroy_ops(ops); + + dout("cls_exec returned %d\n", ret); + return ret; +} + +/* + * block device queue callback + */ +static void rbd_rq_fn(struct request_queue *q) +{ + struct rbd_device *rbd_dev = q->queuedata; + struct request *rq; + struct bio_pair *bp = NULL; + + rq = blk_fetch_request(q); + + while (1) { + struct bio *bio; + struct bio *rq_bio, *next_bio = NULL; + bool do_write; + int size, op_size = 0; + u64 ofs; + + /* peek at request from block layer */ + if (!rq) + break; + + dout("fetched request\n"); + + /* filter out block requests we don't understand */ + if ((rq->cmd_type != REQ_TYPE_FS)) { + __blk_end_request_all(rq, 0); + goto next; + } + + /* deduce our operation (read, write) */ + do_write = (rq_data_dir(rq) == WRITE); + + size = blk_rq_bytes(rq); + ofs = blk_rq_pos(rq) * 512ULL; + rq_bio = rq->bio; + if (do_write && rbd_dev->read_only) { + __blk_end_request_all(rq, -EROFS); + goto next; + } + + spin_unlock_irq(q->queue_lock); + + dout("%s 0x%x bytes at 0x%llx\n", + do_write ? "write" : "read", + size, blk_rq_pos(rq) * 512ULL); + + do { + /* a bio clone to be passed down to OSD req */ + dout("rq->bio->bi_vcnt=%d\n", rq->bio->bi_vcnt); + op_size = rbd_get_segment(&rbd_dev->header, + rbd_dev->header.block_name, + ofs, size, + NULL, NULL); + bio = bio_chain_clone(&rq_bio, &next_bio, &bp, + op_size, GFP_ATOMIC); + if (!bio) { + spin_lock_irq(q->queue_lock); + __blk_end_request_all(rq, -ENOMEM); + goto next; + } + + /* init OSD command: write or read */ + if (do_write) + rbd_req_write(rq, rbd_dev, + rbd_dev->header.snapc, + ofs, + op_size, bio); + else + rbd_req_read(rq, rbd_dev, + cur_snap_id(rbd_dev), + ofs, + op_size, bio); + + size -= op_size; + ofs += op_size; + + rq_bio = next_bio; + } while (size > 0); + + if (bp) + bio_pair_release(bp); + + spin_lock_irq(q->queue_lock); +next: + rq = blk_fetch_request(q); + } +} + +/* + * a queue callback. Makes sure that we don't create a bio that spans across + * multiple osd objects. One exception would be with a single page bios, + * which we handle later at bio_chain_clone + */ +static int rbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bmd, + struct bio_vec *bvec) +{ + struct rbd_device *rbd_dev = q->queuedata; + unsigned int chunk_sectors = 1 << (rbd_dev->header.obj_order - 9); + sector_t sector = bmd->bi_sector + get_start_sect(bmd->bi_bdev); + unsigned int bio_sectors = bmd->bi_size >> 9; + int max; + + max = (chunk_sectors - ((sector & (chunk_sectors - 1)) + + bio_sectors)) << 9; + if (max < 0) + max = 0; /* bio_add cannot handle a negative return */ + if (max <= bvec->bv_len && bio_sectors == 0) + return bvec->bv_len; + return max; +} + +static void rbd_free_disk(struct rbd_device *rbd_dev) +{ + struct gendisk *disk = rbd_dev->disk; + + if (!disk) + return; + + rbd_header_free(&rbd_dev->header); + + if (disk->flags & GENHD_FL_UP) + del_gendisk(disk); + if (disk->queue) + blk_cleanup_queue(disk->queue); + put_disk(disk); +} + +/* + * reload the ondisk the header + */ +static int rbd_read_header(struct rbd_device *rbd_dev, + struct rbd_image_header *header) +{ + ssize_t rc; + struct rbd_image_header_ondisk *dh; + int snap_count = 0; + u64 snap_names_len = 0; + + while (1) { + int len = sizeof(*dh) + + snap_count * sizeof(struct rbd_image_snap_ondisk) + + snap_names_len; + + rc = -ENOMEM; + dh = kmalloc(len, GFP_KERNEL); + if (!dh) + return -ENOMEM; + + rc = rbd_req_sync_read(rbd_dev, + NULL, CEPH_NOSNAP, + rbd_dev->obj_md_name, + 0, len, + (char *)dh); + if (rc < 0) + goto out_dh; + + rc = rbd_header_from_disk(header, dh, snap_count, GFP_KERNEL); + if (rc < 0) + goto out_dh; + + if (snap_count != header->total_snaps) { + snap_count = header->total_snaps; + snap_names_len = header->snap_names_len; + rbd_header_free(header); + kfree(dh); + continue; + } + break; + } + +out_dh: + kfree(dh); + return rc; +} + +/* + * create a snapshot + */ +static int rbd_header_add_snap(struct rbd_device *dev, + const char *snap_name, + gfp_t gfp_flags) +{ + int name_len = strlen(snap_name); + u64 new_snapid; + int ret; + void *data, *data_start, *data_end; + + /* we should create a snapshot only if we're pointing at the head */ + if (dev->cur_snap) + return -EINVAL; + + ret = ceph_monc_create_snapid(&dev->client->monc, dev->poolid, + &new_snapid); + dout("created snapid=%lld\n", new_snapid); + if (ret < 0) + return ret; + + data = kmalloc(name_len + 16, gfp_flags); + if (!data) + return -ENOMEM; + + data_start = data; + data_end = data + name_len + 16; + + ceph_encode_string_safe(&data, data_end, snap_name, name_len, bad); + ceph_encode_64_safe(&data, data_end, new_snapid, bad); + + ret = rbd_req_sync_exec(dev, dev->obj_md_name, "rbd", "snap_add", + data_start, data - data_start); + + kfree(data_start); + + if (ret < 0) + return ret; + + dev->header.snapc->seq = new_snapid; + + return 0; +bad: + return -ERANGE; +} + +/* + * only read the first part of the ondisk header, without the snaps info + */ +static int rbd_update_snaps(struct rbd_device *rbd_dev) +{ + int ret; + struct rbd_image_header h; + u64 snap_seq; + + ret = rbd_read_header(rbd_dev, &h); + if (ret < 0) + return ret; + + down_write(&rbd_dev->header.snap_rwsem); + + snap_seq = rbd_dev->header.snapc->seq; + + kfree(rbd_dev->header.snapc); + kfree(rbd_dev->header.snap_names); + kfree(rbd_dev->header.snap_sizes); + + rbd_dev->header.total_snaps = h.total_snaps; + rbd_dev->header.snapc = h.snapc; + rbd_dev->header.snap_names = h.snap_names; + rbd_dev->header.snap_sizes = h.snap_sizes; + rbd_dev->header.snapc->seq = snap_seq; + + up_write(&rbd_dev->header.snap_rwsem); + + return 0; +} + +static int rbd_init_disk(struct rbd_device *rbd_dev) +{ + struct gendisk *disk; + struct request_queue *q; + int rc; + u64 total_size = 0; + + /* contact OSD, request size info about the object being mapped */ + rc = rbd_read_header(rbd_dev, &rbd_dev->header); + if (rc) + return rc; + + rc = rbd_header_set_snap(rbd_dev, rbd_dev->snap_name, &total_size); + if (rc) + return rc; + + /* create gendisk info */ + rc = -ENOMEM; + disk = alloc_disk(RBD_MINORS_PER_MAJOR); + if (!disk) + goto out; + + sprintf(disk->disk_name, DRV_NAME "%d", rbd_dev->id); + disk->major = rbd_dev->major; + disk->first_minor = 0; + disk->fops = &rbd_bd_ops; + disk->private_data = rbd_dev; + + /* init rq */ + rc = -ENOMEM; + q = blk_init_queue(rbd_rq_fn, &rbd_dev->lock); + if (!q) + goto out_disk; + blk_queue_merge_bvec(q, rbd_merge_bvec); + disk->queue = q; + + q->queuedata = rbd_dev; + + rbd_dev->disk = disk; + rbd_dev->q = q; + + /* finally, announce the disk to the world */ + set_capacity(disk, total_size / 512ULL); + add_disk(disk); + + pr_info("%s: added with size 0x%llx\n", + disk->disk_name, (unsigned long long)total_size); + return 0; + +out_disk: + put_disk(disk); +out: + return rc; +} + +/******************************************************************** + * /sys/class/rbd/ + * add map rados objects to blkdev + * remove unmap rados objects + * list show mappings + *******************************************************************/ + +static void class_rbd_release(struct class *cls) +{ + kfree(cls); +} + +static ssize_t class_rbd_list(struct class *c, + struct class_attribute *attr, + char *data) +{ + int n = 0; + struct list_head *tmp; + int max = PAGE_SIZE; + + mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); + + n += snprintf(data, max, + "#id\tmajor\tclient_name\tpool\tname\tsnap\tKB\n"); + + list_for_each(tmp, &rbd_dev_list) { + struct rbd_device *rbd_dev; + + rbd_dev = list_entry(tmp, struct rbd_device, node); + n += snprintf(data+n, max-n, + "%d\t%d\tclient%lld\t%s\t%s\t%s\t%lld\n", + rbd_dev->id, + rbd_dev->major, + ceph_client_id(rbd_dev->client), + rbd_dev->pool_name, + rbd_dev->obj, rbd_dev->snap_name, + rbd_dev->header.image_size >> 10); + if (n == max) + break; + } + + mutex_unlock(&ctl_mutex); + return n; +} + +static ssize_t class_rbd_add(struct class *c, + struct class_attribute *attr, + const char *buf, size_t count) +{ + struct ceph_osd_client *osdc; + struct rbd_device *rbd_dev; + ssize_t rc = -ENOMEM; + int irc, new_id = 0; + struct list_head *tmp; + char *mon_dev_name; + char *options; + + if (!try_module_get(THIS_MODULE)) + return -ENODEV; + + mon_dev_name = kmalloc(RBD_MAX_OPT_LEN, GFP_KERNEL); + if (!mon_dev_name) + goto err_out_mod; + + options = kmalloc(RBD_MAX_OPT_LEN, GFP_KERNEL); + if (!options) + goto err_mon_dev; + + /* new rbd_device object */ + rbd_dev = kzalloc(sizeof(*rbd_dev), GFP_KERNEL); + if (!rbd_dev) + goto err_out_opt; + + /* static rbd_device initialization */ + spin_lock_init(&rbd_dev->lock); + INIT_LIST_HEAD(&rbd_dev->node); + + /* generate unique id: find highest unique id, add one */ + mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); + + list_for_each(tmp, &rbd_dev_list) { + struct rbd_device *rbd_dev; + + rbd_dev = list_entry(tmp, struct rbd_device, node); + if (rbd_dev->id >= new_id) + new_id = rbd_dev->id + 1; + } + + rbd_dev->id = new_id; + + /* add to global list */ + list_add_tail(&rbd_dev->node, &rbd_dev_list); + + /* parse add command */ + if (sscanf(buf, "%" __stringify(RBD_MAX_OPT_LEN) "s " + "%" __stringify(RBD_MAX_OPT_LEN) "s " + "%" __stringify(RBD_MAX_POOL_NAME_LEN) "s " + "%" __stringify(RBD_MAX_OBJ_NAME_LEN) "s" + "%" __stringify(RBD_MAX_SNAP_NAME_LEN) "s", + mon_dev_name, options, rbd_dev->pool_name, + rbd_dev->obj, rbd_dev->snap_name) < 4) { + rc = -EINVAL; + goto err_out_slot; + } + + if (rbd_dev->snap_name[0] == 0) + rbd_dev->snap_name[0] = '-'; + + rbd_dev->obj_len = strlen(rbd_dev->obj); + snprintf(rbd_dev->obj_md_name, sizeof(rbd_dev->obj_md_name), "%s%s", + rbd_dev->obj, RBD_SUFFIX); + + /* initialize rest of new object */ + snprintf(rbd_dev->name, DEV_NAME_LEN, DRV_NAME "%d", rbd_dev->id); + rc = rbd_get_client(rbd_dev, mon_dev_name, options); + if (rc < 0) + goto err_out_slot; + + mutex_unlock(&ctl_mutex); + + /* pick the pool */ + osdc = &rbd_dev->client->osdc; + rc = ceph_pg_poolid_by_name(osdc->osdmap, rbd_dev->pool_name); + if (rc < 0) + goto err_out_client; + rbd_dev->poolid = rc; + + /* register our block device */ + irc = register_blkdev(0, rbd_dev->name); + if (irc < 0) { + rc = irc; + goto err_out_client; + } + rbd_dev->major = irc; + + /* set up and announce blkdev mapping */ + rc = rbd_init_disk(rbd_dev); + if (rc) + goto err_out_blkdev; + + return count; + +err_out_blkdev: + unregister_blkdev(rbd_dev->major, rbd_dev->name); +err_out_client: + rbd_put_client(rbd_dev); + mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); +err_out_slot: + list_del_init(&rbd_dev->node); + mutex_unlock(&ctl_mutex); + + kfree(rbd_dev); +err_out_opt: + kfree(options); +err_mon_dev: + kfree(mon_dev_name); +err_out_mod: + dout("Error adding device %s\n", buf); + module_put(THIS_MODULE); + return rc; +} + +static struct rbd_device *__rbd_get_dev(unsigned long id) +{ + struct list_head *tmp; + struct rbd_device *rbd_dev; + + list_for_each(tmp, &rbd_dev_list) { + rbd_dev = list_entry(tmp, struct rbd_device, node); + if (rbd_dev->id == id) + return rbd_dev; + } + return NULL; +} + +static ssize_t class_rbd_remove(struct class *c, + struct class_attribute *attr, + const char *buf, + size_t count) +{ + struct rbd_device *rbd_dev = NULL; + int target_id, rc; + unsigned long ul; + + rc = strict_strtoul(buf, 10, &ul); + if (rc) + return rc; + + /* convert to int; abort if we lost anything in the conversion */ + target_id = (int) ul; + if (target_id != ul) + return -EINVAL; + + /* remove object from list immediately */ + mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); + + rbd_dev = __rbd_get_dev(target_id); + if (rbd_dev) + list_del_init(&rbd_dev->node); + + mutex_unlock(&ctl_mutex); + + if (!rbd_dev) + return -ENOENT; + + rbd_put_client(rbd_dev); + + /* clean up and free blkdev */ + rbd_free_disk(rbd_dev); + unregister_blkdev(rbd_dev->major, rbd_dev->name); + kfree(rbd_dev); + + /* release module ref */ + module_put(THIS_MODULE); + + return count; +} + +static ssize_t class_rbd_snaps_list(struct class *c, + struct class_attribute *attr, + char *data) +{ + struct rbd_device *rbd_dev = NULL; + struct list_head *tmp; + struct rbd_image_header *header; + int i, n = 0, max = PAGE_SIZE; + int ret; + + mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); + + n += snprintf(data, max, "#id\tsnap\tKB\n"); + + list_for_each(tmp, &rbd_dev_list) { + char *names, *p; + struct ceph_snap_context *snapc; + + rbd_dev = list_entry(tmp, struct rbd_device, node); + header = &rbd_dev->header; + + down_read(&header->snap_rwsem); + + names = header->snap_names; + snapc = header->snapc; + + n += snprintf(data + n, max - n, "%d\t%s\t%lld%s\n", + rbd_dev->id, RBD_SNAP_HEAD_NAME, + header->image_size >> 10, + (!rbd_dev->cur_snap ? " (*)" : "")); + if (n == max) + break; + + p = names; + for (i = 0; i < header->total_snaps; i++, p += strlen(p) + 1) { + n += snprintf(data + n, max - n, "%d\t%s\t%lld%s\n", + rbd_dev->id, p, header->snap_sizes[i] >> 10, + (rbd_dev->cur_snap && + (snap_index(header, i) == rbd_dev->cur_snap) ? + " (*)" : "")); + if (n == max) + break; + } + + up_read(&header->snap_rwsem); + } + + + ret = n; + mutex_unlock(&ctl_mutex); + return ret; +} + +static ssize_t class_rbd_snaps_refresh(struct class *c, + struct class_attribute *attr, + const char *buf, + size_t count) +{ + struct rbd_device *rbd_dev = NULL; + int target_id, rc; + unsigned long ul; + int ret = count; + + rc = strict_strtoul(buf, 10, &ul); + if (rc) + return rc; + + /* convert to int; abort if we lost anything in the conversion */ + target_id = (int) ul; + if (target_id != ul) + return -EINVAL; + + mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); + + rbd_dev = __rbd_get_dev(target_id); + if (!rbd_dev) { + ret = -ENOENT; + goto done; + } + + rc = rbd_update_snaps(rbd_dev); + if (rc < 0) + ret = rc; + +done: + mutex_unlock(&ctl_mutex); + return ret; +} + +static ssize_t class_rbd_snap_create(struct class *c, + struct class_attribute *attr, + const char *buf, + size_t count) +{ + struct rbd_device *rbd_dev = NULL; + int target_id, ret; + char *name; + + name = kmalloc(RBD_MAX_SNAP_NAME_LEN + 1, GFP_KERNEL); + if (!name) + return -ENOMEM; + + /* parse snaps add command */ + if (sscanf(buf, "%d " + "%" __stringify(RBD_MAX_SNAP_NAME_LEN) "s", + &target_id, + name) != 2) { + ret = -EINVAL; + goto done; + } + + mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); + + rbd_dev = __rbd_get_dev(target_id); + if (!rbd_dev) { + ret = -ENOENT; + goto done_unlock; + } + + ret = rbd_header_add_snap(rbd_dev, + name, GFP_KERNEL); + if (ret < 0) + goto done_unlock; + + ret = rbd_update_snaps(rbd_dev); + if (ret < 0) + goto done_unlock; + + ret = count; +done_unlock: + mutex_unlock(&ctl_mutex); +done: + kfree(name); + return ret; +} + +static ssize_t class_rbd_rollback(struct class *c, + struct class_attribute *attr, + const char *buf, + size_t count) +{ + struct rbd_device *rbd_dev = NULL; + int target_id, ret; + u64 snapid; + char snap_name[RBD_MAX_SNAP_NAME_LEN]; + u64 cur_ofs; + char *seg_name; + + /* parse snaps add command */ + if (sscanf(buf, "%d " + "%" __stringify(RBD_MAX_SNAP_NAME_LEN) "s", + &target_id, + snap_name) != 2) { + return -EINVAL; + } + + ret = -ENOMEM; + seg_name = kmalloc(RBD_MAX_SEG_NAME_LEN + 1, GFP_NOIO); + if (!seg_name) + return ret; + + mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); + + rbd_dev = __rbd_get_dev(target_id); + if (!rbd_dev) { + ret = -ENOENT; + goto done_unlock; + } + + ret = snap_by_name(&rbd_dev->header, snap_name, &snapid, NULL); + if (ret < 0) + goto done_unlock; + + dout("snapid=%lld\n", snapid); + + cur_ofs = 0; + while (cur_ofs < rbd_dev->header.image_size) { + cur_ofs += rbd_get_segment(&rbd_dev->header, + rbd_dev->obj, + cur_ofs, (u64)-1, + seg_name, NULL); + dout("seg_name=%s\n", seg_name); + + ret = rbd_req_sync_rollback_obj(rbd_dev, snapid, seg_name); + if (ret < 0) + pr_warning("could not roll back obj %s err=%d\n", + seg_name, ret); + } + + ret = rbd_update_snaps(rbd_dev); + if (ret < 0) + goto done_unlock; + + ret = count; + +done_unlock: + mutex_unlock(&ctl_mutex); + kfree(seg_name); + + return ret; +} + +static struct class_attribute class_rbd_attrs[] = { + __ATTR(add, 0200, NULL, class_rbd_add), + __ATTR(remove, 0200, NULL, class_rbd_remove), + __ATTR(list, 0444, class_rbd_list, NULL), + __ATTR(snaps_refresh, 0200, NULL, class_rbd_snaps_refresh), + __ATTR(snap_create, 0200, NULL, class_rbd_snap_create), + __ATTR(snaps_list, 0444, class_rbd_snaps_list, NULL), + __ATTR(snap_rollback, 0200, NULL, class_rbd_rollback), + __ATTR_NULL +}; + +/* + * create control files in sysfs + * /sys/class/rbd/... + */ +static int rbd_sysfs_init(void) +{ + int ret = -ENOMEM; + + class_rbd = kzalloc(sizeof(*class_rbd), GFP_KERNEL); + if (!class_rbd) + goto out; + + class_rbd->name = DRV_NAME; + class_rbd->owner = THIS_MODULE; + class_rbd->class_release = class_rbd_release; + class_rbd->class_attrs = class_rbd_attrs; + + ret = class_register(class_rbd); + if (ret) + goto out_class; + return 0; + +out_class: + kfree(class_rbd); + class_rbd = NULL; + pr_err(DRV_NAME ": failed to create class rbd\n"); +out: + return ret; +} + +static void rbd_sysfs_cleanup(void) +{ + if (class_rbd) + class_destroy(class_rbd); + class_rbd = NULL; +} + +int __init rbd_init(void) +{ + int rc; + + rc = rbd_sysfs_init(); + if (rc) + return rc; + spin_lock_init(&node_lock); + pr_info("loaded " DRV_NAME_LONG "\n"); + return 0; +} + +void __exit rbd_exit(void) +{ + rbd_sysfs_cleanup(); +} + +module_init(rbd_init); +module_exit(rbd_exit); + +MODULE_AUTHOR("Sage Weil <sage@newdream.net>"); +MODULE_AUTHOR("Yehuda Sadeh <yehuda@hq.newdream.net>"); +MODULE_DESCRIPTION("rados block device"); + +/* following authorship retained from original osdblk.c */ +MODULE_AUTHOR("Jeff Garzik <jeff@garzik.org>"); + +MODULE_LICENSE("GPL"); diff --git a/drivers/block/rbd_types.h b/drivers/block/rbd_types.h new file mode 100644 index 000000000000..fc6c678aa2cb --- /dev/null +++ b/drivers/block/rbd_types.h @@ -0,0 +1,73 @@ +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2004-2010 Sage Weil <sage@newdream.net> + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#ifndef CEPH_RBD_TYPES_H +#define CEPH_RBD_TYPES_H + +#include <linux/types.h> + +/* + * rbd image 'foo' consists of objects + * foo.rbd - image metadata + * foo.00000000 + * foo.00000001 + * ... - data + */ + +#define RBD_SUFFIX ".rbd" +#define RBD_DIRECTORY "rbd_directory" +#define RBD_INFO "rbd_info" + +#define RBD_DEFAULT_OBJ_ORDER 22 /* 4MB */ +#define RBD_MIN_OBJ_ORDER 16 +#define RBD_MAX_OBJ_ORDER 30 + +#define RBD_MAX_OBJ_NAME_LEN 96 +#define RBD_MAX_SEG_NAME_LEN 128 + +#define RBD_COMP_NONE 0 +#define RBD_CRYPT_NONE 0 + +#define RBD_HEADER_TEXT "<<< Rados Block Device Image >>>\n" +#define RBD_HEADER_SIGNATURE "RBD" +#define RBD_HEADER_VERSION "001.005" + +struct rbd_info { + __le64 max_id; +} __attribute__ ((packed)); + +struct rbd_image_snap_ondisk { + __le64 id; + __le64 image_size; +} __attribute__((packed)); + +struct rbd_image_header_ondisk { + char text[40]; + char block_name[24]; + char signature[4]; + char version[8]; + struct { + __u8 order; + __u8 crypt_type; + __u8 comp_type; + __u8 unused; + } __attribute__((packed)) options; + __le64 image_size; + __le64 snap_seq; + __le32 snap_count; + __le32 reserved; + __le64 snap_names_len; + struct rbd_image_snap_ondisk snaps[0]; +} __attribute__((packed)); + + +#endif diff --git a/drivers/block/swim.c b/drivers/block/swim.c index 2e46815876df..75333d0a3327 100644 --- a/drivers/block/swim.c +++ b/drivers/block/swim.c @@ -20,7 +20,7 @@ #include <linux/fd.h> #include <linux/slab.h> #include <linux/blkdev.h> -#include <linux/smp_lock.h> +#include <linux/mutex.h> #include <linux/hdreg.h> #include <linux/kernel.h> #include <linux/delay.h> @@ -222,6 +222,7 @@ extern int swim_read_sector_header(struct swim __iomem *base, extern int swim_read_sector_data(struct swim __iomem *base, unsigned char *data); +static DEFINE_MUTEX(swim_mutex); static inline void set_swim_mode(struct swim __iomem *base, int enable) { struct iwm __iomem *iwm_base; @@ -666,9 +667,9 @@ static int floppy_unlocked_open(struct block_device *bdev, fmode_t mode) { int ret; - lock_kernel(); + mutex_lock(&swim_mutex); ret = floppy_open(bdev, mode); - unlock_kernel(); + mutex_unlock(&swim_mutex); return ret; } @@ -678,7 +679,7 @@ static int floppy_release(struct gendisk *disk, fmode_t mode) struct floppy_state *fs = disk->private_data; struct swim __iomem *base = fs->swd->base; - lock_kernel(); + mutex_lock(&swim_mutex); if (fs->ref_count < 0) fs->ref_count = 0; else if (fs->ref_count > 0) @@ -686,7 +687,7 @@ static int floppy_release(struct gendisk *disk, fmode_t mode) if (fs->ref_count == 0) swim_motor(base, OFF); - unlock_kernel(); + mutex_unlock(&swim_mutex); return 0; } @@ -704,9 +705,9 @@ static int floppy_ioctl(struct block_device *bdev, fmode_t mode, case FDEJECT: if (fs->ref_count != 1) return -EBUSY; - lock_kernel(); + mutex_lock(&swim_mutex); err = floppy_eject(fs); - unlock_kernel(); + mutex_unlock(&swim_mutex); return err; case FDGETPRM: diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c index cc6a3864822c..bf3a5b859299 100644 --- a/drivers/block/swim3.c +++ b/drivers/block/swim3.c @@ -25,7 +25,7 @@ #include <linux/ioctl.h> #include <linux/blkdev.h> #include <linux/interrupt.h> -#include <linux/smp_lock.h> +#include <linux/mutex.h> #include <linux/module.h> #include <linux/spinlock.h> #include <asm/io.h> @@ -36,6 +36,7 @@ #include <asm/machdep.h> #include <asm/pmac_feature.h> +static DEFINE_MUTEX(swim3_mutex); static struct request_queue *swim3_queue; static struct gendisk *disks[2]; static struct request *fd_req; @@ -873,9 +874,9 @@ static int floppy_ioctl(struct block_device *bdev, fmode_t mode, { int ret; - lock_kernel(); + mutex_lock(&swim3_mutex); ret = floppy_locked_ioctl(bdev, mode, cmd, param); - unlock_kernel(); + mutex_unlock(&swim3_mutex); return ret; } @@ -953,9 +954,9 @@ static int floppy_unlocked_open(struct block_device *bdev, fmode_t mode) { int ret; - lock_kernel(); + mutex_lock(&swim3_mutex); ret = floppy_open(bdev, mode); - unlock_kernel(); + mutex_unlock(&swim3_mutex); return ret; } @@ -964,13 +965,13 @@ static int floppy_release(struct gendisk *disk, fmode_t mode) { struct floppy_state *fs = disk->private_data; struct swim3 __iomem *sw = fs->swim3; - lock_kernel(); + mutex_lock(&swim3_mutex); if (fs->ref_count > 0 && --fs->ref_count == 0) { swim3_action(fs, MOTOR_OFF); out_8(&sw->control_bic, 0xff); swim3_select(fs, RELAX); } - unlock_kernel(); + mutex_unlock(&swim3_mutex); return 0; } diff --git a/drivers/block/ub.c b/drivers/block/ub.c index c48e14878582..b5690a045a01 100644 --- a/drivers/block/ub.c +++ b/drivers/block/ub.c @@ -28,7 +28,7 @@ #include <linux/timer.h> #include <linux/scatterlist.h> #include <linux/slab.h> -#include <linux/smp_lock.h> +#include <linux/mutex.h> #include <scsi/scsi.h> #define DRV_NAME "ub" @@ -248,6 +248,7 @@ struct ub_completion { spinlock_t lock; }; +static DEFINE_MUTEX(ub_mutex); static inline void ub_init_completion(struct ub_completion *x) { x->done = 0; @@ -1715,9 +1716,9 @@ static int ub_bd_unlocked_open(struct block_device *bdev, fmode_t mode) { int ret; - lock_kernel(); + mutex_lock(&ub_mutex); ret = ub_bd_open(bdev, mode); - unlock_kernel(); + mutex_unlock(&ub_mutex); return ret; } @@ -1730,9 +1731,9 @@ static int ub_bd_release(struct gendisk *disk, fmode_t mode) struct ub_lun *lun = disk->private_data; struct ub_dev *sc = lun->udev; - lock_kernel(); + mutex_lock(&ub_mutex); ub_put(sc); - unlock_kernel(); + mutex_unlock(&ub_mutex); return 0; } @@ -1747,9 +1748,9 @@ static int ub_bd_ioctl(struct block_device *bdev, fmode_t mode, void __user *usermem = (void __user *) arg; int ret; - lock_kernel(); + mutex_lock(&ub_mutex); ret = scsi_cmd_ioctl(disk->queue, disk, mode, cmd, usermem); - unlock_kernel(); + mutex_unlock(&ub_mutex); return ret; } diff --git a/drivers/block/viodasd.c b/drivers/block/viodasd.c index f651e51a3319..e2ff697697c2 100644 --- a/drivers/block/viodasd.c +++ b/drivers/block/viodasd.c @@ -41,7 +41,7 @@ #include <linux/errno.h> #include <linux/init.h> #include <linux/string.h> -#include <linux/smp_lock.h> +#include <linux/mutex.h> #include <linux/dma-mapping.h> #include <linux/completion.h> #include <linux/device.h> @@ -73,6 +73,7 @@ enum { MAX_DISK_NAME = FIELD_SIZEOF(struct gendisk, disk_name) }; +static DEFINE_MUTEX(viodasd_mutex); static DEFINE_SPINLOCK(viodasd_spinlock); #define VIOMAXREQ 16 @@ -180,9 +181,9 @@ static int viodasd_unlocked_open(struct block_device *bdev, fmode_t mode) { int ret; - lock_kernel(); + mutex_lock(&viodasd_mutex); ret = viodasd_open(bdev, mode); - unlock_kernel(); + mutex_unlock(&viodasd_mutex); return ret; } @@ -196,7 +197,7 @@ static int viodasd_release(struct gendisk *disk, fmode_t mode) struct viodasd_device *d = disk->private_data; HvLpEvent_Rc hvrc; - lock_kernel(); + mutex_lock(&viodasd_mutex); /* Send the event to OS/400. We DON'T expect a response */ hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp, HvLpEvent_Type_VirtualIo, @@ -210,7 +211,7 @@ static int viodasd_release(struct gendisk *disk, fmode_t mode) if (hvrc != 0) pr_warning("HV close call failed %d\n", (int)hvrc); - unlock_kernel(); + mutex_unlock(&viodasd_mutex); return 0; } diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 2aafafca2b13..8320490226b7 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -2,7 +2,6 @@ #include <linux/spinlock.h> #include <linux/slab.h> #include <linux/blkdev.h> -#include <linux/smp_lock.h> #include <linux/hdreg.h> #include <linux/virtio.h> #include <linux/virtio_blk.h> @@ -202,6 +201,7 @@ static int virtblk_get_id(struct gendisk *disk, char *id_str) struct virtio_blk *vblk = disk->private_data; struct request *req; struct bio *bio; + int err; bio = bio_map_kern(vblk->disk->queue, id_str, VIRTIO_BLK_ID_BYTES, GFP_KERNEL); @@ -215,11 +215,14 @@ static int virtblk_get_id(struct gendisk *disk, char *id_str) } req->cmd_type = REQ_TYPE_SPECIAL; - return blk_execute_rq(vblk->disk->queue, vblk->disk, req, false); + err = blk_execute_rq(vblk->disk->queue, vblk->disk, req, false); + blk_put_request(req); + + return err; } -static int virtblk_locked_ioctl(struct block_device *bdev, fmode_t mode, - unsigned cmd, unsigned long data) +static int virtblk_ioctl(struct block_device *bdev, fmode_t mode, + unsigned int cmd, unsigned long data) { struct gendisk *disk = bdev->bd_disk; struct virtio_blk *vblk = disk->private_data; @@ -234,18 +237,6 @@ static int virtblk_locked_ioctl(struct block_device *bdev, fmode_t mode, (void __user *)data); } -static int virtblk_ioctl(struct block_device *bdev, fmode_t mode, - unsigned int cmd, unsigned long param) -{ - int ret; - - lock_kernel(); - ret = virtblk_locked_ioctl(bdev, mode, cmd, param); - unlock_kernel(); - - return ret; -} - /* We provide getgeo only to please some old bootloader/partitioning tools */ static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo) { diff --git a/drivers/block/xd.c b/drivers/block/xd.c index d5a3cd750561..4abd2bcd20fb 100644 --- a/drivers/block/xd.c +++ b/drivers/block/xd.c @@ -46,7 +46,7 @@ #include <linux/init.h> #include <linux/wait.h> #include <linux/blkdev.h> -#include <linux/smp_lock.h> +#include <linux/mutex.h> #include <linux/blkpg.h> #include <linux/delay.h> #include <linux/io.h> @@ -58,6 +58,7 @@ #include "xd.h" +static DEFINE_MUTEX(xd_mutex); static void __init do_xd_setup (int *integers); #ifdef MODULE static int xd[5] = { -1,-1,-1,-1, }; @@ -381,9 +382,9 @@ static int xd_ioctl(struct block_device *bdev, fmode_t mode, { int ret; - lock_kernel(); + mutex_lock(&xd_mutex); ret = xd_locked_ioctl(bdev, mode, cmd, param); - unlock_kernel(); + mutex_unlock(&xd_mutex); return ret; } diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index ac1b682edecb..3ff06f475eef 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -41,7 +41,7 @@ #include <linux/cdrom.h> #include <linux/module.h> #include <linux/slab.h> -#include <linux/smp_lock.h> +#include <linux/mutex.h> #include <linux/scatterlist.h> #include <xen/xen.h> @@ -69,6 +69,7 @@ struct blk_shadow { unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST]; }; +static DEFINE_MUTEX(blkfront_mutex); static const struct block_device_operations xlvbd_block_fops; #define BLK_RING_SIZE __RING_SIZE((struct blkif_sring *)0, PAGE_SIZE) @@ -834,7 +835,7 @@ static int blkfront_probe(struct xenbus_device *dev, char *type; int len; /* no unplug has been done: do not hook devices != xen vbds */ - if (xen_platform_pci_unplug & XEN_UNPLUG_IGNORE) { + if (xen_platform_pci_unplug & XEN_UNPLUG_UNNECESSARY) { int major; if (!VDEV_IS_EXTENDED(vdevice)) @@ -1201,7 +1202,7 @@ static int blkif_open(struct block_device *bdev, fmode_t mode) struct blkfront_info *info; int err = 0; - lock_kernel(); + mutex_lock(&blkfront_mutex); info = disk->private_data; if (!info) { @@ -1219,7 +1220,7 @@ static int blkif_open(struct block_device *bdev, fmode_t mode) mutex_unlock(&info->mutex); out: - unlock_kernel(); + mutex_unlock(&blkfront_mutex); return err; } @@ -1229,7 +1230,7 @@ static int blkif_release(struct gendisk *disk, fmode_t mode) struct block_device *bdev; struct xenbus_device *xbdev; - lock_kernel(); + mutex_lock(&blkfront_mutex); bdev = bdget_disk(disk, 0); bdput(bdev); @@ -1263,7 +1264,7 @@ static int blkif_release(struct gendisk *disk, fmode_t mode) } out: - unlock_kernel(); + mutex_unlock(&blkfront_mutex); return 0; } diff --git a/drivers/block/xsysace.c b/drivers/block/xsysace.c index 057413bb16e2..6e968cd4893c 100644 --- a/drivers/block/xsysace.c +++ b/drivers/block/xsysace.c @@ -89,7 +89,7 @@ #include <linux/delay.h> #include <linux/slab.h> #include <linux/blkdev.h> -#include <linux/smp_lock.h> +#include <linux/mutex.h> #include <linux/ata.h> #include <linux/hdreg.h> #include <linux/platform_device.h> @@ -214,6 +214,7 @@ struct ace_device { u16 cf_id[ATA_ID_WORDS]; }; +static DEFINE_MUTEX(xsysace_mutex); static int ace_major; /* --------------------------------------------------------------------- @@ -903,13 +904,13 @@ static int ace_open(struct block_device *bdev, fmode_t mode) dev_dbg(ace->dev, "ace_open() users=%i\n", ace->users + 1); - lock_kernel(); + mutex_lock(&xsysace_mutex); spin_lock_irqsave(&ace->lock, flags); ace->users++; spin_unlock_irqrestore(&ace->lock, flags); check_disk_change(bdev); - unlock_kernel(); + mutex_unlock(&xsysace_mutex); return 0; } @@ -922,7 +923,7 @@ static int ace_release(struct gendisk *disk, fmode_t mode) dev_dbg(ace->dev, "ace_release() users=%i\n", ace->users - 1); - lock_kernel(); + mutex_lock(&xsysace_mutex); spin_lock_irqsave(&ace->lock, flags); ace->users--; if (ace->users == 0) { @@ -930,7 +931,7 @@ static int ace_release(struct gendisk *disk, fmode_t mode) ace_out(ace, ACE_CTRL, val & ~ACE_CTRL_LOCKREQ); } spin_unlock_irqrestore(&ace->lock, flags); - unlock_kernel(); + mutex_unlock(&xsysace_mutex); return 0; } diff --git a/drivers/block/z2ram.c b/drivers/block/z2ram.c index d75b2bb601ad..dcd4cfcf4126 100644 --- a/drivers/block/z2ram.c +++ b/drivers/block/z2ram.c @@ -33,7 +33,7 @@ #include <linux/module.h> #include <linux/blkdev.h> #include <linux/bitops.h> -#include <linux/smp_lock.h> +#include <linux/mutex.h> #include <linux/slab.h> #include <asm/setup.h> @@ -57,6 +57,7 @@ extern struct mem_info m68k_memory[NUM_MEMINFO]; #define Z2RAM_CHUNK1024 ( Z2RAM_CHUNKSIZE >> 10 ) +static DEFINE_MUTEX(z2ram_mutex); static u_long *z2ram_map = NULL; static u_long z2ram_size = 0; static int z2_count = 0; @@ -154,7 +155,7 @@ static int z2_open(struct block_device *bdev, fmode_t mode) device = MINOR(bdev->bd_dev); - lock_kernel(); + mutex_lock(&z2ram_mutex); if ( current_device != -1 && current_device != device ) { rc = -EBUSY; @@ -296,25 +297,25 @@ static int z2_open(struct block_device *bdev, fmode_t mode) set_capacity(z2ram_gendisk, z2ram_size >> 9); } - unlock_kernel(); + mutex_unlock(&z2ram_mutex); return 0; err_out_kfree: kfree(z2ram_map); err_out: - unlock_kernel(); + mutex_unlock(&z2ram_mutex); return rc; } static int z2_release(struct gendisk *disk, fmode_t mode) { - lock_kernel(); + mutex_lock(&z2ram_mutex); if ( current_device == -1 ) { - unlock_kernel(); + mutex_unlock(&z2ram_mutex); return 0; } - unlock_kernel(); + mutex_unlock(&z2ram_mutex); /* * FIXME: unmap memory */ |