Diffstat (limited to 'drivers/block')
-rw-r--r--  drivers/block/brd.c                  |  20
-rw-r--r--  drivers/block/cciss_scsi.c           |   3
-rw-r--r--  drivers/block/drbd/drbd_bitmap.c     |  50
-rw-r--r--  drivers/block/drbd/drbd_nl.c         |   6
-rw-r--r--  drivers/block/floppy.c               |  37
-rw-r--r--  drivers/block/hd.c                   |   1
-rw-r--r--  drivers/block/loop.c                 |  16
-rw-r--r--  drivers/block/mtip32xx/Kconfig       |   2
-rw-r--r--  drivers/block/mtip32xx/mtip32xx.c    | 860
-rw-r--r--  drivers/block/mtip32xx/mtip32xx.h    |  58
-rw-r--r--  drivers/block/nbd.c                  | 296
-rw-r--r--  drivers/block/nvme.c                 |   1
-rw-r--r--  drivers/block/pktcdvd.c              |   8
-rw-r--r--  drivers/block/rbd.c                  | 730
-rw-r--r--  drivers/block/rbd_types.h            |   4
-rw-r--r--  drivers/block/sunvdc.c               |   5
-rw-r--r--  drivers/block/viodasd.c              | 809
-rw-r--r--  drivers/block/virtio_blk.c           |  42
-rw-r--r--  drivers/block/xd.c                   |   1
-rw-r--r--  drivers/block/xen-blkback/blkback.c  |  50
-rw-r--r--  drivers/block/xen-blkback/common.h   |   6
-rw-r--r--  drivers/block/xen-blkback/xenbus.c   |  89
-rw-r--r--  drivers/block/xen-blkfront.c         |  44
23 files changed, 1457 insertions, 1681 deletions
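The change that touches the most files below is mechanical: kmap_atomic() and kunmap_atomic() dropped their km_type slot argument (KM_USER0, KM_USER1, KM_IRQ1, ...) when the kernel moved to stack-managed atomic kmap slots, so brd, drbd, loop and others simply delete the second parameter. A minimal sketch of the calling convention before and after (copy_into_page() is a hypothetical helper, not taken from this diff):

    /* old: dst = kmap_atomic(page, KM_USER0); ... kunmap_atomic(dst, KM_USER0);
     * new: the slot argument is gone; unmap by address alone. */
    static void copy_into_page(struct page *page, const void *src,
                               size_t offset, size_t len)
    {
            void *dst = kmap_atomic(page);  /* map; slots nest like a stack */

            memcpy(dst + offset, src, len);
            kunmap_atomic(dst);             /* release in reverse order of mapping */
    }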
diff --git a/drivers/block/brd.c b/drivers/block/brd.c index ec246437f5a4..531ceb31d0ff 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -242,9 +242,9 @@ static void copy_to_brd(struct brd_device *brd, const void *src, page = brd_lookup_page(brd, sector); BUG_ON(!page); - dst = kmap_atomic(page, KM_USER1); + dst = kmap_atomic(page); memcpy(dst + offset, src, copy); - kunmap_atomic(dst, KM_USER1); + kunmap_atomic(dst); if (copy < n) { src += copy; @@ -253,9 +253,9 @@ static void copy_to_brd(struct brd_device *brd, const void *src, page = brd_lookup_page(brd, sector); BUG_ON(!page); - dst = kmap_atomic(page, KM_USER1); + dst = kmap_atomic(page); memcpy(dst, src, copy); - kunmap_atomic(dst, KM_USER1); + kunmap_atomic(dst); } } @@ -273,9 +273,9 @@ static void copy_from_brd(void *dst, struct brd_device *brd, copy = min_t(size_t, n, PAGE_SIZE - offset); page = brd_lookup_page(brd, sector); if (page) { - src = kmap_atomic(page, KM_USER1); + src = kmap_atomic(page); memcpy(dst, src + offset, copy); - kunmap_atomic(src, KM_USER1); + kunmap_atomic(src); } else memset(dst, 0, copy); @@ -285,9 +285,9 @@ static void copy_from_brd(void *dst, struct brd_device *brd, copy = n - copy; page = brd_lookup_page(brd, sector); if (page) { - src = kmap_atomic(page, KM_USER1); + src = kmap_atomic(page); memcpy(dst, src, copy); - kunmap_atomic(src, KM_USER1); + kunmap_atomic(src); } else memset(dst, 0, copy); } @@ -309,7 +309,7 @@ static int brd_do_bvec(struct brd_device *brd, struct page *page, goto out; } - mem = kmap_atomic(page, KM_USER0); + mem = kmap_atomic(page); if (rw == READ) { copy_from_brd(mem + off, brd, sector, len); flush_dcache_page(page); @@ -317,7 +317,7 @@ static int brd_do_bvec(struct brd_device *brd, struct page *page, flush_dcache_page(page); copy_to_brd(brd, mem + off, sector, len); } - kunmap_atomic(mem, KM_USER0); + kunmap_atomic(mem); out: return err; diff --git a/drivers/block/cciss_scsi.c b/drivers/block/cciss_scsi.c index e820b68d2f6c..acda773b3720 100644 --- a/drivers/block/cciss_scsi.c +++ b/drivers/block/cciss_scsi.c @@ -866,6 +866,7 @@ cciss_scsi_detect(ctlr_info_t *h) sh->can_queue = cciss_tape_cmds; sh->sg_tablesize = h->maxsgentries; sh->max_cmd_len = MAX_COMMAND_SIZE; + sh->max_sectors = h->cciss_max_sectors; ((struct cciss_scsi_adapter_data_t *) h->scsi_ctlr)->scsi_host = sh; @@ -1410,7 +1411,7 @@ static void cciss_scatter_gather(ctlr_info_t *h, CommandList_struct *c, /* track how many SG entries we are using */ if (request_nsgs > h->maxSG) h->maxSG = request_nsgs; - c->Header.SGTotal = (__u8) request_nsgs + chained; + c->Header.SGTotal = (u16) request_nsgs + chained; if (request_nsgs > h->max_cmd_sgentries) c->Header.SGList = h->max_cmd_sgentries; else diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index 912f585a760f..3030201c69d8 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -289,25 +289,25 @@ static unsigned int bm_bit_to_page_idx(struct drbd_bitmap *b, u64 bitnr) return page_nr; } -static unsigned long *__bm_map_pidx(struct drbd_bitmap *b, unsigned int idx, const enum km_type km) +static unsigned long *__bm_map_pidx(struct drbd_bitmap *b, unsigned int idx) { struct page *page = b->bm_pages[idx]; - return (unsigned long *) kmap_atomic(page, km); + return (unsigned long *) kmap_atomic(page); } static unsigned long *bm_map_pidx(struct drbd_bitmap *b, unsigned int idx) { - return __bm_map_pidx(b, idx, KM_IRQ1); + return __bm_map_pidx(b, idx); } -static void __bm_unmap(unsigned long *p_addr, 
const enum km_type km) +static void __bm_unmap(unsigned long *p_addr) { - kunmap_atomic(p_addr, km); + kunmap_atomic(p_addr); }; static void bm_unmap(unsigned long *p_addr) { - return __bm_unmap(p_addr, KM_IRQ1); + return __bm_unmap(p_addr); } /* long word offset of _bitmap_ sector */ @@ -543,15 +543,15 @@ static unsigned long bm_count_bits(struct drbd_bitmap *b) /* all but last page */ for (idx = 0; idx < b->bm_number_of_pages - 1; idx++) { - p_addr = __bm_map_pidx(b, idx, KM_USER0); + p_addr = __bm_map_pidx(b, idx); for (i = 0; i < LWPP; i++) bits += hweight_long(p_addr[i]); - __bm_unmap(p_addr, KM_USER0); + __bm_unmap(p_addr); cond_resched(); } /* last (or only) page */ last_word = ((b->bm_bits - 1) & BITS_PER_PAGE_MASK) >> LN2_BPL; - p_addr = __bm_map_pidx(b, idx, KM_USER0); + p_addr = __bm_map_pidx(b, idx); for (i = 0; i < last_word; i++) bits += hweight_long(p_addr[i]); p_addr[last_word] &= cpu_to_lel(mask); @@ -559,7 +559,7 @@ static unsigned long bm_count_bits(struct drbd_bitmap *b) /* 32bit arch, may have an unused padding long */ if (BITS_PER_LONG == 32 && (last_word & 1) == 0) p_addr[last_word+1] = 0; - __bm_unmap(p_addr, KM_USER0); + __bm_unmap(p_addr); return bits; } @@ -970,11 +970,11 @@ static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must * to use pre-allocated page pool */ void *src, *dest; page = alloc_page(__GFP_HIGHMEM|__GFP_WAIT); - dest = kmap_atomic(page, KM_USER0); - src = kmap_atomic(b->bm_pages[page_nr], KM_USER1); + dest = kmap_atomic(page); + src = kmap_atomic(b->bm_pages[page_nr]); memcpy(dest, src, PAGE_SIZE); - kunmap_atomic(src, KM_USER1); - kunmap_atomic(dest, KM_USER0); + kunmap_atomic(src); + kunmap_atomic(dest); bm_store_page_idx(page, page_nr); } else page = b->bm_pages[page_nr]; @@ -1163,7 +1163,7 @@ int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(loc * this returns a bit number, NOT a sector! 
*/ static unsigned long __bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo, - const int find_zero_bit, const enum km_type km) + const int find_zero_bit) { struct drbd_bitmap *b = mdev->bitmap; unsigned long *p_addr; @@ -1178,7 +1178,7 @@ static unsigned long __bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo, while (bm_fo < b->bm_bits) { /* bit offset of the first bit in the page */ bit_offset = bm_fo & ~BITS_PER_PAGE_MASK; - p_addr = __bm_map_pidx(b, bm_bit_to_page_idx(b, bm_fo), km); + p_addr = __bm_map_pidx(b, bm_bit_to_page_idx(b, bm_fo)); if (find_zero_bit) i = find_next_zero_bit_le(p_addr, @@ -1187,7 +1187,7 @@ static unsigned long __bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo, i = find_next_bit_le(p_addr, PAGE_SIZE*8, bm_fo & BITS_PER_PAGE_MASK); - __bm_unmap(p_addr, km); + __bm_unmap(p_addr); if (i < PAGE_SIZE*8) { bm_fo = bit_offset + i; if (bm_fo >= b->bm_bits) @@ -1215,7 +1215,7 @@ static unsigned long bm_find_next(struct drbd_conf *mdev, if (BM_DONT_TEST & b->bm_flags) bm_print_lock_info(mdev); - i = __bm_find_next(mdev, bm_fo, find_zero_bit, KM_IRQ1); + i = __bm_find_next(mdev, bm_fo, find_zero_bit); spin_unlock_irq(&b->bm_lock); return i; @@ -1239,13 +1239,13 @@ unsigned long drbd_bm_find_next_zero(struct drbd_conf *mdev, unsigned long bm_fo unsigned long _drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo) { /* WARN_ON(!(BM_DONT_SET & mdev->b->bm_flags)); */ - return __bm_find_next(mdev, bm_fo, 0, KM_USER1); + return __bm_find_next(mdev, bm_fo, 0); } unsigned long _drbd_bm_find_next_zero(struct drbd_conf *mdev, unsigned long bm_fo) { /* WARN_ON(!(BM_DONT_SET & mdev->b->bm_flags)); */ - return __bm_find_next(mdev, bm_fo, 1, KM_USER1); + return __bm_find_next(mdev, bm_fo, 1); } /* returns number of bits actually changed. 
@@ -1273,14 +1273,14 @@ static int __bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s, unsigned int page_nr = bm_bit_to_page_idx(b, bitnr); if (page_nr != last_page_nr) { if (p_addr) - __bm_unmap(p_addr, KM_IRQ1); + __bm_unmap(p_addr); if (c < 0) bm_set_page_lazy_writeout(b->bm_pages[last_page_nr]); else if (c > 0) bm_set_page_need_writeout(b->bm_pages[last_page_nr]); changed_total += c; c = 0; - p_addr = __bm_map_pidx(b, page_nr, KM_IRQ1); + p_addr = __bm_map_pidx(b, page_nr); last_page_nr = page_nr; } if (val) @@ -1289,7 +1289,7 @@ static int __bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s, c -= (0 != __test_and_clear_bit_le(bitnr & BITS_PER_PAGE_MASK, p_addr)); } if (p_addr) - __bm_unmap(p_addr, KM_IRQ1); + __bm_unmap(p_addr); if (c < 0) bm_set_page_lazy_writeout(b->bm_pages[last_page_nr]); else if (c > 0) @@ -1342,13 +1342,13 @@ static inline void bm_set_full_words_within_one_page(struct drbd_bitmap *b, { int i; int bits; - unsigned long *paddr = kmap_atomic(b->bm_pages[page_nr], KM_IRQ1); + unsigned long *paddr = kmap_atomic(b->bm_pages[page_nr]); for (i = first_word; i < last_word; i++) { bits = hweight_long(paddr[i]); paddr[i] = ~0UL; b->bm_set += BITS_PER_LONG - bits; } - kunmap_atomic(paddr, KM_IRQ1); + kunmap_atomic(paddr); } /* Same thing as drbd_bm_set_bits, diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index af2a25049bce..abfaacaaf346 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -179,7 +179,7 @@ int drbd_khelper(struct drbd_conf *mdev, char *cmd) dev_info(DEV, "helper command: %s %s %s\n", usermode_helper, cmd, mb); drbd_bcast_ev_helper(mdev, cmd); - ret = call_usermodehelper(usermode_helper, argv, envp, 1); + ret = call_usermodehelper(usermode_helper, argv, envp, UMH_WAIT_PROC); if (ret) dev_warn(DEV, "helper command: %s %s %s exit code %u (0x%x)\n", usermode_helper, cmd, mb, @@ -2526,10 +2526,10 @@ void drbd_bcast_ee(struct drbd_conf *mdev, page = e->pages; page_chain_for_each(page) { - void *d = kmap_atomic(page, KM_USER0); + void *d = kmap_atomic(page); unsigned l = min_t(unsigned, len, PAGE_SIZE); memcpy(tl, d, l); - kunmap_atomic(d, KM_USER0); + kunmap_atomic(d); tl = (unsigned short*)((char*)tl + l); len -= l; if (len == 0) diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 744f078f4dd8..b0b00d70c166 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -202,7 +202,6 @@ static int slow_floppy; #include <asm/dma.h> #include <asm/irq.h> -#include <asm/system.h> static int FLOPPY_IRQ = 6; static int FLOPPY_DMA = 2; @@ -1031,37 +1030,6 @@ static int fd_wait_for_completion(unsigned long delay, timeout_fn function) return 0; } -static DEFINE_SPINLOCK(floppy_hlt_lock); -static int hlt_disabled; -static void floppy_disable_hlt(void) -{ - unsigned long flags; - - WARN_ONCE(1, "floppy_disable_hlt() scheduled for removal in 2012"); - spin_lock_irqsave(&floppy_hlt_lock, flags); - if (!hlt_disabled) { - hlt_disabled = 1; -#ifdef HAVE_DISABLE_HLT - disable_hlt(); -#endif - } - spin_unlock_irqrestore(&floppy_hlt_lock, flags); -} - -static void floppy_enable_hlt(void) -{ - unsigned long flags; - - spin_lock_irqsave(&floppy_hlt_lock, flags); - if (hlt_disabled) { - hlt_disabled = 0; -#ifdef HAVE_DISABLE_HLT - enable_hlt(); -#endif - } - spin_unlock_irqrestore(&floppy_hlt_lock, flags); -} - static void setup_DMA(void) { unsigned long f; @@ -1106,7 +1074,6 @@ static void setup_DMA(void) fd_enable_dma(); release_dma_lock(f); #endif - floppy_disable_hlt(); } static 
void show_floppy(void); @@ -1708,7 +1675,6 @@ irqreturn_t floppy_interrupt(int irq, void *dev_id) fd_disable_dma(); release_dma_lock(f); - floppy_enable_hlt(); do_floppy = NULL; if (fdc >= N_FDC || FDCS->address == -1) { /* we don't even know which FDC is the culprit */ @@ -1857,8 +1823,6 @@ static void floppy_shutdown(unsigned long data) show_floppy(); cancel_activity(); - floppy_enable_hlt(); - flags = claim_dma_lock(); fd_disable_dma(); release_dma_lock(flags); @@ -4509,7 +4473,6 @@ static void floppy_release_irq_and_dma(void) #if N_FDC > 1 set_dor(1, ~8, 0); #endif - floppy_enable_hlt(); if (floppy_track_buffer && max_buffer_sectors) { tmpsize = max_buffer_sectors * 1024; diff --git a/drivers/block/hd.c b/drivers/block/hd.c index b52c9ca146fc..bf397bf108b7 100644 --- a/drivers/block/hd.c +++ b/drivers/block/hd.c @@ -44,7 +44,6 @@ #define HD_IRQ 14 #define REALLY_SLOW_IO -#include <asm/system.h> #include <asm/io.h> #include <asm/uaccess.h> diff --git a/drivers/block/loop.c b/drivers/block/loop.c index cd504353b278..bbca966f8f66 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -93,16 +93,16 @@ static int transfer_none(struct loop_device *lo, int cmd, struct page *loop_page, unsigned loop_off, int size, sector_t real_block) { - char *raw_buf = kmap_atomic(raw_page, KM_USER0) + raw_off; - char *loop_buf = kmap_atomic(loop_page, KM_USER1) + loop_off; + char *raw_buf = kmap_atomic(raw_page) + raw_off; + char *loop_buf = kmap_atomic(loop_page) + loop_off; if (cmd == READ) memcpy(loop_buf, raw_buf, size); else memcpy(raw_buf, loop_buf, size); - kunmap_atomic(loop_buf, KM_USER1); - kunmap_atomic(raw_buf, KM_USER0); + kunmap_atomic(loop_buf); + kunmap_atomic(raw_buf); cond_resched(); return 0; } @@ -112,8 +112,8 @@ static int transfer_xor(struct loop_device *lo, int cmd, struct page *loop_page, unsigned loop_off, int size, sector_t real_block) { - char *raw_buf = kmap_atomic(raw_page, KM_USER0) + raw_off; - char *loop_buf = kmap_atomic(loop_page, KM_USER1) + loop_off; + char *raw_buf = kmap_atomic(raw_page) + raw_off; + char *loop_buf = kmap_atomic(loop_page) + loop_off; char *in, *out, *key; int i, keysize; @@ -130,8 +130,8 @@ static int transfer_xor(struct loop_device *lo, int cmd, for (i = 0; i < size; i++) *out++ = *in++ ^ key[(i & 511) % keysize]; - kunmap_atomic(loop_buf, KM_USER1); - kunmap_atomic(raw_buf, KM_USER0); + kunmap_atomic(loop_buf); + kunmap_atomic(raw_buf); cond_resched(); return 0; } diff --git a/drivers/block/mtip32xx/Kconfig b/drivers/block/mtip32xx/Kconfig index b5dd14e072f2..0ba837fc62a8 100644 --- a/drivers/block/mtip32xx/Kconfig +++ b/drivers/block/mtip32xx/Kconfig @@ -4,6 +4,6 @@ config BLK_DEV_PCIESSD_MTIP32XX tristate "Block Device Driver for Micron PCIe SSDs" - depends on HOTPLUG_PCI_PCIE + depends on PCI help This enables the block driver for Micron PCIe SSDs. 
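The mtip32xx.c diff that follows is dominated by a state-tracking rework: ad-hoc atomic_t variables (drv_cleanup_done, resumeflag) are replaced by bit numbers in the port 'flags' and driver-data 'dd_flag' words, manipulated with the atomic bitops. A hedged sketch of that pattern, reusing the driver's MTIP_DDF_* bit names (mtip_example_suspend() itself is hypothetical):

    static int mtip_example_suspend(struct driver_data *dd)
    {
            /* set_bit()/test_bit() are atomic on an unsigned long, so a
             * single state transition needs no additional locking */
            set_bit(MTIP_DDF_RESUME_BIT, &dd->dd_flag);

            if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag)) {
                    clear_bit(MTIP_DDF_RESUME_BIT, &dd->dd_flag);
                    return -ENODEV; /* surprise removal in progress */
            }

            /* ... quiesce the port, issue STANDBY IMMEDIATE ... */

            clear_bit(MTIP_DDF_RESUME_BIT, &dd->dd_flag);
            return 0;
    }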
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 8eb81c96608f..00f9fc992090 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -36,6 +36,7 @@ #include <linux/idr.h> #include <linux/kthread.h> #include <../drivers/ata/ahci.h> +#include <linux/export.h> #include "mtip32xx.h" #define HW_CMD_SLOT_SZ (MTIP_MAX_COMMAND_SLOTS * 32) @@ -44,6 +45,7 @@ #define HW_PORT_PRIV_DMA_SZ \ (HW_CMD_SLOT_SZ + HW_CMD_TBL_AR_SZ + AHCI_RX_FIS_SZ) +#define HOST_CAP_NZDMA (1 << 19) #define HOST_HSORG 0xFC #define HSORG_DISABLE_SLOTGRP_INTR (1<<24) #define HSORG_DISABLE_SLOTGRP_PXIS (1<<16) @@ -139,6 +141,12 @@ static void mtip_command_cleanup(struct driver_data *dd) int group = 0, commandslot = 0, commandindex = 0; struct mtip_cmd *command; struct mtip_port *port = dd->port; + static int in_progress; + + if (in_progress) + return; + + in_progress = 1; for (group = 0; group < 4; group++) { for (commandslot = 0; commandslot < 32; commandslot++) { @@ -165,7 +173,8 @@ static void mtip_command_cleanup(struct driver_data *dd) up(&port->cmd_slot); - atomic_set(&dd->drv_cleanup_done, true); + set_bit(MTIP_DDF_CLEANUP_BIT, &dd->dd_flag); + in_progress = 0; } /* @@ -262,6 +271,9 @@ static int hba_reset_nosleep(struct driver_data *dd) && time_before(jiffies, timeout)) mdelay(1); + if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag)) + return -1; + if (readl(dd->mmio + HOST_CTL) & HOST_RESET) return -1; @@ -294,6 +306,10 @@ static inline void mtip_issue_ncq_command(struct mtip_port *port, int tag) port->cmd_issue[MTIP_TAG_INDEX(tag)]); spin_unlock_irqrestore(&port->cmd_issue_lock, flags); + + /* Set the command's timeout value.*/ + port->commands[tag].comp_time = jiffies + msecs_to_jiffies( + MTIP_NCQ_COMMAND_TIMEOUT_MS); } /* @@ -420,7 +436,12 @@ static void mtip_init_port(struct mtip_port *port) writel(0xFFFFFFFF, port->completed[i]); /* Clear any pending interrupts for this port */ - writel(readl(port->mmio + PORT_IRQ_STAT), port->mmio + PORT_IRQ_STAT); + writel(readl(port->dd->mmio + PORT_IRQ_STAT), + port->dd->mmio + PORT_IRQ_STAT); + + /* Clear any pending interrupts on the HBA. 
*/ + writel(readl(port->dd->mmio + HOST_IRQ_STAT), + port->dd->mmio + HOST_IRQ_STAT); /* Enable port interrupts */ writel(DEF_PORT_IRQ, port->mmio + PORT_IRQ_MASK); @@ -447,6 +468,9 @@ static void mtip_restart_port(struct mtip_port *port) && time_before(jiffies, timeout)) ; + if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &port->dd->dd_flag)) + return; + /* * Chip quirk: escalate to hba reset if * PxCMD.CR not clear after 500 ms @@ -475,6 +499,9 @@ static void mtip_restart_port(struct mtip_port *port) while (time_before(jiffies, timeout)) ; + if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &port->dd->dd_flag)) + return; + /* Clear PxSCTL.DET */ writel(readl(port->mmio + PORT_SCR_CTL) & ~1, port->mmio + PORT_SCR_CTL); @@ -486,15 +513,35 @@ static void mtip_restart_port(struct mtip_port *port) && time_before(jiffies, timeout)) ; + if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &port->dd->dd_flag)) + return; + if ((readl(port->mmio + PORT_SCR_STAT) & 0x01) == 0) dev_warn(&port->dd->pdev->dev, "COM reset failed\n"); - /* Clear SError, the PxSERR.DIAG.x should be set so clear it */ - writel(readl(port->mmio + PORT_SCR_ERR), port->mmio + PORT_SCR_ERR); + mtip_init_port(port); + mtip_start_port(port); - /* Enable the DMA engine */ - mtip_enable_engine(port, 1); +} + +/* + * Helper function for tag logging + */ +static void print_tags(struct driver_data *dd, + char *msg, + unsigned long *tagbits, + int cnt) +{ + unsigned char tagmap[128]; + int group, tagmap_len = 0; + + memset(tagmap, 0, sizeof(tagmap)); + for (group = SLOTBITS_IN_LONGS; group > 0; group--) + tagmap_len = sprintf(tagmap + tagmap_len, "%016lX ", + tagbits[group-1]); + dev_warn(&dd->pdev->dev, + "%d command(s) %s: tagmap [%s]", cnt, msg, tagmap); } /* @@ -514,15 +561,18 @@ static void mtip_timeout_function(unsigned long int data) int tag, cmdto_cnt = 0; unsigned int bit, group; unsigned int num_command_slots = port->dd->slot_groups * 32; + unsigned long to, tagaccum[SLOTBITS_IN_LONGS]; if (unlikely(!port)) return; - if (atomic_read(&port->dd->resumeflag) == true) { + if (test_bit(MTIP_DDF_RESUME_BIT, &port->dd->dd_flag)) { mod_timer(&port->cmd_timer, jiffies + msecs_to_jiffies(30000)); return; } + /* clear the tag accumulator */ + memset(tagaccum, 0, SLOTBITS_IN_LONGS * sizeof(long)); for (tag = 0; tag < num_command_slots; tag++) { /* @@ -540,12 +590,10 @@ static void mtip_timeout_function(unsigned long int data) command = &port->commands[tag]; fis = (struct host_to_dev_fis *) command->command; - dev_warn(&port->dd->pdev->dev, - "Timeout for command tag %d\n", tag); - + set_bit(tag, tagaccum); cmdto_cnt++; if (cmdto_cnt == 1) - set_bit(MTIP_FLAG_EH_ACTIVE_BIT, &port->flags); + set_bit(MTIP_PF_EH_ACTIVE_BIT, &port->flags); /* * Clear the completed bit. 
This should prevent @@ -578,15 +626,29 @@ static void mtip_timeout_function(unsigned long int data) } } - if (cmdto_cnt) { - dev_warn(&port->dd->pdev->dev, - "%d commands timed out: restarting port", - cmdto_cnt); + if (cmdto_cnt && !test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags)) { + print_tags(port->dd, "timed out", tagaccum, cmdto_cnt); + mtip_restart_port(port); - clear_bit(MTIP_FLAG_EH_ACTIVE_BIT, &port->flags); + clear_bit(MTIP_PF_EH_ACTIVE_BIT, &port->flags); wake_up_interruptible(&port->svc_wait); } + if (port->ic_pause_timer) { + to = port->ic_pause_timer + msecs_to_jiffies(1000); + if (time_after(jiffies, to)) { + if (!test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags)) { + port->ic_pause_timer = 0; + clear_bit(MTIP_PF_SE_ACTIVE_BIT, &port->flags); + clear_bit(MTIP_PF_DM_ACTIVE_BIT, &port->flags); + clear_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags); + wake_up_interruptible(&port->svc_wait); + } + + + } + } + /* Restart the timer */ mod_timer(&port->cmd_timer, jiffies + msecs_to_jiffies(MTIP_TIMEOUT_CHECK_PERIOD)); @@ -681,23 +743,18 @@ static void mtip_completion(struct mtip_port *port, complete(waiting); } -/* - * Helper function for tag logging - */ -static void print_tags(struct driver_data *dd, - char *msg, - unsigned long *tagbits) +static void mtip_null_completion(struct mtip_port *port, + int tag, + void *data, + int status) { - unsigned int tag, count = 0; - - for (tag = 0; tag < (dd->slot_groups) * 32; tag++) { - if (test_bit(tag, tagbits)) - count++; - } - if (count) - dev_info(&dd->pdev->dev, "%s [%i tags]\n", msg, count); + return; } +static int mtip_read_log_page(struct mtip_port *port, u8 page, u16 *buffer, + dma_addr_t buffer_dma, unsigned int sectors); +static int mtip_get_smart_attr(struct mtip_port *port, unsigned int id, + struct smart_attr *attrib); /* * Handle an error. * @@ -708,12 +765,16 @@ static void print_tags(struct driver_data *dd, */ static void mtip_handle_tfe(struct driver_data *dd) { - int group, tag, bit, reissue; + int group, tag, bit, reissue, rv; struct mtip_port *port; - struct mtip_cmd *command; + struct mtip_cmd *cmd; u32 completed; struct host_to_dev_fis *fis; unsigned long tagaccum[SLOTBITS_IN_LONGS]; + unsigned int cmd_cnt = 0; + unsigned char *buf; + char *fail_reason = NULL; + int fail_all_ncq_write = 0, fail_all_ncq_cmds = 0; dev_warn(&dd->pdev->dev, "Taskfile error\n"); @@ -722,8 +783,11 @@ static void mtip_handle_tfe(struct driver_data *dd) /* Stop the timer to prevent command timeouts. 
*/ del_timer(&port->cmd_timer); + /* clear the tag accumulator */ + memset(tagaccum, 0, SLOTBITS_IN_LONGS * sizeof(long)); + /* Set eh_active */ - set_bit(MTIP_FLAG_EH_ACTIVE_BIT, &port->flags); + set_bit(MTIP_PF_EH_ACTIVE_BIT, &port->flags); /* Loop through all the groups */ for (group = 0; group < dd->slot_groups; group++) { @@ -732,9 +796,6 @@ static void mtip_handle_tfe(struct driver_data *dd) /* clear completed status register in the hardware.*/ writel(completed, port->completed[group]); - /* clear the tag accumulator */ - memset(tagaccum, 0, SLOTBITS_IN_LONGS * sizeof(long)); - /* Process successfully completed commands */ for (bit = 0; bit < 32 && completed; bit++) { if (!(completed & (1<<bit))) @@ -745,13 +806,14 @@ static void mtip_handle_tfe(struct driver_data *dd) if (tag == MTIP_TAG_INTERNAL) continue; - command = &port->commands[tag]; - if (likely(command->comp_func)) { + cmd = &port->commands[tag]; + if (likely(cmd->comp_func)) { set_bit(tag, tagaccum); - atomic_set(&port->commands[tag].active, 0); - command->comp_func(port, + cmd_cnt++; + atomic_set(&cmd->active, 0); + cmd->comp_func(port, tag, - command->comp_data, + cmd->comp_data, 0); } else { dev_err(&port->dd->pdev->dev, @@ -765,12 +827,45 @@ static void mtip_handle_tfe(struct driver_data *dd) } } } - print_tags(dd, "TFE tags completed:", tagaccum); + + print_tags(dd, "completed (TFE)", tagaccum, cmd_cnt); /* Restart the port */ mdelay(20); mtip_restart_port(port); + /* Trying to determine the cause of the error */ + rv = mtip_read_log_page(dd->port, ATA_LOG_SATA_NCQ, + dd->port->log_buf, + dd->port->log_buf_dma, 1); + if (rv) { + dev_warn(&dd->pdev->dev, + "Error in READ LOG EXT (10h) command\n"); + /* non-critical error, don't fail the load */ + } else { + buf = (unsigned char *)dd->port->log_buf; + if (buf[259] & 0x1) { + dev_info(&dd->pdev->dev, + "Write protect bit is set.\n"); + set_bit(MTIP_DDF_WRITE_PROTECT_BIT, &dd->dd_flag); + fail_all_ncq_write = 1; + fail_reason = "write protect"; + } + if (buf[288] == 0xF7) { + dev_info(&dd->pdev->dev, + "Exceeded Tmax, drive in thermal shutdown.\n"); + set_bit(MTIP_DDF_OVER_TEMP_BIT, &dd->dd_flag); + fail_all_ncq_cmds = 1; + fail_reason = "thermal shutdown"; + } + if (buf[288] == 0xBF) { + dev_info(&dd->pdev->dev, + "Drive indicates rebuild has failed.\n"); + fail_all_ncq_cmds = 1; + fail_reason = "rebuild failed"; + } + } + /* clear the tag accumulator */ memset(tagaccum, 0, SLOTBITS_IN_LONGS * sizeof(long)); @@ -779,32 +874,47 @@ static void mtip_handle_tfe(struct driver_data *dd) for (bit = 0; bit < 32; bit++) { reissue = 1; tag = (group << 5) + bit; + cmd = &port->commands[tag]; /* If the active bit is set re-issue the command */ - if (atomic_read(&port->commands[tag].active) == 0) + if (atomic_read(&cmd->active) == 0) continue; - fis = (struct host_to_dev_fis *) - port->commands[tag].command; + fis = (struct host_to_dev_fis *)cmd->command; /* Should re-issue? */ if (tag == MTIP_TAG_INTERNAL || fis->command == ATA_CMD_SET_FEATURES) reissue = 0; + else { + if (fail_all_ncq_cmds || + (fail_all_ncq_write && + fis->command == ATA_CMD_FPDMA_WRITE)) { + dev_warn(&dd->pdev->dev, + " Fail: %s w/tag %d [%s].\n", + fis->command == ATA_CMD_FPDMA_WRITE ? + "write" : "read", + tag, + fail_reason != NULL ? + fail_reason : "unknown"); + atomic_set(&cmd->active, 0); + if (cmd->comp_func) { + cmd->comp_func(port, tag, + cmd->comp_data, + -ENODATA); + } + continue; + } + } /* * First check if this command has * exceeded its retries. 
*/ - if (reissue && - (port->commands[tag].retries-- > 0)) { + if (reissue && (cmd->retries-- > 0)) { set_bit(tag, tagaccum); - /* Update the timeout value. */ - port->commands[tag].comp_time = - jiffies + msecs_to_jiffies( - MTIP_NCQ_COMMAND_TIMEOUT_MS); /* Re-issue the command. */ mtip_issue_ncq_command(port, tag); @@ -814,13 +924,13 @@ static void mtip_handle_tfe(struct driver_data *dd) /* Retire a command that will not be reissued */ dev_warn(&port->dd->pdev->dev, "retiring tag %d\n", tag); - atomic_set(&port->commands[tag].active, 0); + atomic_set(&cmd->active, 0); - if (port->commands[tag].comp_func) - port->commands[tag].comp_func( + if (cmd->comp_func) + cmd->comp_func( port, tag, - port->commands[tag].comp_data, + cmd->comp_data, PORT_IRQ_TF_ERR); else dev_warn(&port->dd->pdev->dev, @@ -828,10 +938,10 @@ static void mtip_handle_tfe(struct driver_data *dd) tag); } } - print_tags(dd, "TFE tags reissued:", tagaccum); + print_tags(dd, "reissued (TFE)", tagaccum, cmd_cnt); /* clear eh_active */ - clear_bit(MTIP_FLAG_EH_ACTIVE_BIT, &port->flags); + clear_bit(MTIP_PF_EH_ACTIVE_BIT, &port->flags); wake_up_interruptible(&port->svc_wait); mod_timer(&port->cmd_timer, @@ -899,7 +1009,7 @@ static inline void mtip_process_legacy(struct driver_data *dd, u32 port_stat) struct mtip_port *port = dd->port; struct mtip_cmd *cmd = &port->commands[MTIP_TAG_INTERNAL]; - if (test_bit(MTIP_FLAG_IC_ACTIVE_BIT, &port->flags) && + if (test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags) && (cmd != NULL) && !(readl(port->cmd_issue[MTIP_TAG_INTERNAL]) & (1 << MTIP_TAG_INTERNAL))) { if (cmd->comp_func) { @@ -911,8 +1021,6 @@ static inline void mtip_process_legacy(struct driver_data *dd, u32 port_stat) } } - dev_warn(&dd->pdev->dev, "IRQ status 0x%x ignored.\n", port_stat); - return; } @@ -968,6 +1076,9 @@ static inline irqreturn_t mtip_handle_irq(struct driver_data *data) /* don't proceed further */ return IRQ_HANDLED; } + if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT, + &dd->dd_flag)) + return rv; mtip_process_errors(dd, port_stat & PORT_IRQ_ERR); } @@ -1015,6 +1126,39 @@ static void mtip_issue_non_ncq_command(struct mtip_port *port, int tag) port->cmd_issue[MTIP_TAG_INDEX(tag)]); } +static bool mtip_pause_ncq(struct mtip_port *port, + struct host_to_dev_fis *fis) +{ + struct host_to_dev_fis *reply; + unsigned long task_file_data; + + reply = port->rxfis + RX_FIS_D2H_REG; + task_file_data = readl(port->mmio+PORT_TFDATA); + + if ((task_file_data & 1) || (fis->command == ATA_CMD_SEC_ERASE_UNIT)) + return false; + + if (fis->command == ATA_CMD_SEC_ERASE_PREP) { + set_bit(MTIP_PF_SE_ACTIVE_BIT, &port->flags); + port->ic_pause_timer = jiffies; + return true; + } else if ((fis->command == ATA_CMD_DOWNLOAD_MICRO) && + (fis->features == 0x03)) { + set_bit(MTIP_PF_DM_ACTIVE_BIT, &port->flags); + port->ic_pause_timer = jiffies; + return true; + } else if ((fis->command == ATA_CMD_SEC_ERASE_UNIT) || + ((fis->command == 0xFC) && + (fis->features == 0x27 || fis->features == 0x72 || + fis->features == 0x62 || fis->features == 0x26))) { + /* Com reset after secure erase or lowlevel format */ + mtip_restart_port(port); + return false; + } + + return false; +} + /* * Wait for port to quiesce * @@ -1033,11 +1177,13 @@ static int mtip_quiesce_io(struct mtip_port *port, unsigned long timeout) to = jiffies + msecs_to_jiffies(timeout); do { - if (test_bit(MTIP_FLAG_SVC_THD_ACTIVE_BIT, &port->flags) && - test_bit(MTIP_FLAG_ISSUE_CMDS_BIT, &port->flags)) { + if (test_bit(MTIP_PF_SVC_THD_ACTIVE_BIT, &port->flags) && + 
test_bit(MTIP_PF_ISSUE_CMDS_BIT, &port->flags)) { msleep(20); continue; /* svc thd is actively issuing commands */ } + if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &port->dd->dd_flag)) + return -EFAULT; /* * Ignore s_active bit 0 of array element 0. * This bit will always be set @@ -1074,7 +1220,7 @@ static int mtip_quiesce_io(struct mtip_port *port, unsigned long timeout) * -EAGAIN Time out waiting for command to complete. */ static int mtip_exec_internal_command(struct mtip_port *port, - void *fis, + struct host_to_dev_fis *fis, int fis_len, dma_addr_t buffer, int buf_len, @@ -1084,8 +1230,9 @@ static int mtip_exec_internal_command(struct mtip_port *port, { struct mtip_cmd_sg *command_sg; DECLARE_COMPLETION_ONSTACK(wait); - int rv = 0; + int rv = 0, ready2go = 1; struct mtip_cmd *int_cmd = &port->commands[MTIP_TAG_INTERNAL]; + unsigned long to; /* Make sure the buffer is 8 byte aligned. This is asic specific. */ if (buffer & 0x00000007) { @@ -1094,23 +1241,38 @@ static int mtip_exec_internal_command(struct mtip_port *port, return -EFAULT; } - /* Only one internal command should be running at a time */ - if (test_and_set_bit(MTIP_TAG_INTERNAL, port->allocated)) { + to = jiffies + msecs_to_jiffies(timeout); + do { + ready2go = !test_and_set_bit(MTIP_TAG_INTERNAL, + port->allocated); + if (ready2go) + break; + mdelay(100); + } while (time_before(jiffies, to)); + if (!ready2go) { dev_warn(&port->dd->pdev->dev, - "Internal command already active\n"); + "Internal cmd active. new cmd [%02X]\n", fis->command); return -EBUSY; } - set_bit(MTIP_FLAG_IC_ACTIVE_BIT, &port->flags); + set_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags); + port->ic_pause_timer = 0; + + if (fis->command == ATA_CMD_SEC_ERASE_UNIT) + clear_bit(MTIP_PF_SE_ACTIVE_BIT, &port->flags); + else if (fis->command == ATA_CMD_DOWNLOAD_MICRO) + clear_bit(MTIP_PF_DM_ACTIVE_BIT, &port->flags); if (atomic == GFP_KERNEL) { - /* wait for io to complete if non atomic */ - if (mtip_quiesce_io(port, 5000) < 0) { - dev_warn(&port->dd->pdev->dev, - "Failed to quiesce IO\n"); - release_slot(port, MTIP_TAG_INTERNAL); - clear_bit(MTIP_FLAG_IC_ACTIVE_BIT, &port->flags); - wake_up_interruptible(&port->svc_wait); - return -EBUSY; + if (fis->command != ATA_CMD_STANDBYNOW1) { + /* wait for io to complete if non atomic */ + if (mtip_quiesce_io(port, 5000) < 0) { + dev_warn(&port->dd->pdev->dev, + "Failed to quiesce IO\n"); + release_slot(port, MTIP_TAG_INTERNAL); + clear_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags); + wake_up_interruptible(&port->svc_wait); + return -EBUSY; + } } /* Set the completion function and data for the command. 
*/ @@ -1120,7 +1282,7 @@ static int mtip_exec_internal_command(struct mtip_port *port, } else { /* Clear completion - we're going to poll */ int_cmd->comp_data = NULL; - int_cmd->comp_func = NULL; + int_cmd->comp_func = mtip_null_completion; } /* Copy the command to the command table */ @@ -1159,6 +1321,12 @@ static int mtip_exec_internal_command(struct mtip_port *port, "Internal command did not complete [%d] " "within timeout of %lu ms\n", atomic, timeout); + if (mtip_check_surprise_removal(port->dd->pdev) || + test_bit(MTIP_DDF_REMOVE_PENDING_BIT, + &port->dd->dd_flag)) { + rv = -ENXIO; + goto exec_ic_exit; + } rv = -EAGAIN; } @@ -1166,31 +1334,59 @@ static int mtip_exec_internal_command(struct mtip_port *port, & (1 << MTIP_TAG_INTERNAL)) { dev_warn(&port->dd->pdev->dev, "Retiring internal command but CI is 1.\n"); + if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT, + &port->dd->dd_flag)) { + hba_reset_nosleep(port->dd); + rv = -ENXIO; + } else { + mtip_restart_port(port); + rv = -EAGAIN; + } + goto exec_ic_exit; } } else { /* Spin for <timeout> checking if command still outstanding */ timeout = jiffies + msecs_to_jiffies(timeout); - - while ((readl( - port->cmd_issue[MTIP_TAG_INTERNAL]) - & (1 << MTIP_TAG_INTERNAL)) - && time_before(jiffies, timeout)) - ; + while ((readl(port->cmd_issue[MTIP_TAG_INTERNAL]) + & (1 << MTIP_TAG_INTERNAL)) + && time_before(jiffies, timeout)) { + if (mtip_check_surprise_removal(port->dd->pdev)) { + rv = -ENXIO; + goto exec_ic_exit; + } + if ((fis->command != ATA_CMD_STANDBYNOW1) && + test_bit(MTIP_DDF_REMOVE_PENDING_BIT, + &port->dd->dd_flag)) { + rv = -ENXIO; + goto exec_ic_exit; + } + } if (readl(port->cmd_issue[MTIP_TAG_INTERNAL]) & (1 << MTIP_TAG_INTERNAL)) { dev_err(&port->dd->pdev->dev, - "Internal command did not complete [%d]\n", - atomic); + "Internal command did not complete [atomic]\n"); rv = -EAGAIN; + if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT, + &port->dd->dd_flag)) { + hba_reset_nosleep(port->dd); + rv = -ENXIO; + } else { + mtip_restart_port(port); + rv = -EAGAIN; + } } } - +exec_ic_exit: /* Clear the allocated and active bits for the internal command. */ atomic_set(&int_cmd->active, 0); release_slot(port, MTIP_TAG_INTERNAL); - clear_bit(MTIP_FLAG_IC_ACTIVE_BIT, &port->flags); + if (rv >= 0 && mtip_pause_ncq(port, fis)) { + /* NCQ paused */ + return rv; + } + clear_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags); wake_up_interruptible(&port->svc_wait); return rv; @@ -1240,6 +1436,9 @@ static int mtip_get_identify(struct mtip_port *port, void __user *user_buffer) int rv = 0; struct host_to_dev_fis fis; + if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &port->dd->dd_flag)) + return -EFAULT; + /* Build the FIS. */ memset(&fis, 0, sizeof(struct host_to_dev_fis)); fis.type = 0x27; @@ -1313,6 +1512,7 @@ static int mtip_standby_immediate(struct mtip_port *port) { int rv; struct host_to_dev_fis fis; + unsigned long start; /* Build the FIS. */ memset(&fis, 0, sizeof(struct host_to_dev_fis)); @@ -1320,15 +1520,150 @@ static int mtip_standby_immediate(struct mtip_port *port) fis.opts = 1 << 7; fis.command = ATA_CMD_STANDBYNOW1; - /* Execute the command. Use a 15-second timeout for large drives. */ + start = jiffies; rv = mtip_exec_internal_command(port, &fis, 5, 0, 0, 0, - GFP_KERNEL, + GFP_ATOMIC, 15000); + dbg_printk(MTIP_DRV_NAME "Time taken to complete standby cmd: %d ms\n", + jiffies_to_msecs(jiffies - start)); + if (rv) + dev_warn(&port->dd->pdev->dev, + "STANDBY IMMEDIATE command failed.\n"); + + return rv; +} + +/* + * Issue a READ LOG EXT command to the device. 
+ * + * @port pointer to the port structure. + * @page page number to fetch + * @buffer pointer to buffer + * @buffer_dma dma address corresponding to @buffer + * @sectors page length to fetch, in sectors + * + * return value + * @rv return value from mtip_exec_internal_command() + */ +static int mtip_read_log_page(struct mtip_port *port, u8 page, u16 *buffer, + dma_addr_t buffer_dma, unsigned int sectors) +{ + struct host_to_dev_fis fis; + + memset(&fis, 0, sizeof(struct host_to_dev_fis)); + fis.type = 0x27; + fis.opts = 1 << 7; + fis.command = ATA_CMD_READ_LOG_EXT; + fis.sect_count = sectors & 0xFF; + fis.sect_cnt_ex = (sectors >> 8) & 0xFF; + fis.lba_low = page; + fis.lba_mid = 0; + fis.device = ATA_DEVICE_OBS; + + memset(buffer, 0, sectors * ATA_SECT_SIZE); + + return mtip_exec_internal_command(port, + &fis, + 5, + buffer_dma, + sectors * ATA_SECT_SIZE, + 0, + GFP_ATOMIC, + MTIP_INTERNAL_COMMAND_TIMEOUT_MS); +} + +/* + * Issue a SMART READ DATA command to the device. + * + * @port pointer to the port structure. + * @buffer pointer to buffer + * @buffer_dma dma address corresponding to @buffer + * + * return value + * @rv return value from mtip_exec_internal_command() + */ +static int mtip_get_smart_data(struct mtip_port *port, u8 *buffer, + dma_addr_t buffer_dma) +{ + struct host_to_dev_fis fis; + + memset(&fis, 0, sizeof(struct host_to_dev_fis)); + fis.type = 0x27; + fis.opts = 1 << 7; + fis.command = ATA_CMD_SMART; + fis.features = 0xD0; + fis.sect_count = 1; + fis.lba_mid = 0x4F; + fis.lba_hi = 0xC2; + fis.device = ATA_DEVICE_OBS; + + return mtip_exec_internal_command(port, + &fis, + 5, + buffer_dma, + ATA_SECT_SIZE, + 0, + GFP_ATOMIC, + 15000); +} + +/* + * Get the value of a smart attribute + * + * @port pointer to the port structure + * @id attribute number + * @attrib pointer to return attrib information corresponding to @id + * + * return value + * -EINVAL NULL buffer passed or unsupported attribute @id. 
+ * -EPERM Identify data not valid, SMART not supported or not enabled + */ +static int mtip_get_smart_attr(struct mtip_port *port, unsigned int id, + struct smart_attr *attrib) +{ + int rv, i; + struct smart_attr *pattr; + + if (!attrib) + return -EINVAL; + + if (!port->identify_valid) { + dev_warn(&port->dd->pdev->dev, "IDENTIFY DATA not valid\n"); + return -EPERM; + } + if (!(port->identify[82] & 0x1)) { + dev_warn(&port->dd->pdev->dev, "SMART not supported\n"); + return -EPERM; + } + if (!(port->identify[85] & 0x1)) { + dev_warn(&port->dd->pdev->dev, "SMART not enabled\n"); + return -EPERM; + } + + memset(port->smart_buf, 0, ATA_SECT_SIZE); + rv = mtip_get_smart_data(port, port->smart_buf, port->smart_buf_dma); + if (rv) { + dev_warn(&port->dd->pdev->dev, "Failed to ge SMART data\n"); + return rv; + } + + pattr = (struct smart_attr *)(port->smart_buf + 2); + for (i = 0; i < 29; i++, pattr++) + if (pattr->attr_id == id) { + memcpy(attrib, pattr, sizeof(struct smart_attr)); + break; + } + + if (i == 29) { + dev_warn(&port->dd->pdev->dev, + "Query for invalid SMART attribute ID\n"); + rv = -EINVAL; + } return rv; } @@ -1504,10 +1839,7 @@ static int exec_drive_task(struct mtip_port *port, u8 *command) fis.cyl_hi = command[5]; fis.device = command[6] & ~0x10; /* Clear the dev bit*/ - - dbg_printk(MTIP_DRV_NAME "%s: User Command: cmd %x, feat %x, " - "nsect %x, sect %x, lcyl %x, " - "hcyl %x, sel %x\n", + dbg_printk(MTIP_DRV_NAME " %s: User Command: cmd %x, feat %x, nsect %x, sect %x, lcyl %x, hcyl %x, sel %x\n", __func__, command[0], command[1], @@ -1534,8 +1866,7 @@ static int exec_drive_task(struct mtip_port *port, u8 *command) command[4] = reply->cyl_low; command[5] = reply->cyl_hi; - dbg_printk(MTIP_DRV_NAME "%s: Completion Status: stat %x, " - "err %x , cyl_lo %x cyl_hi %x\n", + dbg_printk(MTIP_DRV_NAME " %s: Completion Status: stat %x, err %x , cyl_lo %x cyl_hi %x\n", __func__, command[0], command[1], @@ -1578,7 +1909,7 @@ static int exec_drive_command(struct mtip_port *port, u8 *command, } dbg_printk(MTIP_DRV_NAME - "%s: User Command: cmd %x, sect %x, " + " %s: User Command: cmd %x, sect %x, " "feat %x, sectcnt %x\n", __func__, command[0], @@ -1607,7 +1938,7 @@ static int exec_drive_command(struct mtip_port *port, u8 *command, command[2] = command[3]; dbg_printk(MTIP_DRV_NAME - "%s: Completion Status: stat %x, " + " %s: Completion Status: stat %x, " "err %x, cmd %x\n", __func__, command[0], @@ -1810,9 +2141,10 @@ static int exec_drive_taskfile(struct driver_data *dd, } dbg_printk(MTIP_DRV_NAME - "taskfile: cmd %x, feat %x, nsect %x," + " %s: cmd %x, feat %x, nsect %x," " sect/lbal %x, lcyl/lbam %x, hcyl/lbah %x," " head/dev %x\n", + __func__, fis.command, fis.features, fis.sect_count, @@ -1823,8 +2155,8 @@ static int exec_drive_taskfile(struct driver_data *dd, switch (fis.command) { case ATA_CMD_DOWNLOAD_MICRO: - /* Change timeout for Download Microcode to 60 seconds.*/ - timeout = 60000; + /* Change timeout for Download Microcode to 2 minutes */ + timeout = 120000; break; case ATA_CMD_SEC_ERASE_UNIT: /* Change timeout for Security Erase Unit to 4 minutes.*/ @@ -1840,8 +2172,8 @@ static int exec_drive_taskfile(struct driver_data *dd, timeout = 10000; break; case ATA_CMD_SMART: - /* Change timeout for vendor unique command to 10 secs */ - timeout = 10000; + /* Change timeout for vendor unique command to 15 secs */ + timeout = 15000; break; default: timeout = MTIP_IOCTL_COMMAND_TIMEOUT_MS; @@ -1903,18 +2235,8 @@ static int exec_drive_taskfile(struct driver_data *dd, 
req_task->hob_ports[1] = reply->features_ex; req_task->hob_ports[2] = reply->sect_cnt_ex; } - - /* Com rest after secure erase or lowlevel format */ - if (((fis.command == ATA_CMD_SEC_ERASE_UNIT) || - ((fis.command == 0xFC) && - (fis.features == 0x27 || fis.features == 0x72 || - fis.features == 0x62 || fis.features == 0x26))) && - !(reply->command & 1)) { - mtip_restart_port(dd->port); - } - dbg_printk(MTIP_DRV_NAME - "%s: Completion: stat %x," + " %s: Completion: stat %x," "err %x, sect_cnt %x, lbalo %x," "lbamid %x, lbahi %x, dev %x\n", __func__, @@ -2080,14 +2402,10 @@ static void mtip_hw_submit_io(struct driver_data *dd, sector_t start, struct host_to_dev_fis *fis; struct mtip_port *port = dd->port; struct mtip_cmd *command = &port->commands[tag]; + int dma_dir = (dir == READ) ? DMA_FROM_DEVICE : DMA_TO_DEVICE; /* Map the scatter list for DMA access */ - if (dir == READ) - nents = dma_map_sg(&dd->pdev->dev, command->sg, - nents, DMA_FROM_DEVICE); - else - nents = dma_map_sg(&dd->pdev->dev, command->sg, - nents, DMA_TO_DEVICE); + nents = dma_map_sg(&dd->pdev->dev, command->sg, nents, dma_dir); command->scatter_ents = nents; @@ -2127,7 +2445,7 @@ static void mtip_hw_submit_io(struct driver_data *dd, sector_t start, */ command->comp_data = dd; command->comp_func = mtip_async_complete; - command->direction = (dir == READ ? DMA_FROM_DEVICE : DMA_TO_DEVICE); + command->direction = dma_dir; /* * Set the completion function and data for the command passed @@ -2140,19 +2458,16 @@ static void mtip_hw_submit_io(struct driver_data *dd, sector_t start, * To prevent this command from being issued * if an internal command is in progress or error handling is active. */ - if (unlikely(test_bit(MTIP_FLAG_IC_ACTIVE_BIT, &port->flags) || - test_bit(MTIP_FLAG_EH_ACTIVE_BIT, &port->flags))) { + if (port->flags & MTIP_PF_PAUSE_IO) { set_bit(tag, port->cmds_to_issue); - set_bit(MTIP_FLAG_ISSUE_CMDS_BIT, &port->flags); + set_bit(MTIP_PF_ISSUE_CMDS_BIT, &port->flags); return; } /* Issue the command to the hardware */ mtip_issue_ncq_command(port, tag); - /* Set the command's timeout value.*/ - port->commands[tag].comp_time = jiffies + msecs_to_jiffies( - MTIP_NCQ_COMMAND_TIMEOUT_MS); + return; } /* @@ -2191,6 +2506,10 @@ static struct scatterlist *mtip_hw_get_scatterlist(struct driver_data *dd, down(&dd->port->cmd_slot); *tag = get_slot(dd->port); + if (unlikely(test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag))) { + up(&dd->port->cmd_slot); + return NULL; + } if (unlikely(*tag < 0)) return NULL; @@ -2207,7 +2526,7 @@ static struct scatterlist *mtip_hw_get_scatterlist(struct driver_data *dd, * return value * The size, in bytes, of the data copied into buf. 
*/ -static ssize_t hw_show_registers(struct device *dev, +static ssize_t mtip_hw_show_registers(struct device *dev, struct device_attribute *attr, char *buf) { @@ -2216,7 +2535,7 @@ static ssize_t hw_show_registers(struct device *dev, int size = 0; int n; - size += sprintf(&buf[size], "%s:\ns_active:\n", __func__); + size += sprintf(&buf[size], "S ACTive:\n"); for (n = 0; n < dd->slot_groups; n++) size += sprintf(&buf[size], "0x%08x\n", @@ -2240,20 +2559,39 @@ static ssize_t hw_show_registers(struct device *dev, group_allocated); } - size += sprintf(&buf[size], "completed:\n"); + size += sprintf(&buf[size], "Completed:\n"); for (n = 0; n < dd->slot_groups; n++) size += sprintf(&buf[size], "0x%08x\n", readl(dd->port->completed[n])); - size += sprintf(&buf[size], "PORT_IRQ_STAT 0x%08x\n", + size += sprintf(&buf[size], "PORT IRQ STAT : 0x%08x\n", readl(dd->port->mmio + PORT_IRQ_STAT)); - size += sprintf(&buf[size], "HOST_IRQ_STAT 0x%08x\n", + size += sprintf(&buf[size], "HOST IRQ STAT : 0x%08x\n", readl(dd->mmio + HOST_IRQ_STAT)); return size; } -static DEVICE_ATTR(registers, S_IRUGO, hw_show_registers, NULL); + +static ssize_t mtip_hw_show_status(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct driver_data *dd = dev_to_disk(dev)->private_data; + int size = 0; + + if (test_bit(MTIP_DDF_OVER_TEMP_BIT, &dd->dd_flag)) + size += sprintf(buf, "%s", "thermal_shutdown\n"); + else if (test_bit(MTIP_DDF_WRITE_PROTECT_BIT, &dd->dd_flag)) + size += sprintf(buf, "%s", "write_protect\n"); + else + size += sprintf(buf, "%s", "online\n"); + + return size; +} + +static DEVICE_ATTR(registers, S_IRUGO, mtip_hw_show_registers, NULL); +static DEVICE_ATTR(status, S_IRUGO, mtip_hw_show_status, NULL); /* * Create the sysfs related attributes. @@ -2272,7 +2610,10 @@ static int mtip_hw_sysfs_init(struct driver_data *dd, struct kobject *kobj) if (sysfs_create_file(kobj, &dev_attr_registers.attr)) dev_warn(&dd->pdev->dev, - "Error creating registers sysfs entry\n"); + "Error creating 'registers' sysfs entry\n"); + if (sysfs_create_file(kobj, &dev_attr_status.attr)) + dev_warn(&dd->pdev->dev, + "Error creating 'status' sysfs entry\n"); return 0; } @@ -2292,6 +2633,7 @@ static int mtip_hw_sysfs_exit(struct driver_data *dd, struct kobject *kobj) return -EINVAL; sysfs_remove_file(kobj, &dev_attr_registers.attr); + sysfs_remove_file(kobj, &dev_attr_status.attr); return 0; } @@ -2384,10 +2726,12 @@ static int mtip_ftl_rebuild_poll(struct driver_data *dd) "FTL rebuild in progress. 
Polling for completion.\n"); start = jiffies; - dd->ftlrebuildflag = 1; timeout = jiffies + msecs_to_jiffies(MTIP_FTL_REBUILD_TIMEOUT_MS); do { + if (unlikely(test_bit(MTIP_DDF_REMOVE_PENDING_BIT, + &dd->dd_flag))) + return -EFAULT; if (mtip_check_surprise_removal(dd->pdev)) return -EFAULT; @@ -2408,22 +2752,17 @@ static int mtip_ftl_rebuild_poll(struct driver_data *dd) dev_warn(&dd->pdev->dev, "FTL rebuild complete (%d secs).\n", jiffies_to_msecs(jiffies - start) / 1000); - dd->ftlrebuildflag = 0; mtip_block_initialize(dd); - break; + return 0; } ssleep(10); } while (time_before(jiffies, timeout)); /* Check for timeout */ - if (dd->ftlrebuildflag) { - dev_err(&dd->pdev->dev, + dev_err(&dd->pdev->dev, "Timed out waiting for FTL rebuild to complete (%d secs).\n", jiffies_to_msecs(jiffies - start) / 1000); - return -EFAULT; - } - - return 0; + return -EFAULT; } /* @@ -2448,14 +2787,17 @@ static int mtip_service_thread(void *data) * is in progress nor error handling is active */ wait_event_interruptible(port->svc_wait, (port->flags) && - !test_bit(MTIP_FLAG_IC_ACTIVE_BIT, &port->flags) && - !test_bit(MTIP_FLAG_EH_ACTIVE_BIT, &port->flags)); + !(port->flags & MTIP_PF_PAUSE_IO)); if (kthread_should_stop()) break; - set_bit(MTIP_FLAG_SVC_THD_ACTIVE_BIT, &port->flags); - if (test_bit(MTIP_FLAG_ISSUE_CMDS_BIT, &port->flags)) { + if (unlikely(test_bit(MTIP_DDF_REMOVE_PENDING_BIT, + &dd->dd_flag))) + break; + + set_bit(MTIP_PF_SVC_THD_ACTIVE_BIT, &port->flags); + if (test_bit(MTIP_PF_ISSUE_CMDS_BIT, &port->flags)) { slot = 1; /* used to restrict the loop to one iteration */ slot_start = num_cmd_slots; @@ -2480,21 +2822,19 @@ static int mtip_service_thread(void *data) /* Issue the command to the hardware */ mtip_issue_ncq_command(port, slot); - /* Set the command's timeout value.*/ - port->commands[slot].comp_time = jiffies + - msecs_to_jiffies(MTIP_NCQ_COMMAND_TIMEOUT_MS); - clear_bit(slot, port->cmds_to_issue); } - clear_bit(MTIP_FLAG_ISSUE_CMDS_BIT, &port->flags); - } else if (test_bit(MTIP_FLAG_REBUILD_BIT, &port->flags)) { - mtip_ftl_rebuild_poll(dd); - clear_bit(MTIP_FLAG_REBUILD_BIT, &port->flags); + clear_bit(MTIP_PF_ISSUE_CMDS_BIT, &port->flags); + } else if (test_bit(MTIP_PF_REBUILD_BIT, &port->flags)) { + if (!mtip_ftl_rebuild_poll(dd)) + set_bit(MTIP_DDF_REBUILD_FAILED_BIT, + &dd->dd_flag); + clear_bit(MTIP_PF_REBUILD_BIT, &port->flags); } - clear_bit(MTIP_FLAG_SVC_THD_ACTIVE_BIT, &port->flags); + clear_bit(MTIP_PF_SVC_THD_ACTIVE_BIT, &port->flags); - if (test_bit(MTIP_FLAG_SVC_THD_SHOULD_STOP_BIT, &port->flags)) + if (test_bit(MTIP_PF_SVC_THD_STOP_BIT, &port->flags)) break; } return 0; @@ -2513,6 +2853,9 @@ static int mtip_hw_init(struct driver_data *dd) int i; int rv; unsigned int num_command_slots; + unsigned long timeout, timetaken; + unsigned char *buf; + struct smart_attr attr242; dd->mmio = pcim_iomap_table(dd->pdev)[MTIP_ABAR]; @@ -2547,7 +2890,7 @@ static int mtip_hw_init(struct driver_data *dd) /* Allocate memory for the command list. */ dd->port->command_list = dmam_alloc_coherent(&dd->pdev->dev, - HW_PORT_PRIV_DMA_SZ + (ATA_SECT_SIZE * 2), + HW_PORT_PRIV_DMA_SZ + (ATA_SECT_SIZE * 4), &dd->port->command_list_dma, GFP_KERNEL); if (!dd->port->command_list) { @@ -2560,7 +2903,7 @@ static int mtip_hw_init(struct driver_data *dd) /* Clear the memory we have allocated. */ memset(dd->port->command_list, 0, - HW_PORT_PRIV_DMA_SZ + (ATA_SECT_SIZE * 2)); + HW_PORT_PRIV_DMA_SZ + (ATA_SECT_SIZE * 4)); /* Setup the addresse of the RX FIS. 
*/ dd->port->rxfis = dd->port->command_list + HW_CMD_SLOT_SZ; @@ -2576,10 +2919,19 @@ static int mtip_hw_init(struct driver_data *dd) dd->port->identify_dma = dd->port->command_tbl_dma + HW_CMD_TBL_AR_SZ; - /* Setup the address of the sector buffer. */ + /* Setup the address of the sector buffer - for some non-ncq cmds */ dd->port->sector_buffer = (void *) dd->port->identify + ATA_SECT_SIZE; dd->port->sector_buffer_dma = dd->port->identify_dma + ATA_SECT_SIZE; + /* Setup the address of the log buf - for read log command */ + dd->port->log_buf = (void *)dd->port->sector_buffer + ATA_SECT_SIZE; + dd->port->log_buf_dma = dd->port->sector_buffer_dma + ATA_SECT_SIZE; + + /* Setup the address of the smart buf - for smart read data command */ + dd->port->smart_buf = (void *)dd->port->log_buf + ATA_SECT_SIZE; + dd->port->smart_buf_dma = dd->port->log_buf_dma + ATA_SECT_SIZE; + + /* Point the command headers at the command tables. */ for (i = 0; i < num_command_slots; i++) { dd->port->commands[i].command_header = @@ -2623,14 +2975,43 @@ static int mtip_hw_init(struct driver_data *dd) dd->port->mmio + i*0x80 + PORT_SDBV; } - /* Reset the HBA. */ - if (mtip_hba_reset(dd) < 0) { - dev_err(&dd->pdev->dev, - "Card did not reset within timeout\n"); - rv = -EIO; + timetaken = jiffies; + timeout = jiffies + msecs_to_jiffies(30000); + while (((readl(dd->port->mmio + PORT_SCR_STAT) & 0x0F) != 0x03) && + time_before(jiffies, timeout)) { + mdelay(100); + } + if (unlikely(mtip_check_surprise_removal(dd->pdev))) { + timetaken = jiffies - timetaken; + dev_warn(&dd->pdev->dev, + "Surprise removal detected at %u ms\n", + jiffies_to_msecs(timetaken)); + rv = -ENODEV; + goto out2 ; + } + if (unlikely(test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag))) { + timetaken = jiffies - timetaken; + dev_warn(&dd->pdev->dev, + "Removal detected at %u ms\n", + jiffies_to_msecs(timetaken)); + rv = -EFAULT; goto out2; } + /* Conditionally reset the HBA. 
*/ + if (!(readl(dd->mmio + HOST_CAP) & HOST_CAP_NZDMA)) { + if (mtip_hba_reset(dd) < 0) { + dev_err(&dd->pdev->dev, + "Card did not reset within timeout\n"); + rv = -EIO; + goto out2; + } + } else { + /* Clear any pending interrupts on the HBA */ + writel(readl(dd->mmio + HOST_IRQ_STAT), + dd->mmio + HOST_IRQ_STAT); + } + mtip_init_port(dd->port); mtip_start_port(dd->port); @@ -2660,6 +3041,12 @@ static int mtip_hw_init(struct driver_data *dd) mod_timer(&dd->port->cmd_timer, jiffies + msecs_to_jiffies(MTIP_TIMEOUT_CHECK_PERIOD)); + + if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag)) { + rv = -EFAULT; + goto out3; + } + if (mtip_get_identify(dd->port, NULL) < 0) { rv = -EFAULT; goto out3; @@ -2667,10 +3054,47 @@ static int mtip_hw_init(struct driver_data *dd) if (*(dd->port->identify + MTIP_FTL_REBUILD_OFFSET) == MTIP_FTL_REBUILD_MAGIC) { - set_bit(MTIP_FLAG_REBUILD_BIT, &dd->port->flags); + set_bit(MTIP_PF_REBUILD_BIT, &dd->port->flags); return MTIP_FTL_REBUILD_MAGIC; } mtip_dump_identify(dd->port); + + /* check write protect, over temp and rebuild statuses */ + rv = mtip_read_log_page(dd->port, ATA_LOG_SATA_NCQ, + dd->port->log_buf, + dd->port->log_buf_dma, 1); + if (rv) { + dev_warn(&dd->pdev->dev, + "Error in READ LOG EXT (10h) command\n"); + /* non-critical error, don't fail the load */ + } else { + buf = (unsigned char *)dd->port->log_buf; + if (buf[259] & 0x1) { + dev_info(&dd->pdev->dev, + "Write protect bit is set.\n"); + set_bit(MTIP_DDF_WRITE_PROTECT_BIT, &dd->dd_flag); + } + if (buf[288] == 0xF7) { + dev_info(&dd->pdev->dev, + "Exceeded Tmax, drive in thermal shutdown.\n"); + set_bit(MTIP_DDF_OVER_TEMP_BIT, &dd->dd_flag); + } + if (buf[288] == 0xBF) { + dev_info(&dd->pdev->dev, + "Drive indicates rebuild has failed.\n"); + /* TODO */ + } + } + + /* get write protect progess */ + memset(&attr242, 0, sizeof(struct smart_attr)); + if (mtip_get_smart_attr(dd->port, 242, &attr242)) + dev_warn(&dd->pdev->dev, + "Unable to check write protect progress\n"); + else + dev_info(&dd->pdev->dev, + "Write protect progress: %d%% (%d blocks)\n", + attr242.cur, attr242.data); return rv; out3: @@ -2688,7 +3112,7 @@ out2: /* Free the command/command header memory. */ dmam_free_coherent(&dd->pdev->dev, - HW_PORT_PRIV_DMA_SZ + (ATA_SECT_SIZE * 2), + HW_PORT_PRIV_DMA_SZ + (ATA_SECT_SIZE * 4), dd->port->command_list, dd->port->command_list_dma); out1: @@ -2712,9 +3136,12 @@ static int mtip_hw_exit(struct driver_data *dd) * Send standby immediate (E0h) to the drive so that it * saves its state. */ - if (atomic_read(&dd->drv_cleanup_done) != true) { + if (!test_bit(MTIP_DDF_CLEANUP_BIT, &dd->dd_flag)) { - mtip_standby_immediate(dd->port); + if (!test_bit(MTIP_PF_REBUILD_BIT, &dd->port->flags)) + if (mtip_standby_immediate(dd->port)) + dev_warn(&dd->pdev->dev, + "STANDBY IMMEDIATE failed\n"); /* de-initialize the port. */ mtip_deinit_port(dd->port); @@ -2734,7 +3161,7 @@ static int mtip_hw_exit(struct driver_data *dd) /* Free the command/command header memory. */ dmam_free_coherent(&dd->pdev->dev, - HW_PORT_PRIV_DMA_SZ + (ATA_SECT_SIZE * 2), + HW_PORT_PRIV_DMA_SZ + (ATA_SECT_SIZE * 4), dd->port->command_list, dd->port->command_list_dma); /* Free the memory allocated for the for structure. 
*/ @@ -2892,6 +3319,9 @@ static int mtip_block_ioctl(struct block_device *dev, if (!dd) return -ENOTTY; + if (unlikely(test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag))) + return -ENOTTY; + switch (cmd) { case BLKFLSBUF: return -ENOTTY; @@ -2927,6 +3357,9 @@ static int mtip_block_compat_ioctl(struct block_device *dev, if (!dd) return -ENOTTY; + if (unlikely(test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag))) + return -ENOTTY; + switch (cmd) { case BLKFLSBUF: return -ENOTTY; @@ -3049,6 +3482,24 @@ static void mtip_make_request(struct request_queue *queue, struct bio *bio) int nents = 0; int tag = 0; + if (unlikely(dd->dd_flag & MTIP_DDF_STOP_IO)) { + if (unlikely(test_bit(MTIP_DDF_REMOVE_PENDING_BIT, + &dd->dd_flag))) { + bio_endio(bio, -ENXIO); + return; + } + if (unlikely(test_bit(MTIP_DDF_OVER_TEMP_BIT, &dd->dd_flag))) { + bio_endio(bio, -ENODATA); + return; + } + if (unlikely(test_bit(MTIP_DDF_WRITE_PROTECT_BIT, + &dd->dd_flag) && + bio_data_dir(bio))) { + bio_endio(bio, -ENODATA); + return; + } + } + if (unlikely(!bio_has_data(bio))) { blk_queue_flush(queue, 0); bio_endio(bio, 0); @@ -3061,7 +3512,7 @@ static void mtip_make_request(struct request_queue *queue, struct bio *bio) if (unlikely((bio)->bi_vcnt > MTIP_MAX_SG)) { dev_warn(&dd->pdev->dev, - "Maximum number of SGL entries exceeded"); + "Maximum number of SGL entries exceeded\n"); bio_io_error(bio); mtip_hw_release_scatterlist(dd, tag); return; @@ -3210,8 +3661,10 @@ skip_create_disk: kobject_put(kobj); } - if (dd->mtip_svc_handler) + if (dd->mtip_svc_handler) { + set_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag); return rv; /* service thread created for handling rebuild */ + } start_service_thread: sprintf(thd_name, "mtip_svc_thd_%02d", index); @@ -3220,12 +3673,15 @@ start_service_thread: dd, thd_name); if (IS_ERR(dd->mtip_svc_handler)) { - printk(KERN_ERR "mtip32xx: service thread failed to start\n"); + dev_err(&dd->pdev->dev, "service thread failed to start\n"); dd->mtip_svc_handler = NULL; rv = -EFAULT; goto kthread_run_error; } + if (wait_for_rebuild == MTIP_FTL_REBUILD_MAGIC) + rv = wait_for_rebuild; + return rv; kthread_run_error: @@ -3266,16 +3722,18 @@ static int mtip_block_remove(struct driver_data *dd) struct kobject *kobj; if (dd->mtip_svc_handler) { - set_bit(MTIP_FLAG_SVC_THD_SHOULD_STOP_BIT, &dd->port->flags); + set_bit(MTIP_PF_SVC_THD_STOP_BIT, &dd->port->flags); wake_up_interruptible(&dd->port->svc_wait); kthread_stop(dd->mtip_svc_handler); } - /* Clean up the sysfs attributes managed by the protocol layer. */ - kobj = kobject_get(&disk_to_dev(dd->disk)->kobj); - if (kobj) { - mtip_hw_sysfs_exit(dd, kobj); - kobject_put(kobj); + /* Clean up the sysfs attributes, if created */ + if (test_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag)) { + kobj = kobject_get(&disk_to_dev(dd->disk)->kobj); + if (kobj) { + mtip_hw_sysfs_exit(dd, kobj); + kobject_put(kobj); + } } /* @@ -3283,6 +3741,11 @@ static int mtip_block_remove(struct driver_data *dd) * from /dev */ del_gendisk(dd->disk); + + spin_lock(&rssd_index_lock); + ida_remove(&rssd_index_ida, dd->index); + spin_unlock(&rssd_index_lock); + blk_cleanup_queue(dd->queue); dd->disk = NULL; dd->queue = NULL; @@ -3312,6 +3775,11 @@ static int mtip_block_shutdown(struct driver_data *dd) /* Delete our gendisk structure, and cleanup the blk queue. 
*/ del_gendisk(dd->disk); + + spin_lock(&rssd_index_lock); + ida_remove(&rssd_index_ida, dd->index); + spin_unlock(&rssd_index_lock); + blk_cleanup_queue(dd->queue); dd->disk = NULL; dd->queue = NULL; @@ -3359,11 +3827,6 @@ static int mtip_pci_probe(struct pci_dev *pdev, return -ENOMEM; } - /* Set the atomic variable as 1 in case of SRSI */ - atomic_set(&dd->drv_cleanup_done, true); - - atomic_set(&dd->resumeflag, false); - /* Attach the private data to this PCI device. */ pci_set_drvdata(pdev, dd); @@ -3420,7 +3883,8 @@ static int mtip_pci_probe(struct pci_dev *pdev, * instance number. */ instance++; - + if (rv != MTIP_FTL_REBUILD_MAGIC) + set_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag); goto done; block_initialize_err: @@ -3434,9 +3898,6 @@ iomap_err: pci_set_drvdata(pdev, NULL); return rv; done: - /* Set the atomic variable as 0 in case of SRSI */ - atomic_set(&dd->drv_cleanup_done, true); - return rv; } @@ -3452,8 +3913,10 @@ static void mtip_pci_remove(struct pci_dev *pdev) struct driver_data *dd = pci_get_drvdata(pdev); int counter = 0; + set_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag); + if (mtip_check_surprise_removal(pdev)) { - while (atomic_read(&dd->drv_cleanup_done) == false) { + while (!test_bit(MTIP_DDF_CLEANUP_BIT, &dd->dd_flag)) { counter++; msleep(20); if (counter == 10) { @@ -3463,8 +3926,6 @@ static void mtip_pci_remove(struct pci_dev *pdev) } } } - /* Set the atomic variable as 1 in case of SRSI */ - atomic_set(&dd->drv_cleanup_done, true); /* Clean up the block layer. */ mtip_block_remove(dd); @@ -3493,7 +3954,7 @@ static int mtip_pci_suspend(struct pci_dev *pdev, pm_message_t mesg) return -EFAULT; } - atomic_set(&dd->resumeflag, true); + set_bit(MTIP_DDF_RESUME_BIT, &dd->dd_flag); /* Disable ports & interrupts then send standby immediate */ rv = mtip_block_suspend(dd); @@ -3559,7 +4020,7 @@ static int mtip_pci_resume(struct pci_dev *pdev) dev_err(&pdev->dev, "Unable to resume\n"); err: - atomic_set(&dd->resumeflag, false); + clear_bit(MTIP_DDF_RESUME_BIT, &dd->dd_flag); return rv; } @@ -3608,18 +4069,25 @@ MODULE_DEVICE_TABLE(pci, mtip_pci_tbl); */ static int __init mtip_init(void) { + int error; + printk(KERN_INFO MTIP_DRV_NAME " Version " MTIP_DRV_VERSION "\n"); /* Allocate a major block device number to use with this driver. */ - mtip_major = register_blkdev(0, MTIP_DRV_NAME); - if (mtip_major < 0) { + error = register_blkdev(0, MTIP_DRV_NAME); + if (error <= 0) { printk(KERN_ERR "Unable to register block device (%d)\n", - mtip_major); + error); return -EBUSY; } + mtip_major = error; /* Register our PCI operations. 
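mtip_init() is also reworked (the hunk continues below) so that the allocated block major is released again if PCI driver registration fails. The shape is the usual acquire-in-order, unwind-in-reverse pattern; a generic sketch with placeholder resources standing in for register_blkdev()/pci_register_driver():

```c
#include <stdio.h>

/* Placeholder acquire/release pairs; acquire_b() simulates failure. */
static int acquire_a(void) { return 0; }
static void release_a(void) { }
static int acquire_b(void) { return -1; }

static int module_init_sketch(void)
{
	int err;

	err = acquire_a();
	if (err)
		return err;		/* nothing to undo yet */

	err = acquire_b();
	if (err)
		release_a();		/* undo step 1 before bailing out */

	return err;
}

int main(void)
{
	printf("init: %d\n", module_init_sketch());	/* -1 */
	return 0;
}
```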
*/ - return pci_register_driver(&mtip_pci_driver); + error = pci_register_driver(&mtip_pci_driver); + if (error) + unregister_blkdev(mtip_major, MTIP_DRV_NAME); + + return error; } /* diff --git a/drivers/block/mtip32xx/mtip32xx.h b/drivers/block/mtip32xx/mtip32xx.h index e0554a8f2233..4ef58336310a 100644 --- a/drivers/block/mtip32xx/mtip32xx.h +++ b/drivers/block/mtip32xx/mtip32xx.h @@ -34,8 +34,8 @@ /* offset of Device Control register in PCIe extended capabilities space */ #define PCIE_CONFIG_EXT_DEVICE_CONTROL_OFFSET 0x48 -/* # of times to retry timed out IOs */ -#define MTIP_MAX_RETRIES 5 +/* # of times to retry timed out/failed IOs */ +#define MTIP_MAX_RETRIES 2 /* Various timeout values in ms */ #define MTIP_NCQ_COMMAND_TIMEOUT_MS 5000 @@ -114,12 +114,41 @@ #define __force_bit2int (unsigned int __force) /* below are bit numbers in 'flags' defined in mtip_port */ -#define MTIP_FLAG_IC_ACTIVE_BIT 0 -#define MTIP_FLAG_EH_ACTIVE_BIT 1 -#define MTIP_FLAG_SVC_THD_ACTIVE_BIT 2 -#define MTIP_FLAG_ISSUE_CMDS_BIT 4 -#define MTIP_FLAG_REBUILD_BIT 5 -#define MTIP_FLAG_SVC_THD_SHOULD_STOP_BIT 8 +#define MTIP_PF_IC_ACTIVE_BIT 0 /* pio/ioctl */ +#define MTIP_PF_EH_ACTIVE_BIT 1 /* error handling */ +#define MTIP_PF_SE_ACTIVE_BIT 2 /* secure erase */ +#define MTIP_PF_DM_ACTIVE_BIT 3 /* download microcode */ +#define MTIP_PF_PAUSE_IO ((1 << MTIP_PF_IC_ACTIVE_BIT) | \ + (1 << MTIP_PF_EH_ACTIVE_BIT) | \ + (1 << MTIP_PF_SE_ACTIVE_BIT) | \ + (1 << MTIP_PF_DM_ACTIVE_BIT)) + +#define MTIP_PF_SVC_THD_ACTIVE_BIT 4 +#define MTIP_PF_ISSUE_CMDS_BIT 5 +#define MTIP_PF_REBUILD_BIT 6 +#define MTIP_PF_SVC_THD_STOP_BIT 8 + +/* below are bit numbers in 'dd_flag' defined in driver_data */ +#define MTIP_DDF_REMOVE_PENDING_BIT 1 +#define MTIP_DDF_OVER_TEMP_BIT 2 +#define MTIP_DDF_WRITE_PROTECT_BIT 3 +#define MTIP_DDF_STOP_IO ((1 << MTIP_DDF_REMOVE_PENDING_BIT) | \ + (1 << MTIP_DDF_OVER_TEMP_BIT) | \ + (1 << MTIP_DDF_WRITE_PROTECT_BIT)) + +#define MTIP_DDF_CLEANUP_BIT 5 +#define MTIP_DDF_RESUME_BIT 6 +#define MTIP_DDF_INIT_DONE_BIT 7 +#define MTIP_DDF_REBUILD_FAILED_BIT 8 + +__packed struct smart_attr { + u8 attr_id; + u16 flags; + u8 cur; + u8 worst; + u32 data; + u8 res[3]; +}; /* Register Frame Information Structure (FIS), host to device. */ struct host_to_dev_fis { @@ -345,6 +374,12 @@ struct mtip_port { * when the command slot and all associated data structures * are no longer needed. */ + u16 *log_buf; + dma_addr_t log_buf_dma; + + u8 *smart_buf; + dma_addr_t smart_buf_dma; + unsigned long allocated[SLOTBITS_IN_LONGS]; /* * used to queue commands when an internal command is in progress @@ -368,6 +403,7 @@ struct mtip_port { * Timer used to complete commands that have been active for too long. */ struct timer_list cmd_timer; + unsigned long ic_pause_timer; /* * Semaphore used to block threads if there are no * command slots available. 
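Two details of the reworked header are easy to miss: MTIP_PF_PAUSE_IO and MTIP_DDF_STOP_IO are OR-composites of single-bit flags, so one masked test covers several conditions at once, and struct smart_attr is packed so it overlays the 12-byte SMART attribute record with no compiler padding. A standalone sketch of both points (GCC attribute syntax, fixed-width types standing in for the kernel's u8/u16/u32):

```c
#include <assert.h>
#include <stdint.h>

#define PF_IC_ACTIVE_BIT 0
#define PF_EH_ACTIVE_BIT 1
#define PF_SE_ACTIVE_BIT 2
#define PF_DM_ACTIVE_BIT 3
#define PF_PAUSE_IO ((1UL << PF_IC_ACTIVE_BIT) | (1UL << PF_EH_ACTIVE_BIT) | \
		     (1UL << PF_SE_ACTIVE_BIT) | (1UL << PF_DM_ACTIVE_BIT))

/* Packed so it maps the 12-byte on-device SMART attribute record. */
struct smart_attr {
	uint8_t  attr_id;
	uint16_t flags;
	uint8_t  cur;
	uint8_t  worst;
	uint32_t data;
	uint8_t  res[3];
} __attribute__((packed));

int main(void)
{
	unsigned long flags = 1UL << PF_SE_ACTIVE_BIT;

	/* One test covers all four pause conditions. */
	assert(flags & PF_PAUSE_IO);
	/* Without packing, alignment would pad this struct past 12 bytes. */
	assert(sizeof(struct smart_attr) == 12);
	return 0;
}
```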
@@ -404,13 +440,9 @@ struct driver_data { unsigned slot_groups; /* number of slot groups the product supports */ - atomic_t drv_cleanup_done; /* Atomic variable for SRSI */ - unsigned long index; /* Index to determine the disk name */ - unsigned int ftlrebuildflag; /* FTL rebuild flag */ - - atomic_t resumeflag; /* Atomic variable to track suspend/resume */ + unsigned long dd_flag; /* NOTE: use atomic bit operations on this */ struct task_struct *mtip_svc_handler; /* task_struct of svc thd */ }; diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index c3f0ee16594d..061427a75d37 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -34,12 +34,11 @@ #include <linux/kthread.h> #include <asm/uaccess.h> -#include <asm/system.h> #include <asm/types.h> #include <linux/nbd.h> -#define LO_MAGIC 0x68797548 +#define NBD_MAGIC 0x68797548 #ifdef NDEBUG #define dprintk(flags, fmt...) @@ -116,7 +115,7 @@ static void nbd_end_request(struct request *req) spin_unlock_irqrestore(q->queue_lock, flags); } -static void sock_shutdown(struct nbd_device *lo, int lock) +static void sock_shutdown(struct nbd_device *nbd, int lock) { /* Forcibly shutdown the socket causing all listeners * to error @@ -125,14 +124,14 @@ static void sock_shutdown(struct nbd_device *lo, int lock) * there should be a more generic interface rather than * calling socket ops directly here */ if (lock) - mutex_lock(&lo->tx_lock); - if (lo->sock) { - dev_warn(disk_to_dev(lo->disk), "shutting down socket\n"); - kernel_sock_shutdown(lo->sock, SHUT_RDWR); - lo->sock = NULL; + mutex_lock(&nbd->tx_lock); + if (nbd->sock) { + dev_warn(disk_to_dev(nbd->disk), "shutting down socket\n"); + kernel_sock_shutdown(nbd->sock, SHUT_RDWR); + nbd->sock = NULL; } if (lock) - mutex_unlock(&lo->tx_lock); + mutex_unlock(&nbd->tx_lock); } static void nbd_xmit_timeout(unsigned long arg) @@ -147,17 +146,17 @@ static void nbd_xmit_timeout(unsigned long arg) /* * Send or receive packet. */ -static int sock_xmit(struct nbd_device *lo, int send, void *buf, int size, +static int sock_xmit(struct nbd_device *nbd, int send, void *buf, int size, int msg_flags) { - struct socket *sock = lo->sock; + struct socket *sock = nbd->sock; int result; struct msghdr msg; struct kvec iov; sigset_t blocked, oldset; if (unlikely(!sock)) { - dev_err(disk_to_dev(lo->disk), + dev_err(disk_to_dev(nbd->disk), "Attempted %s on closed socket in sock_xmit\n", (send ? 
"send" : "recv")); return -EINVAL; @@ -181,15 +180,15 @@ static int sock_xmit(struct nbd_device *lo, int send, void *buf, int size, if (send) { struct timer_list ti; - if (lo->xmit_timeout) { + if (nbd->xmit_timeout) { init_timer(&ti); ti.function = nbd_xmit_timeout; ti.data = (unsigned long)current; - ti.expires = jiffies + lo->xmit_timeout; + ti.expires = jiffies + nbd->xmit_timeout; add_timer(&ti); } result = kernel_sendmsg(sock, &msg, &iov, 1, size); - if (lo->xmit_timeout) + if (nbd->xmit_timeout) del_timer_sync(&ti); } else result = kernel_recvmsg(sock, &msg, &iov, 1, size, @@ -201,7 +200,7 @@ static int sock_xmit(struct nbd_device *lo, int send, void *buf, int size, task_pid_nr(current), current->comm, dequeue_signal_lock(current, ¤t->blocked, &info)); result = -EINTR; - sock_shutdown(lo, !send); + sock_shutdown(nbd, !send); break; } @@ -219,18 +218,19 @@ static int sock_xmit(struct nbd_device *lo, int send, void *buf, int size, return result; } -static inline int sock_send_bvec(struct nbd_device *lo, struct bio_vec *bvec, +static inline int sock_send_bvec(struct nbd_device *nbd, struct bio_vec *bvec, int flags) { int result; void *kaddr = kmap(bvec->bv_page); - result = sock_xmit(lo, 1, kaddr + bvec->bv_offset, bvec->bv_len, flags); + result = sock_xmit(nbd, 1, kaddr + bvec->bv_offset, + bvec->bv_len, flags); kunmap(bvec->bv_page); return result; } /* always call with the tx_lock held */ -static int nbd_send_req(struct nbd_device *lo, struct request *req) +static int nbd_send_req(struct nbd_device *nbd, struct request *req) { int result, flags; struct nbd_request request; @@ -243,14 +243,14 @@ static int nbd_send_req(struct nbd_device *lo, struct request *req) memcpy(request.handle, &req, sizeof(req)); dprintk(DBG_TX, "%s: request %p: sending control (%s@%llu,%uB)\n", - lo->disk->disk_name, req, + nbd->disk->disk_name, req, nbdcmd_to_ascii(nbd_cmd(req)), (unsigned long long)blk_rq_pos(req) << 9, blk_rq_bytes(req)); - result = sock_xmit(lo, 1, &request, sizeof(request), + result = sock_xmit(nbd, 1, &request, sizeof(request), (nbd_cmd(req) == NBD_CMD_WRITE) ? 
MSG_MORE : 0); if (result <= 0) { - dev_err(disk_to_dev(lo->disk), + dev_err(disk_to_dev(nbd->disk), "Send control failed (result %d)\n", result); goto error_out; } @@ -267,10 +267,10 @@ static int nbd_send_req(struct nbd_device *lo, struct request *req) if (!rq_iter_last(req, iter)) flags = MSG_MORE; dprintk(DBG_TX, "%s: request %p: sending %d bytes data\n", - lo->disk->disk_name, req, bvec->bv_len); - result = sock_send_bvec(lo, bvec, flags); + nbd->disk->disk_name, req, bvec->bv_len); + result = sock_send_bvec(nbd, bvec, flags); if (result <= 0) { - dev_err(disk_to_dev(lo->disk), + dev_err(disk_to_dev(nbd->disk), "Send data failed (result %d)\n", result); goto error_out; @@ -283,25 +283,25 @@ error_out: return -EIO; } -static struct request *nbd_find_request(struct nbd_device *lo, +static struct request *nbd_find_request(struct nbd_device *nbd, struct request *xreq) { struct request *req, *tmp; int err; - err = wait_event_interruptible(lo->active_wq, lo->active_req != xreq); + err = wait_event_interruptible(nbd->active_wq, nbd->active_req != xreq); if (unlikely(err)) goto out; - spin_lock(&lo->queue_lock); - list_for_each_entry_safe(req, tmp, &lo->queue_head, queuelist) { + spin_lock(&nbd->queue_lock); + list_for_each_entry_safe(req, tmp, &nbd->queue_head, queuelist) { if (req != xreq) continue; list_del_init(&req->queuelist); - spin_unlock(&lo->queue_lock); + spin_unlock(&nbd->queue_lock); return req; } - spin_unlock(&lo->queue_lock); + spin_unlock(&nbd->queue_lock); err = -ENOENT; @@ -309,78 +309,78 @@ out: return ERR_PTR(err); } -static inline int sock_recv_bvec(struct nbd_device *lo, struct bio_vec *bvec) +static inline int sock_recv_bvec(struct nbd_device *nbd, struct bio_vec *bvec) { int result; void *kaddr = kmap(bvec->bv_page); - result = sock_xmit(lo, 0, kaddr + bvec->bv_offset, bvec->bv_len, + result = sock_xmit(nbd, 0, kaddr + bvec->bv_offset, bvec->bv_len, MSG_WAITALL); kunmap(bvec->bv_page); return result; } /* NULL returned = something went wrong, inform userspace */ -static struct request *nbd_read_stat(struct nbd_device *lo) +static struct request *nbd_read_stat(struct nbd_device *nbd) { int result; struct nbd_reply reply; struct request *req; reply.magic = 0; - result = sock_xmit(lo, 0, &reply, sizeof(reply), MSG_WAITALL); + result = sock_xmit(nbd, 0, &reply, sizeof(reply), MSG_WAITALL); if (result <= 0) { - dev_err(disk_to_dev(lo->disk), + dev_err(disk_to_dev(nbd->disk), "Receive control failed (result %d)\n", result); goto harderror; } if (ntohl(reply.magic) != NBD_REPLY_MAGIC) { - dev_err(disk_to_dev(lo->disk), "Wrong magic (0x%lx)\n", + dev_err(disk_to_dev(nbd->disk), "Wrong magic (0x%lx)\n", (unsigned long)ntohl(reply.magic)); result = -EPROTO; goto harderror; } - req = nbd_find_request(lo, *(struct request **)reply.handle); + req = nbd_find_request(nbd, *(struct request **)reply.handle); if (IS_ERR(req)) { result = PTR_ERR(req); if (result != -ENOENT) goto harderror; - dev_err(disk_to_dev(lo->disk), "Unexpected reply (%p)\n", + dev_err(disk_to_dev(nbd->disk), "Unexpected reply (%p)\n", reply.handle); result = -EBADR; goto harderror; } if (ntohl(reply.error)) { - dev_err(disk_to_dev(lo->disk), "Other side returned error (%d)\n", + dev_err(disk_to_dev(nbd->disk), "Other side returned error (%d)\n", ntohl(reply.error)); req->errors++; return req; } dprintk(DBG_RX, "%s: request %p: got reply\n", - lo->disk->disk_name, req); + nbd->disk->disk_name, req); if (nbd_cmd(req) == NBD_CMD_READ) { struct req_iterator iter; struct bio_vec *bvec; rq_for_each_segment(bvec, req, 
iter) { - result = sock_recv_bvec(lo, bvec); + result = sock_recv_bvec(nbd, bvec); if (result <= 0) { - dev_err(disk_to_dev(lo->disk), "Receive data failed (result %d)\n", + dev_err(disk_to_dev(nbd->disk), "Receive data failed (result %d)\n", result); req->errors++; return req; } dprintk(DBG_RX, "%s: request %p: got %d bytes data\n", - lo->disk->disk_name, req, bvec->bv_len); + nbd->disk->disk_name, req, bvec->bv_len); } } return req; harderror: - lo->harderror = result; + nbd->harderror = result; return NULL; } @@ -398,48 +398,48 @@ static struct device_attribute pid_attr = { .show = pid_show, }; -static int nbd_do_it(struct nbd_device *lo) +static int nbd_do_it(struct nbd_device *nbd) { struct request *req; int ret; - BUG_ON(lo->magic != LO_MAGIC); + BUG_ON(nbd->magic != NBD_MAGIC); - lo->pid = task_pid_nr(current); - ret = device_create_file(disk_to_dev(lo->disk), &pid_attr); + nbd->pid = task_pid_nr(current); + ret = device_create_file(disk_to_dev(nbd->disk), &pid_attr); if (ret) { - dev_err(disk_to_dev(lo->disk), "device_create_file failed!\n"); - lo->pid = 0; + dev_err(disk_to_dev(nbd->disk), "device_create_file failed!\n"); + nbd->pid = 0; return ret; } - while ((req = nbd_read_stat(lo)) != NULL) + while ((req = nbd_read_stat(nbd)) != NULL) nbd_end_request(req); - device_remove_file(disk_to_dev(lo->disk), &pid_attr); - lo->pid = 0; + device_remove_file(disk_to_dev(nbd->disk), &pid_attr); + nbd->pid = 0; return 0; } -static void nbd_clear_que(struct nbd_device *lo) +static void nbd_clear_que(struct nbd_device *nbd) { struct request *req; - BUG_ON(lo->magic != LO_MAGIC); + BUG_ON(nbd->magic != NBD_MAGIC); /* - * Because we have set lo->sock to NULL under the tx_lock, all + * Because we have set nbd->sock to NULL under the tx_lock, all * modifications to the list must have completed by now. For * the same reason, the active_req must be NULL. * * As a consequence, we don't need to take the spin lock while * purging the list here. 
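The comment above states the invariant that makes the lock-free purge safe: nbd->sock is cleared while holding tx_lock, and every queue writer re-checks the socket under that same lock, so once it reads NULL no new requests can be queued. A toy pthread rendering of that handoff (names illustrative):

```c
#include <pthread.h>
#include <stddef.h>

struct dev {
	pthread_mutex_t tx_lock;
	void *sock;		/* writers touch the queue only while sock != NULL */
	int queue_len;
};

static void producer(struct dev *d)
{
	pthread_mutex_lock(&d->tx_lock);
	if (d->sock)		/* checked under the lock, as in nbd_handle_req() */
		d->queue_len++;
	pthread_mutex_unlock(&d->tx_lock);
}

static void shutdown_and_purge(struct dev *d)
{
	pthread_mutex_lock(&d->tx_lock);
	d->sock = NULL;		/* from here on, producers see NULL and back off */
	pthread_mutex_unlock(&d->tx_lock);

	/* No lock needed: nothing can be added to the queue any more. */
	d->queue_len = 0;
}
```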
*/ - BUG_ON(lo->sock); - BUG_ON(lo->active_req); + BUG_ON(nbd->sock); + BUG_ON(nbd->active_req); - while (!list_empty(&lo->queue_head)) { - req = list_entry(lo->queue_head.next, struct request, + while (!list_empty(&nbd->queue_head)) { + req = list_entry(nbd->queue_head.next, struct request, queuelist); list_del_init(&req->queuelist); req->errors++; @@ -448,7 +448,7 @@ static void nbd_clear_que(struct nbd_device *lo) } -static void nbd_handle_req(struct nbd_device *lo, struct request *req) +static void nbd_handle_req(struct nbd_device *nbd, struct request *req) { if (req->cmd_type != REQ_TYPE_FS) goto error_out; @@ -456,8 +456,8 @@ static void nbd_handle_req(struct nbd_device *lo, struct request *req) nbd_cmd(req) = NBD_CMD_READ; if (rq_data_dir(req) == WRITE) { nbd_cmd(req) = NBD_CMD_WRITE; - if (lo->flags & NBD_READ_ONLY) { - dev_err(disk_to_dev(lo->disk), + if (nbd->flags & NBD_READ_ONLY) { + dev_err(disk_to_dev(nbd->disk), "Write on read-only\n"); goto error_out; } @@ -465,29 +465,29 @@ static void nbd_handle_req(struct nbd_device *lo, struct request *req) req->errors = 0; - mutex_lock(&lo->tx_lock); - if (unlikely(!lo->sock)) { - mutex_unlock(&lo->tx_lock); - dev_err(disk_to_dev(lo->disk), + mutex_lock(&nbd->tx_lock); + if (unlikely(!nbd->sock)) { + mutex_unlock(&nbd->tx_lock); + dev_err(disk_to_dev(nbd->disk), "Attempted send on closed socket\n"); goto error_out; } - lo->active_req = req; + nbd->active_req = req; - if (nbd_send_req(lo, req) != 0) { - dev_err(disk_to_dev(lo->disk), "Request send failed\n"); + if (nbd_send_req(nbd, req) != 0) { + dev_err(disk_to_dev(nbd->disk), "Request send failed\n"); req->errors++; nbd_end_request(req); } else { - spin_lock(&lo->queue_lock); - list_add(&req->queuelist, &lo->queue_head); - spin_unlock(&lo->queue_lock); + spin_lock(&nbd->queue_lock); + list_add(&req->queuelist, &nbd->queue_head); + spin_unlock(&nbd->queue_lock); } - lo->active_req = NULL; - mutex_unlock(&lo->tx_lock); - wake_up_all(&lo->active_wq); + nbd->active_req = NULL; + mutex_unlock(&nbd->tx_lock); + wake_up_all(&nbd->active_wq); return; @@ -498,28 +498,28 @@ error_out: static int nbd_thread(void *data) { - struct nbd_device *lo = data; + struct nbd_device *nbd = data; struct request *req; set_user_nice(current, -20); - while (!kthread_should_stop() || !list_empty(&lo->waiting_queue)) { + while (!kthread_should_stop() || !list_empty(&nbd->waiting_queue)) { /* wait for something to do */ - wait_event_interruptible(lo->waiting_wq, + wait_event_interruptible(nbd->waiting_wq, kthread_should_stop() || - !list_empty(&lo->waiting_queue)); + !list_empty(&nbd->waiting_queue)); /* extract request */ - if (list_empty(&lo->waiting_queue)) + if (list_empty(&nbd->waiting_queue)) continue; - spin_lock_irq(&lo->queue_lock); - req = list_entry(lo->waiting_queue.next, struct request, + spin_lock_irq(&nbd->queue_lock); + req = list_entry(nbd->waiting_queue.next, struct request, queuelist); list_del_init(&req->queuelist); - spin_unlock_irq(&lo->queue_lock); + spin_unlock_irq(&nbd->queue_lock); /* handle request */ - nbd_handle_req(lo, req); + nbd_handle_req(nbd, req); } return 0; } @@ -527,7 +527,7 @@ static int nbd_thread(void *data) /* * We always wait for result of write, for now. 
It would be nice to make it optional * in future - * if ((rq_data_dir(req) == WRITE) && (lo->flags & NBD_WRITE_NOCHK)) + * if ((rq_data_dir(req) == WRITE) && (nbd->flags & NBD_WRITE_NOCHK)) * { printk( "Warning: Ignoring result!\n"); nbd_end_request( req ); } */ @@ -536,19 +536,19 @@ static void do_nbd_request(struct request_queue *q) struct request *req; while ((req = blk_fetch_request(q)) != NULL) { - struct nbd_device *lo; + struct nbd_device *nbd; spin_unlock_irq(q->queue_lock); dprintk(DBG_BLKDEV, "%s: request %p: dequeued (flags=%x)\n", req->rq_disk->disk_name, req, req->cmd_type); - lo = req->rq_disk->private_data; + nbd = req->rq_disk->private_data; - BUG_ON(lo->magic != LO_MAGIC); + BUG_ON(nbd->magic != NBD_MAGIC); - if (unlikely(!lo->sock)) { - dev_err(disk_to_dev(lo->disk), + if (unlikely(!nbd->sock)) { + dev_err(disk_to_dev(nbd->disk), "Attempted send on closed socket\n"); req->errors++; nbd_end_request(req); @@ -556,11 +556,11 @@ static void do_nbd_request(struct request_queue *q) continue; } - spin_lock_irq(&lo->queue_lock); - list_add_tail(&req->queuelist, &lo->waiting_queue); - spin_unlock_irq(&lo->queue_lock); + spin_lock_irq(&nbd->queue_lock); + list_add_tail(&req->queuelist, &nbd->waiting_queue); + spin_unlock_irq(&nbd->queue_lock); - wake_up(&lo->waiting_wq); + wake_up(&nbd->waiting_wq); spin_lock_irq(q->queue_lock); } @@ -568,32 +568,32 @@ static void do_nbd_request(struct request_queue *q) /* Must be called with tx_lock held */ -static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *lo, +static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, unsigned int cmd, unsigned long arg) { switch (cmd) { case NBD_DISCONNECT: { struct request sreq; - dev_info(disk_to_dev(lo->disk), "NBD_DISCONNECT\n"); + dev_info(disk_to_dev(nbd->disk), "NBD_DISCONNECT\n"); blk_rq_init(NULL, &sreq); sreq.cmd_type = REQ_TYPE_SPECIAL; nbd_cmd(&sreq) = NBD_CMD_DISC; - if (!lo->sock) + if (!nbd->sock) return -EINVAL; - nbd_send_req(lo, &sreq); + nbd_send_req(nbd, &sreq); return 0; } case NBD_CLEAR_SOCK: { struct file *file; - lo->sock = NULL; - file = lo->file; - lo->file = NULL; - nbd_clear_que(lo); - BUG_ON(!list_empty(&lo->queue_head)); + nbd->sock = NULL; + file = nbd->file; + nbd->file = NULL; + nbd_clear_que(nbd); + BUG_ON(!list_empty(&nbd->queue_head)); if (file) fput(file); return 0; @@ -601,14 +601,14 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *lo, case NBD_SET_SOCK: { struct file *file; - if (lo->file) + if (nbd->file) return -EBUSY; file = fget(arg); if (file) { struct inode *inode = file->f_path.dentry->d_inode; if (S_ISSOCK(inode->i_mode)) { - lo->file = file; - lo->sock = SOCKET_I(inode); + nbd->file = file; + nbd->sock = SOCKET_I(inode); if (max_part > 0) bdev->bd_invalidated = 1; return 0; @@ -620,29 +620,29 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *lo, } case NBD_SET_BLKSIZE: - lo->blksize = arg; - lo->bytesize &= ~(lo->blksize-1); - bdev->bd_inode->i_size = lo->bytesize; - set_blocksize(bdev, lo->blksize); - set_capacity(lo->disk, lo->bytesize >> 9); + nbd->blksize = arg; + nbd->bytesize &= ~(nbd->blksize-1); + bdev->bd_inode->i_size = nbd->bytesize; + set_blocksize(bdev, nbd->blksize); + set_capacity(nbd->disk, nbd->bytesize >> 9); return 0; case NBD_SET_SIZE: - lo->bytesize = arg & ~(lo->blksize-1); - bdev->bd_inode->i_size = lo->bytesize; - set_blocksize(bdev, lo->blksize); - set_capacity(lo->disk, lo->bytesize >> 9); + nbd->bytesize = arg & ~(nbd->blksize-1); + bdev->bd_inode->i_size = 
nbd->bytesize; + set_blocksize(bdev, nbd->blksize); + set_capacity(nbd->disk, nbd->bytesize >> 9); return 0; case NBD_SET_TIMEOUT: - lo->xmit_timeout = arg * HZ; + nbd->xmit_timeout = arg * HZ; return 0; case NBD_SET_SIZE_BLOCKS: - lo->bytesize = ((u64) arg) * lo->blksize; - bdev->bd_inode->i_size = lo->bytesize; - set_blocksize(bdev, lo->blksize); - set_capacity(lo->disk, lo->bytesize >> 9); + nbd->bytesize = ((u64) arg) * nbd->blksize; + bdev->bd_inode->i_size = nbd->bytesize; + set_blocksize(bdev, nbd->blksize); + set_capacity(nbd->disk, nbd->bytesize >> 9); return 0; case NBD_DO_IT: { @@ -650,38 +650,38 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *lo, struct file *file; int error; - if (lo->pid) + if (nbd->pid) return -EBUSY; - if (!lo->file) + if (!nbd->file) return -EINVAL; - mutex_unlock(&lo->tx_lock); + mutex_unlock(&nbd->tx_lock); - thread = kthread_create(nbd_thread, lo, lo->disk->disk_name); + thread = kthread_create(nbd_thread, nbd, nbd->disk->disk_name); if (IS_ERR(thread)) { - mutex_lock(&lo->tx_lock); + mutex_lock(&nbd->tx_lock); return PTR_ERR(thread); } wake_up_process(thread); - error = nbd_do_it(lo); + error = nbd_do_it(nbd); kthread_stop(thread); - mutex_lock(&lo->tx_lock); + mutex_lock(&nbd->tx_lock); if (error) return error; - sock_shutdown(lo, 0); - file = lo->file; - lo->file = NULL; - nbd_clear_que(lo); - dev_warn(disk_to_dev(lo->disk), "queue cleared\n"); + sock_shutdown(nbd, 0); + file = nbd->file; + nbd->file = NULL; + nbd_clear_que(nbd); + dev_warn(disk_to_dev(nbd->disk), "queue cleared\n"); if (file) fput(file); - lo->bytesize = 0; + nbd->bytesize = 0; bdev->bd_inode->i_size = 0; - set_capacity(lo->disk, 0); + set_capacity(nbd->disk, 0); if (max_part > 0) ioctl_by_bdev(bdev, BLKRRPART, 0); - return lo->harderror; + return nbd->harderror; } case NBD_CLEAR_QUE: @@ -689,14 +689,14 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *lo, * This is for compatibility only. The queue is always cleared * by NBD_DO_IT or NBD_CLEAR_SOCK. 
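All three size ioctls above reduce to the same arithmetic: round the byte size down to a multiple of the (power-of-two) block size with bytesize & ~(blksize - 1), then express the gendisk capacity in 512-byte sectors with bytesize >> 9. A quick worked example:

```c
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t blksize = 1024;	/* e.g. an NBD_SET_BLKSIZE argument */
	uint64_t bytesize = 1000000;	/* e.g. an NBD_SET_SIZE argument */

	/* Round down to a block multiple; valid only for power-of-two sizes. */
	bytesize &= ~(blksize - 1);
	printf("bytesize = %llu\n", (unsigned long long)bytesize);	  /* 999424 */

	/* Capacity is always counted in 512-byte sectors. */
	printf("sectors  = %llu\n", (unsigned long long)(bytesize >> 9)); /* 1952 */
	return 0;
}
```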
*/ - BUG_ON(!lo->sock && !list_empty(&lo->queue_head)); + BUG_ON(!nbd->sock && !list_empty(&nbd->queue_head)); return 0; case NBD_PRINT_DEBUG: - dev_info(disk_to_dev(lo->disk), + dev_info(disk_to_dev(nbd->disk), "next = %p, prev = %p, head = %p\n", - lo->queue_head.next, lo->queue_head.prev, - &lo->queue_head); + nbd->queue_head.next, nbd->queue_head.prev, + &nbd->queue_head); return 0; } return -ENOTTY; @@ -705,21 +705,21 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *lo, static int nbd_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg) { - struct nbd_device *lo = bdev->bd_disk->private_data; + struct nbd_device *nbd = bdev->bd_disk->private_data; int error; if (!capable(CAP_SYS_ADMIN)) return -EPERM; - BUG_ON(lo->magic != LO_MAGIC); + BUG_ON(nbd->magic != NBD_MAGIC); /* Anyone capable of this syscall can do *real bad* things */ dprintk(DBG_IOCTL, "%s: nbd_ioctl cmd=%s(0x%x) arg=%lu\n", - lo->disk->disk_name, ioctl_cmd_to_ascii(cmd), cmd, arg); + nbd->disk->disk_name, ioctl_cmd_to_ascii(cmd), cmd, arg); - mutex_lock(&lo->tx_lock); - error = __nbd_ioctl(bdev, lo, cmd, arg); - mutex_unlock(&lo->tx_lock); + mutex_lock(&nbd->tx_lock); + error = __nbd_ioctl(bdev, nbd, cmd, arg); + mutex_unlock(&nbd->tx_lock); return error; } @@ -805,7 +805,7 @@ static int __init nbd_init(void) for (i = 0; i < nbds_max; i++) { struct gendisk *disk = nbd_dev[i].disk; nbd_dev[i].file = NULL; - nbd_dev[i].magic = LO_MAGIC; + nbd_dev[i].magic = NBD_MAGIC; nbd_dev[i].flags = 0; INIT_LIST_HEAD(&nbd_dev[i].waiting_queue); spin_lock_init(&nbd_dev[i].queue_lock); diff --git a/drivers/block/nvme.c b/drivers/block/nvme.c index 1f3c1a7d132a..38a2d0631882 100644 --- a/drivers/block/nvme.c +++ b/drivers/block/nvme.c @@ -39,7 +39,6 @@ #include <linux/sched.h> #include <linux/slab.h> #include <linux/types.h> -#include <linux/version.h> #include <asm-generic/io-64-nonatomic-lo-hi.h> diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index d59edeabd93f..ba66e4445f41 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -987,14 +987,14 @@ static void pkt_copy_bio_data(struct bio *src_bio, int seg, int offs, struct pag while (copy_size > 0) { struct bio_vec *src_bvl = bio_iovec_idx(src_bio, seg); - void *vfrom = kmap_atomic(src_bvl->bv_page, KM_USER0) + + void *vfrom = kmap_atomic(src_bvl->bv_page) + src_bvl->bv_offset + offs; void *vto = page_address(dst_page) + dst_offs; int len = min_t(int, copy_size, src_bvl->bv_len - offs); BUG_ON(len < 0); memcpy(vto, vfrom, len); - kunmap_atomic(vfrom, KM_USER0); + kunmap_atomic(vfrom); seg++; offs = 0; @@ -1019,10 +1019,10 @@ static void pkt_make_local_copy(struct packet_data *pkt, struct bio_vec *bvec) offs = 0; for (f = 0; f < pkt->frames; f++) { if (bvec[f].bv_page != pkt->pages[p]) { - void *vfrom = kmap_atomic(bvec[f].bv_page, KM_USER0) + bvec[f].bv_offset; + void *vfrom = kmap_atomic(bvec[f].bv_page) + bvec[f].bv_offset; void *vto = page_address(pkt->pages[p]) + offs; memcpy(vto, vfrom, CD_FRAMESIZE); - kunmap_atomic(vfrom, KM_USER0); + kunmap_atomic(vfrom); bvec[f].bv_page = pkt->pages[p]; bvec[f].bv_offset = offs; } else { diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index a6278e7e61a0..013c7a549fb6 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -41,19 +41,35 @@ #include "rbd_types.h" -#define DRV_NAME "rbd" -#define DRV_NAME_LONG "rbd (rados block device)" +/* + * The basic unit of block I/O is a sector. 
It is interpreted in a + * number of contexts in Linux (blk, bio, genhd), but the default is + * universally 512 bytes. These symbols are just slightly more + * meaningful than the bare numbers they represent. + */ +#define SECTOR_SHIFT 9 +#define SECTOR_SIZE (1ULL << SECTOR_SHIFT) + +#define RBD_DRV_NAME "rbd" +#define RBD_DRV_NAME_LONG "rbd (rados block device)" #define RBD_MINORS_PER_MAJOR 256 /* max minors per blkdev */ -#define RBD_MAX_MD_NAME_LEN (96 + sizeof(RBD_SUFFIX)) +#define RBD_MAX_MD_NAME_LEN (RBD_MAX_OBJ_NAME_LEN + sizeof(RBD_SUFFIX)) #define RBD_MAX_POOL_NAME_LEN 64 #define RBD_MAX_SNAP_NAME_LEN 32 #define RBD_MAX_OPT_LEN 1024 #define RBD_SNAP_HEAD_NAME "-" +/* + * An RBD device name will be "rbd#", where the "rbd" comes from + * RBD_DRV_NAME above, and # is a unique integer identifier. + * MAX_INT_FORMAT_WIDTH is used in ensuring DEV_NAME_LEN is big + * enough to hold all possible device names. + */ #define DEV_NAME_LEN 32 +#define MAX_INT_FORMAT_WIDTH ((5 * sizeof (int)) / 2 + 1) #define RBD_NOTIFY_TIMEOUT_DEFAULT 10 @@ -66,7 +82,6 @@ struct rbd_image_header { __u8 obj_order; __u8 crypt_type; __u8 comp_type; - struct rw_semaphore snap_rwsem; struct ceph_snap_context *snapc; size_t snap_names_len; u64 snap_seq; @@ -83,7 +98,7 @@ struct rbd_options { }; /* - * an instance of the client. multiple devices may share a client. + * an instance of the client. multiple devices may share an rbd client. */ struct rbd_client { struct ceph_client *client; @@ -92,20 +107,9 @@ struct rbd_client { struct list_head node; }; -struct rbd_req_coll; - /* - * a single io request + * a request completion status */ -struct rbd_request { - struct request *rq; /* blk layer request */ - struct bio *bio; /* cloned bio */ - struct page **pages; /* list of used pages */ - u64 len; - int coll_index; - struct rbd_req_coll *coll; -}; - struct rbd_req_status { int done; int rc; @@ -122,6 +126,18 @@ struct rbd_req_coll { struct rbd_req_status status[0]; }; +/* + * a single io request + */ +struct rbd_request { + struct request *rq; /* blk layer request */ + struct bio *bio; /* cloned bio */ + struct page **pages; /* list of used pages */ + u64 len; + int coll_index; + struct rbd_req_coll *coll; +}; + struct rbd_snap { struct device dev; const char *name; @@ -140,7 +156,6 @@ struct rbd_device { struct gendisk *disk; /* blkdev's gendisk and rq */ struct request_queue *q; - struct ceph_client *client; struct rbd_client *rbd_client; char name[DEV_NAME_LEN]; /* blkdev name, e.g. 
rbd3 */ @@ -157,6 +172,8 @@ struct rbd_device { struct ceph_osd_event *watch_event; struct ceph_osd_request *watch_request; + /* protects updating the header */ + struct rw_semaphore header_rwsem; char snap_name[RBD_MAX_SNAP_NAME_LEN]; u32 cur_snap; /* index+1 of current snapshot within snap context 0 - for the head */ @@ -171,15 +188,13 @@ struct rbd_device { struct device dev; }; -static struct bus_type rbd_bus_type = { - .name = "rbd", -}; - -static spinlock_t node_lock; /* protects client get/put */ - static DEFINE_MUTEX(ctl_mutex); /* Serialize open/close/setup/teardown */ + static LIST_HEAD(rbd_dev_list); /* devices */ -static LIST_HEAD(rbd_client_list); /* clients */ +static DEFINE_SPINLOCK(rbd_dev_list_lock); + +static LIST_HEAD(rbd_client_list); /* clients */ +static DEFINE_SPINLOCK(rbd_client_list_lock); static int __rbd_init_snaps_header(struct rbd_device *rbd_dev); static void rbd_dev_release(struct device *dev); @@ -190,12 +205,32 @@ static ssize_t rbd_snap_add(struct device *dev, static void __rbd_remove_snap_dev(struct rbd_device *rbd_dev, struct rbd_snap *snap); +static ssize_t rbd_add(struct bus_type *bus, const char *buf, + size_t count); +static ssize_t rbd_remove(struct bus_type *bus, const char *buf, + size_t count); -static struct rbd_device *dev_to_rbd(struct device *dev) +static struct bus_attribute rbd_bus_attrs[] = { + __ATTR(add, S_IWUSR, NULL, rbd_add), + __ATTR(remove, S_IWUSR, NULL, rbd_remove), + __ATTR_NULL +}; + +static struct bus_type rbd_bus_type = { + .name = "rbd", + .bus_attrs = rbd_bus_attrs, +}; + +static void rbd_root_dev_release(struct device *dev) { - return container_of(dev, struct rbd_device, dev); } +static struct device rbd_root_dev = { + .init_name = "rbd", + .release = rbd_root_dev_release, +}; + + static struct device *rbd_get_dev(struct rbd_device *rbd_dev) { return get_device(&rbd_dev->dev); @@ -210,8 +245,7 @@ static int __rbd_update_snaps(struct rbd_device *rbd_dev); static int rbd_open(struct block_device *bdev, fmode_t mode) { - struct gendisk *disk = bdev->bd_disk; - struct rbd_device *rbd_dev = disk->private_data; + struct rbd_device *rbd_dev = bdev->bd_disk->private_data; rbd_get_dev(rbd_dev); @@ -256,9 +290,11 @@ static struct rbd_client *rbd_client_create(struct ceph_options *opt, kref_init(&rbdc->kref); INIT_LIST_HEAD(&rbdc->node); + mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); + rbdc->client = ceph_create_client(opt, rbdc, 0, 0); if (IS_ERR(rbdc->client)) - goto out_rbdc; + goto out_mutex; opt = NULL; /* Now rbdc->client is responsible for opt */ ret = ceph_open_session(rbdc->client); @@ -267,16 +303,19 @@ static struct rbd_client *rbd_client_create(struct ceph_options *opt, rbdc->rbd_opts = rbd_opts; - spin_lock(&node_lock); + spin_lock(&rbd_client_list_lock); list_add_tail(&rbdc->node, &rbd_client_list); - spin_unlock(&node_lock); + spin_unlock(&rbd_client_list_lock); + + mutex_unlock(&ctl_mutex); dout("rbd_client_create created %p\n", rbdc); return rbdc; out_err: ceph_destroy_client(rbdc->client); -out_rbdc: +out_mutex: + mutex_unlock(&ctl_mutex); kfree(rbdc); out_opt: if (opt) @@ -324,7 +363,7 @@ static int parse_rbd_opts_token(char *c, void *private) substring_t argstr[MAX_OPT_ARGS]; int token, intval, ret; - token = match_token((char *)c, rbdopt_tokens, argstr); + token = match_token(c, rbdopt_tokens, argstr); if (token < 0) return -EINVAL; @@ -357,58 +396,54 @@ static int parse_rbd_opts_token(char *c, void *private) * Get a ceph client with specific addr and configuration, if one does * not exist create it. 
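rbd_get_client(), reworked just below, follows the usual share-or-create pattern: look up an existing client under the list spinlock and take a reference, otherwise drop the lock, create a new client, and publish it. A reduced sketch of that shape with a plain integer refcount instead of a kref (types and the trivial match test are illustrative):

```c
#include <pthread.h>
#include <stdlib.h>

struct client {
	int refcount;
	struct client *next;
};

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
static struct client *client_list;

static struct client *get_client(void)
{
	struct client *c;

	pthread_mutex_lock(&list_lock);
	for (c = client_list; c; c = c->next) {
		/* A real lookup would compare connection options here. */
		c->refcount++;		/* reuse an existing client */
		pthread_mutex_unlock(&list_lock);
		return c;
	}
	pthread_mutex_unlock(&list_lock);

	/* Slow path: create outside the lock, then publish. */
	c = calloc(1, sizeof(*c));
	if (!c)
		return NULL;
	c->refcount = 1;
	pthread_mutex_lock(&list_lock);
	c->next = client_list;
	client_list = c;
	pthread_mutex_unlock(&list_lock);
	return c;
}
```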
*/ -static int rbd_get_client(struct rbd_device *rbd_dev, const char *mon_addr, - char *options) +static struct rbd_client *rbd_get_client(const char *mon_addr, + size_t mon_addr_len, + char *options) { struct rbd_client *rbdc; struct ceph_options *opt; - int ret; struct rbd_options *rbd_opts; rbd_opts = kzalloc(sizeof(*rbd_opts), GFP_KERNEL); if (!rbd_opts) - return -ENOMEM; + return ERR_PTR(-ENOMEM); rbd_opts->notify_timeout = RBD_NOTIFY_TIMEOUT_DEFAULT; - ret = ceph_parse_options(&opt, options, mon_addr, - mon_addr + strlen(mon_addr), parse_rbd_opts_token, rbd_opts); - if (ret < 0) - goto done_err; + opt = ceph_parse_options(options, mon_addr, + mon_addr + mon_addr_len, + parse_rbd_opts_token, rbd_opts); + if (IS_ERR(opt)) { + kfree(rbd_opts); + return ERR_CAST(opt); + } - spin_lock(&node_lock); + spin_lock(&rbd_client_list_lock); rbdc = __rbd_client_find(opt); if (rbdc) { + /* using an existing client */ + kref_get(&rbdc->kref); + spin_unlock(&rbd_client_list_lock); + ceph_destroy_options(opt); kfree(rbd_opts); - /* using an existing client */ - kref_get(&rbdc->kref); - rbd_dev->rbd_client = rbdc; - rbd_dev->client = rbdc->client; - spin_unlock(&node_lock); - return 0; + return rbdc; } - spin_unlock(&node_lock); + spin_unlock(&rbd_client_list_lock); rbdc = rbd_client_create(opt, rbd_opts); - if (IS_ERR(rbdc)) { - ret = PTR_ERR(rbdc); - goto done_err; - } - rbd_dev->rbd_client = rbdc; - rbd_dev->client = rbdc->client; - return 0; -done_err: - kfree(rbd_opts); - return ret; + if (IS_ERR(rbdc)) + kfree(rbd_opts); + + return rbdc; } /* * Destroy ceph client * - * Caller must hold node_lock. + * Caller must hold rbd_client_list_lock. */ static void rbd_client_release(struct kref *kref) { @@ -428,11 +463,10 @@ static void rbd_client_release(struct kref *kref) */ static void rbd_put_client(struct rbd_device *rbd_dev) { - spin_lock(&node_lock); + spin_lock(&rbd_client_list_lock); kref_put(&rbd_dev->rbd_client->kref, rbd_client_release); - spin_unlock(&node_lock); + spin_unlock(&rbd_client_list_lock); rbd_dev->rbd_client = NULL; - rbd_dev->client = NULL; } /* @@ -457,21 +491,19 @@ static int rbd_header_from_disk(struct rbd_image_header *header, gfp_t gfp_flags) { int i; - u32 snap_count = le32_to_cpu(ondisk->snap_count); - int ret = -ENOMEM; + u32 snap_count; - if (memcmp(ondisk, RBD_HEADER_TEXT, sizeof(RBD_HEADER_TEXT))) { + if (memcmp(ondisk, RBD_HEADER_TEXT, sizeof(RBD_HEADER_TEXT))) return -ENXIO; - } - init_rwsem(&header->snap_rwsem); - header->snap_names_len = le64_to_cpu(ondisk->snap_names_len); + snap_count = le32_to_cpu(ondisk->snap_count); header->snapc = kmalloc(sizeof(struct ceph_snap_context) + - snap_count * - sizeof(struct rbd_image_snap_ondisk), + snap_count * sizeof (*ondisk), gfp_flags); if (!header->snapc) return -ENOMEM; + + header->snap_names_len = le64_to_cpu(ondisk->snap_names_len); if (snap_count) { header->snap_names = kmalloc(header->snap_names_len, GFP_KERNEL); @@ -498,8 +530,7 @@ static int rbd_header_from_disk(struct rbd_image_header *header, header->snapc->num_snaps = snap_count; header->total_snaps = snap_count; - if (snap_count && - allocated_snaps == snap_count) { + if (snap_count && allocated_snaps == snap_count) { for (i = 0; i < snap_count; i++) { header->snapc->snaps[i] = le64_to_cpu(ondisk->snaps[i].id); @@ -518,7 +549,7 @@ err_names: kfree(header->snap_names); err_snapc: kfree(header->snapc); - return ret; + return -ENOMEM; } static int snap_index(struct rbd_image_header *header, int snap_num) @@ -542,35 +573,34 @@ static int snap_by_name(struct 
rbd_image_header *header, const char *snap_name, int i; char *p = header->snap_names; - for (i = 0; i < header->total_snaps; i++, p += strlen(p) + 1) { - if (strcmp(snap_name, p) == 0) - break; - } - if (i == header->total_snaps) - return -ENOENT; - if (seq) - *seq = header->snapc->snaps[i]; + for (i = 0; i < header->total_snaps; i++) { + if (!strcmp(snap_name, p)) { - if (size) - *size = header->snap_sizes[i]; + /* Found it. Pass back its id and/or size */ - return i; + if (seq) + *seq = header->snapc->snaps[i]; + if (size) + *size = header->snap_sizes[i]; + return i; + } + p += strlen(p) + 1; /* Skip ahead to the next name */ + } + return -ENOENT; } -static int rbd_header_set_snap(struct rbd_device *dev, - const char *snap_name, - u64 *size) +static int rbd_header_set_snap(struct rbd_device *dev, u64 *size) { struct rbd_image_header *header = &dev->header; struct ceph_snap_context *snapc = header->snapc; int ret = -ENOENT; - down_write(&header->snap_rwsem); + BUILD_BUG_ON(sizeof (dev->snap_name) < sizeof (RBD_SNAP_HEAD_NAME)); - if (!snap_name || - !*snap_name || - strcmp(snap_name, "-") == 0 || - strcmp(snap_name, RBD_SNAP_HEAD_NAME) == 0) { + down_write(&dev->header_rwsem); + + if (!memcmp(dev->snap_name, RBD_SNAP_HEAD_NAME, + sizeof (RBD_SNAP_HEAD_NAME))) { if (header->total_snaps) snapc->seq = header->snap_seq; else @@ -580,7 +610,7 @@ static int rbd_header_set_snap(struct rbd_device *dev, if (size) *size = header->image_size; } else { - ret = snap_by_name(header, snap_name, &snapc->seq, size); + ret = snap_by_name(header, dev->snap_name, &snapc->seq, size); if (ret < 0) goto done; @@ -590,7 +620,7 @@ static int rbd_header_set_snap(struct rbd_device *dev, ret = 0; done: - up_write(&header->snap_rwsem); + up_write(&dev->header_rwsem); return ret; } @@ -717,7 +747,7 @@ static struct bio *bio_chain_clone(struct bio **old, struct bio **next, /* split the bio. 
We'll release it either in the next call, or it will have to be released outside */ - bp = bio_split(old_chain, (len - total) / 512ULL); + bp = bio_split(old_chain, (len - total) / SECTOR_SIZE); if (!bp) goto err_out; @@ -857,7 +887,7 @@ static int rbd_do_request(struct request *rq, struct timespec mtime = CURRENT_TIME; struct rbd_request *req_data; struct ceph_osd_request_head *reqhead; - struct rbd_image_header *header = &dev->header; + struct ceph_osd_client *osdc; req_data = kzalloc(sizeof(*req_data), GFP_NOIO); if (!req_data) { @@ -874,15 +904,13 @@ static int rbd_do_request(struct request *rq, dout("rbd_do_request obj=%s ofs=%lld len=%lld\n", obj, len, ofs); - down_read(&header->snap_rwsem); + down_read(&dev->header_rwsem); - req = ceph_osdc_alloc_request(&dev->client->osdc, flags, - snapc, - ops, - false, - GFP_NOIO, pages, bio); + osdc = &dev->rbd_client->client->osdc; + req = ceph_osdc_alloc_request(osdc, flags, snapc, ops, + false, GFP_NOIO, pages, bio); if (!req) { - up_read(&header->snap_rwsem); + up_read(&dev->header_rwsem); ret = -ENOMEM; goto done_pages; } @@ -909,27 +937,27 @@ static int rbd_do_request(struct request *rq, layout->fl_object_size = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER); layout->fl_pg_preferred = cpu_to_le32(-1); layout->fl_pg_pool = cpu_to_le32(dev->poolid); - ceph_calc_raw_layout(&dev->client->osdc, layout, snapid, - ofs, &len, &bno, req, ops); + ceph_calc_raw_layout(osdc, layout, snapid, ofs, &len, &bno, + req, ops); ceph_osdc_build_request(req, ofs, &len, ops, snapc, &mtime, req->r_oid, req->r_oid_len); - up_read(&header->snap_rwsem); + up_read(&dev->header_rwsem); if (linger_req) { - ceph_osdc_set_request_linger(&dev->client->osdc, req); + ceph_osdc_set_request_linger(osdc, req); *linger_req = req; } - ret = ceph_osdc_start_request(&dev->client->osdc, req, false); + ret = ceph_osdc_start_request(osdc, req, false); if (ret < 0) goto done_err; if (!rbd_cb) { - ret = ceph_osdc_wait_request(&dev->client->osdc, req); + ret = ceph_osdc_wait_request(osdc, req); if (ver) *ver = le64_to_cpu(req->r_reassert_version.version); dout("reassert_ver=%lld\n", @@ -1213,8 +1241,8 @@ static void rbd_watch_cb(u64 ver, u64 notify_id, u8 opcode, void *data) rc = __rbd_update_snaps(dev); mutex_unlock(&ctl_mutex); if (rc) - pr_warning(DRV_NAME "%d got notification but failed to update" - " snaps: %d\n", dev->major, rc); + pr_warning(RBD_DRV_NAME "%d got notification but failed to " + " update snaps: %d\n", dev->major, rc); rbd_req_sync_notify_ack(dev, ver, notify_id, dev->obj_md_name); } @@ -1227,7 +1255,7 @@ static int rbd_req_sync_watch(struct rbd_device *dev, u64 ver) { struct ceph_osd_req_op *ops; - struct ceph_osd_client *osdc = &dev->client->osdc; + struct ceph_osd_client *osdc = &dev->rbd_client->client->osdc; int ret = rbd_create_rw_ops(&ops, 1, CEPH_OSD_OP_WATCH, 0); if (ret < 0) @@ -1314,7 +1342,7 @@ static int rbd_req_sync_notify(struct rbd_device *dev, const char *obj) { struct ceph_osd_req_op *ops; - struct ceph_osd_client *osdc = &dev->client->osdc; + struct ceph_osd_client *osdc = &dev->rbd_client->client->osdc; struct ceph_osd_event *event; struct rbd_notify_info info; int payload_len = sizeof(u32) + sizeof(u32); @@ -1421,9 +1449,7 @@ static void rbd_rq_fn(struct request_queue *q) struct request *rq; struct bio_pair *bp = NULL; - rq = blk_fetch_request(q); - - while (1) { + while ((rq = blk_fetch_request(q))) { struct bio *bio; struct bio *rq_bio, *next_bio = NULL; bool do_write; @@ -1441,32 +1467,32 @@ static void rbd_rq_fn(struct request_queue *q) /* filter out 
block requests we don't understand */ if ((rq->cmd_type != REQ_TYPE_FS)) { __blk_end_request_all(rq, 0); - goto next; + continue; } /* deduce our operation (read, write) */ do_write = (rq_data_dir(rq) == WRITE); size = blk_rq_bytes(rq); - ofs = blk_rq_pos(rq) * 512ULL; + ofs = blk_rq_pos(rq) * SECTOR_SIZE; rq_bio = rq->bio; if (do_write && rbd_dev->read_only) { __blk_end_request_all(rq, -EROFS); - goto next; + continue; } spin_unlock_irq(q->queue_lock); dout("%s 0x%x bytes at 0x%llx\n", do_write ? "write" : "read", - size, blk_rq_pos(rq) * 512ULL); + size, blk_rq_pos(rq) * SECTOR_SIZE); num_segs = rbd_get_num_segments(&rbd_dev->header, ofs, size); coll = rbd_alloc_coll(num_segs); if (!coll) { spin_lock_irq(q->queue_lock); __blk_end_request_all(rq, -ENOMEM); - goto next; + continue; } do { @@ -1512,8 +1538,6 @@ next_seg: if (bp) bio_pair_release(bp); spin_lock_irq(q->queue_lock); -next: - rq = blk_fetch_request(q); } } @@ -1526,13 +1550,17 @@ static int rbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bmd, struct bio_vec *bvec) { struct rbd_device *rbd_dev = q->queuedata; - unsigned int chunk_sectors = 1 << (rbd_dev->header.obj_order - 9); - sector_t sector = bmd->bi_sector + get_start_sect(bmd->bi_bdev); - unsigned int bio_sectors = bmd->bi_size >> 9; + unsigned int chunk_sectors; + sector_t sector; + unsigned int bio_sectors; int max; + chunk_sectors = 1 << (rbd_dev->header.obj_order - SECTOR_SHIFT); + sector = bmd->bi_sector + get_start_sect(bmd->bi_bdev); + bio_sectors = bmd->bi_size >> SECTOR_SHIFT; + max = (chunk_sectors - ((sector & (chunk_sectors - 1)) - + bio_sectors)) << 9; + + bio_sectors)) << SECTOR_SHIFT; if (max < 0) max = 0; /* bio_add cannot handle a negative return */ if (max <= bvec->bv_len && bio_sectors == 0) @@ -1565,15 +1593,16 @@ static int rbd_read_header(struct rbd_device *rbd_dev, ssize_t rc; struct rbd_image_header_ondisk *dh; int snap_count = 0; - u64 snap_names_len = 0; u64 ver; + size_t len; + /* + * First reads the fixed-size header to determine the number + * of snapshots, then re-reads it, along with all snapshot + * records as well as their stored names. 
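The loop that follows is a read-until-stable exchange: read the fixed-size header first to learn the snapshot count, re-read at the implied full size, and retry if the count changed in between (a snapshot was created or deleted mid-read). A schematic sketch, with fetch_header() standing in for the synchronous OSD read and an invented 16-byte per-snapshot record size:

```c
#include <stdlib.h>

struct header { int snap_count; /* fixed-size fields... */ };

/* Stand-in for the OSD read: fills up to len bytes of header and
 * returns the snapshot count currently stored on the server. */
extern int fetch_header(void *buf, size_t len);

static void *read_header_stable(void)
{
	int snap_count = 0;			/* best guess so far */
	size_t len = sizeof(struct header);	/* fixed part only, at first */

	for (;;) {
		void *buf = malloc(len);
		int now;

		if (!buf)
			return NULL;
		now = fetch_header(buf, len);
		if (now == snap_count)
			return buf;		/* size guess held; done */

		/* A snapshot came or went mid-read: resize and retry. */
		free(buf);
		snap_count = now;
		len = sizeof(struct header) + (size_t)snap_count * 16;
	}
}
```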
+ */ + len = sizeof (*dh); while (1) { - int len = sizeof(*dh) + - snap_count * sizeof(struct rbd_image_snap_ondisk) + - snap_names_len; - - rc = -ENOMEM; dh = kmalloc(len, GFP_KERNEL); if (!dh) return -ENOMEM; @@ -1588,21 +1617,22 @@ static int rbd_read_header(struct rbd_device *rbd_dev, rc = rbd_header_from_disk(header, dh, snap_count, GFP_KERNEL); if (rc < 0) { - if (rc == -ENXIO) { + if (rc == -ENXIO) pr_warning("unrecognized header format" " for image %s", rbd_dev->obj); - } goto out_dh; } - if (snap_count != header->total_snaps) { - snap_count = header->total_snaps; - snap_names_len = header->snap_names_len; - rbd_header_free(header); - kfree(dh); - continue; - } - break; + if (snap_count == header->total_snaps) + break; + + snap_count = header->total_snaps; + len = sizeof (*dh) + + snap_count * sizeof(struct rbd_image_snap_ondisk) + + header->snap_names_len; + + rbd_header_free(header); + kfree(dh); } header->obj_version = ver; @@ -1623,13 +1653,14 @@ static int rbd_header_add_snap(struct rbd_device *dev, int ret; void *data, *p, *e; u64 ver; + struct ceph_mon_client *monc; /* we should create a snapshot only if we're pointing at the head */ if (dev->cur_snap) return -EINVAL; - ret = ceph_monc_create_snapid(&dev->client->monc, dev->poolid, - &new_snapid); + monc = &dev->rbd_client->client->monc; + ret = ceph_monc_create_snapid(monc, dev->poolid, &new_snapid); dout("created snapid=%lld\n", new_snapid); if (ret < 0) return ret; @@ -1684,9 +1715,9 @@ static int __rbd_update_snaps(struct rbd_device *rbd_dev) return ret; /* resized? */ - set_capacity(rbd_dev->disk, h.image_size / 512ULL); + set_capacity(rbd_dev->disk, h.image_size / SECTOR_SIZE); - down_write(&rbd_dev->header.snap_rwsem); + down_write(&rbd_dev->header_rwsem); snap_seq = rbd_dev->header.snapc->seq; if (rbd_dev->header.total_snaps && @@ -1711,7 +1742,7 @@ static int __rbd_update_snaps(struct rbd_device *rbd_dev) ret = __rbd_init_snaps_header(rbd_dev); - up_write(&rbd_dev->header.snap_rwsem); + up_write(&rbd_dev->header_rwsem); return ret; } @@ -1721,6 +1752,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) struct gendisk *disk; struct request_queue *q; int rc; + u64 segment_size; u64 total_size = 0; /* contact OSD, request size info about the object being mapped */ @@ -1733,7 +1765,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) if (rc) return rc; - rc = rbd_header_set_snap(rbd_dev, rbd_dev->snap_name, &total_size); + rc = rbd_header_set_snap(rbd_dev, &total_size); if (rc) return rc; @@ -1743,7 +1775,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) if (!disk) goto out; - snprintf(disk->disk_name, sizeof(disk->disk_name), DRV_NAME "%d", + snprintf(disk->disk_name, sizeof(disk->disk_name), RBD_DRV_NAME "%d", rbd_dev->id); disk->major = rbd_dev->major; disk->first_minor = 0; @@ -1756,11 +1788,15 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) if (!q) goto out_disk; + /* We use the default size, but let's be explicit about it. 
*/ + blk_queue_physical_block_size(q, SECTOR_SIZE); + /* set io sizes to object size */ - blk_queue_max_hw_sectors(q, rbd_obj_bytes(&rbd_dev->header) / 512ULL); - blk_queue_max_segment_size(q, rbd_obj_bytes(&rbd_dev->header)); - blk_queue_io_min(q, rbd_obj_bytes(&rbd_dev->header)); - blk_queue_io_opt(q, rbd_obj_bytes(&rbd_dev->header)); + segment_size = rbd_obj_bytes(&rbd_dev->header); + blk_queue_max_hw_sectors(q, segment_size / SECTOR_SIZE); + blk_queue_max_segment_size(q, segment_size); + blk_queue_io_min(q, segment_size); + blk_queue_io_opt(q, segment_size); blk_queue_merge_bvec(q, rbd_merge_bvec); disk->queue = q; @@ -1771,7 +1807,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) rbd_dev->q = q; /* finally, announce the disk to the world */ - set_capacity(disk, total_size / 512ULL); + set_capacity(disk, total_size / SECTOR_SIZE); add_disk(disk); pr_info("%s: added with size 0x%llx\n", @@ -1788,10 +1824,15 @@ out: sysfs */ +static struct rbd_device *dev_to_rbd_dev(struct device *dev) +{ + return container_of(dev, struct rbd_device, dev); +} + static ssize_t rbd_size_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct rbd_device *rbd_dev = dev_to_rbd(dev); + struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); return sprintf(buf, "%llu\n", (unsigned long long)rbd_dev->header.image_size); } @@ -1799,7 +1840,7 @@ static ssize_t rbd_size_show(struct device *dev, static ssize_t rbd_major_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct rbd_device *rbd_dev = dev_to_rbd(dev); + struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); return sprintf(buf, "%d\n", rbd_dev->major); } @@ -1807,15 +1848,16 @@ static ssize_t rbd_major_show(struct device *dev, static ssize_t rbd_client_id_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct rbd_device *rbd_dev = dev_to_rbd(dev); + struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); - return sprintf(buf, "client%lld\n", ceph_client_id(rbd_dev->client)); + return sprintf(buf, "client%lld\n", + ceph_client_id(rbd_dev->rbd_client->client)); } static ssize_t rbd_pool_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct rbd_device *rbd_dev = dev_to_rbd(dev); + struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); return sprintf(buf, "%s\n", rbd_dev->pool_name); } @@ -1823,7 +1865,7 @@ static ssize_t rbd_pool_show(struct device *dev, static ssize_t rbd_name_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct rbd_device *rbd_dev = dev_to_rbd(dev); + struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); return sprintf(buf, "%s\n", rbd_dev->obj); } @@ -1832,7 +1874,7 @@ static ssize_t rbd_snap_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct rbd_device *rbd_dev = dev_to_rbd(dev); + struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); return sprintf(buf, "%s\n", rbd_dev->snap_name); } @@ -1842,7 +1884,7 @@ static ssize_t rbd_image_refresh(struct device *dev, const char *buf, size_t size) { - struct rbd_device *rbd_dev = dev_to_rbd(dev); + struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); int rc; int ret = size; @@ -1907,7 +1949,7 @@ static ssize_t rbd_snap_size_show(struct device *dev, { struct rbd_snap *snap = container_of(dev, struct rbd_snap, dev); - return sprintf(buf, "%lld\n", (long long)snap->size); + return sprintf(buf, "%zd\n", snap->size); } static ssize_t rbd_snap_id_show(struct device *dev, @@ -1916,7 +1958,7 @@ static ssize_t rbd_snap_id_show(struct device *dev, { struct rbd_snap *snap = container_of(dev, 
struct rbd_snap, dev); - return sprintf(buf, "%lld\n", (long long)snap->id); + return sprintf(buf, "%llu\n", (unsigned long long) snap->id); } static DEVICE_ATTR(snap_size, S_IRUGO, rbd_snap_size_show, NULL); @@ -2088,19 +2130,9 @@ static int __rbd_init_snaps_header(struct rbd_device *rbd_dev) return 0; } - -static void rbd_root_dev_release(struct device *dev) -{ -} - -static struct device rbd_root_dev = { - .init_name = "rbd", - .release = rbd_root_dev_release, -}; - static int rbd_bus_add_dev(struct rbd_device *rbd_dev) { - int ret = -ENOMEM; + int ret; struct device *dev; struct rbd_snap *snap; @@ -2114,7 +2146,7 @@ static int rbd_bus_add_dev(struct rbd_device *rbd_dev) dev_set_name(dev, "%d", rbd_dev->id); ret = device_register(dev); if (ret < 0) - goto done_free; + goto out; list_for_each_entry(snap, &rbd_dev->snaps, node) { ret = rbd_register_snap_dev(rbd_dev, snap, @@ -2122,10 +2154,7 @@ static int rbd_bus_add_dev(struct rbd_device *rbd_dev) if (ret < 0) break; } - - mutex_unlock(&ctl_mutex); - return 0; -done_free: +out: mutex_unlock(&ctl_mutex); return ret; } @@ -2154,104 +2183,250 @@ static int rbd_init_watch_dev(struct rbd_device *rbd_dev) return ret; } +static atomic64_t rbd_id_max = ATOMIC64_INIT(0); + +/* + * Get a unique rbd identifier for the given new rbd_dev, and add + * the rbd_dev to the global list. The minimum rbd id is 1. + */ +static void rbd_id_get(struct rbd_device *rbd_dev) +{ + rbd_dev->id = atomic64_inc_return(&rbd_id_max); + + spin_lock(&rbd_dev_list_lock); + list_add_tail(&rbd_dev->node, &rbd_dev_list); + spin_unlock(&rbd_dev_list_lock); +} + +/* + * Remove an rbd_dev from the global list, and record that its + * identifier is no longer in use. + */ +static void rbd_id_put(struct rbd_device *rbd_dev) +{ + struct list_head *tmp; + int rbd_id = rbd_dev->id; + int max_id; + + BUG_ON(rbd_id < 1); + + spin_lock(&rbd_dev_list_lock); + list_del_init(&rbd_dev->node); + + /* + * If the id being "put" is not the current maximum, there + * is nothing special we need to do. + */ + if (rbd_id != atomic64_read(&rbd_id_max)) { + spin_unlock(&rbd_dev_list_lock); + return; + } + + /* + * We need to update the current maximum id. Search the + * list to find out what it is. We're more likely to find + * the maximum at the end, so search the list backward. + */ + max_id = 0; + list_for_each_prev(tmp, &rbd_dev_list) { + struct rbd_device *rbd_dev; + + rbd_dev = list_entry(tmp, struct rbd_device, node); + if (rbd_dev->id > max_id) + max_id = rbd_dev->id; + } + spin_unlock(&rbd_dev_list_lock); + + /* + * The max id could have been updated by rbd_id_get(), in + * which case it now accurately reflects the new maximum. + * Be careful not to overwrite the maximum value in that + * case. + */ + atomic64_cmpxchg(&rbd_id_max, rbd_id, max_id); +} + +/* + * Skips over white space at *buf, and updates *buf to point to the + * first found non-space character (if any). Returns the length of + * the token (string of non-white space characters) found. Note + * that *buf must be terminated with '\0'. + */ +static inline size_t next_token(const char **buf) +{ + /* + * These are the characters that produce nonzero for + * isspace() in the "C" and "POSIX" locales. + */ + const char *spaces = " \f\n\r\t\v"; + + *buf += strspn(*buf, spaces); /* Find start of token */ + + return strcspn(*buf, spaces); /* Return token length */ +} + +/* + * Finds the next token in *buf, and if the provided token buffer is + * big enough, copies the found token into it. 
The result, if + copied, is guaranteed to be terminated with '\0'. Note that *buf + * must be terminated with '\0' on entry. + * + * Returns the length of the token found (not including the '\0'). + * Return value will be 0 if no token is found, and it will be >= + * token_size if the token would not fit. + * + * The *buf pointer will be updated to point beyond the end of the + * found token. Note that this occurs even if the token buffer is + * too small to hold it. + */ +static inline size_t copy_token(const char **buf, + char *token, + size_t token_size) +{ + size_t len; + + len = next_token(buf); + if (len < token_size) { + memcpy(token, *buf, len); + *(token + len) = '\0'; + } + *buf += len; + + return len; +} + +/* + * This fills in the pool_name, obj, obj_len, snap_name, and + * obj_md_name fields of the given rbd_dev, based + * on the list of monitor addresses and other options provided via + * /sys/bus/rbd/add. + */ +static int rbd_add_parse_args(struct rbd_device *rbd_dev, + const char *buf, + const char **mon_addrs, + size_t *mon_addrs_size, + char *options, + size_t options_size) +{ + size_t len; + + /* The first four tokens are required */ + + len = next_token(&buf); + if (!len) + return -EINVAL; + *mon_addrs_size = len + 1; + *mon_addrs = buf; + + buf += len; + + len = copy_token(&buf, options, options_size); + if (!len || len >= options_size) + return -EINVAL; + + len = copy_token(&buf, rbd_dev->pool_name, sizeof (rbd_dev->pool_name)); + if (!len || len >= sizeof (rbd_dev->pool_name)) + return -EINVAL; + + len = copy_token(&buf, rbd_dev->obj, sizeof (rbd_dev->obj)); + if (!len || len >= sizeof (rbd_dev->obj)) + return -EINVAL; + + /* We have the object length in hand, save it. */ + + rbd_dev->obj_len = len; + + BUILD_BUG_ON(RBD_MAX_MD_NAME_LEN + < RBD_MAX_OBJ_NAME_LEN + sizeof (RBD_SUFFIX)); + sprintf(rbd_dev->obj_md_name, "%s%s", rbd_dev->obj, RBD_SUFFIX); + + /* + * The snapshot name is optional, but it's an error if it's + * too long. If no snapshot is supplied, fill in the default. 
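next_token() and copy_token() above are thin wrappers over strspn()/strcspn(), and the add command is simply whitespace-separated tokens. The same helpers compile unchanged in userspace; here they parse a made-up add line (all sample values are invented):

```c
#include <stdio.h>
#include <string.h>

static const char SPACES[] = " \f\n\r\t\v";

static size_t next_token(const char **buf)
{
	*buf += strspn(*buf, SPACES);	/* skip leading whitespace */
	return strcspn(*buf, SPACES);	/* length of the token */
}

static size_t copy_token(const char **buf, char *token, size_t token_size)
{
	size_t len = next_token(buf);

	if (len < token_size) {
		memcpy(token, *buf, len);
		token[len] = '\0';
	}
	*buf += len;			/* advance even if it did not fit */
	return len;
}

int main(void)
{
	const char *line = "1.2.3.4:6789 name=admin mypool myimage mysnap";
	char tok[32];

	while (copy_token(&line, tok, sizeof(tok)))
		printf("token: %s\n", tok);
	return 0;
}
```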
+ */ + len = copy_token(&buf, rbd_dev->snap_name, sizeof (rbd_dev->snap_name)); + if (!len) + memcpy(rbd_dev->snap_name, RBD_SNAP_HEAD_NAME, + sizeof (RBD_SNAP_HEAD_NAME)); + else if (len >= sizeof (rbd_dev->snap_name)) + return -EINVAL; + + return 0; +} + static ssize_t rbd_add(struct bus_type *bus, const char *buf, size_t count) { - struct ceph_osd_client *osdc; struct rbd_device *rbd_dev; - ssize_t rc = -ENOMEM; - int irc, new_id = 0; - struct list_head *tmp; - char *mon_dev_name; - char *options; + const char *mon_addrs = NULL; + size_t mon_addrs_size = 0; + char *options = NULL; + struct ceph_osd_client *osdc; + int rc = -ENOMEM; if (!try_module_get(THIS_MODULE)) return -ENODEV; - mon_dev_name = kmalloc(RBD_MAX_OPT_LEN, GFP_KERNEL); - if (!mon_dev_name) - goto err_out_mod; - - options = kmalloc(RBD_MAX_OPT_LEN, GFP_KERNEL); - if (!options) - goto err_mon_dev; - - /* new rbd_device object */ rbd_dev = kzalloc(sizeof(*rbd_dev), GFP_KERNEL); if (!rbd_dev) - goto err_out_opt; + goto err_nomem; + options = kmalloc(count, GFP_KERNEL); + if (!options) + goto err_nomem; /* static rbd_device initialization */ spin_lock_init(&rbd_dev->lock); INIT_LIST_HEAD(&rbd_dev->node); INIT_LIST_HEAD(&rbd_dev->snaps); + init_rwsem(&rbd_dev->header_rwsem); - init_rwsem(&rbd_dev->header.snap_rwsem); + init_rwsem(&rbd_dev->header_rwsem); /* generate unique id: find highest unique id, add one */ - mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); - - list_for_each(tmp, &rbd_dev_list) { - struct rbd_device *rbd_dev; + rbd_id_get(rbd_dev); - rbd_dev = list_entry(tmp, struct rbd_device, node); - if (rbd_dev->id >= new_id) - new_id = rbd_dev->id + 1; - } - - rbd_dev->id = new_id; - - /* add to global list */ - list_add_tail(&rbd_dev->node, &rbd_dev_list); + /* Fill in the device name, now that we have its id. 
*/ + BUILD_BUG_ON(DEV_NAME_LEN + < sizeof (RBD_DRV_NAME) + MAX_INT_FORMAT_WIDTH); + sprintf(rbd_dev->name, "%s%d", RBD_DRV_NAME, rbd_dev->id); /* parse add command */ - if (sscanf(buf, "%" __stringify(RBD_MAX_OPT_LEN) "s " - "%" __stringify(RBD_MAX_OPT_LEN) "s " - "%" __stringify(RBD_MAX_POOL_NAME_LEN) "s " - "%" __stringify(RBD_MAX_OBJ_NAME_LEN) "s" - "%" __stringify(RBD_MAX_SNAP_NAME_LEN) "s", - mon_dev_name, options, rbd_dev->pool_name, - rbd_dev->obj, rbd_dev->snap_name) < 4) { - rc = -EINVAL; - goto err_out_slot; - } - - if (rbd_dev->snap_name[0] == 0) - rbd_dev->snap_name[0] = '-'; - - rbd_dev->obj_len = strlen(rbd_dev->obj); - snprintf(rbd_dev->obj_md_name, sizeof(rbd_dev->obj_md_name), "%s%s", - rbd_dev->obj, RBD_SUFFIX); - - /* initialize rest of new object */ - snprintf(rbd_dev->name, DEV_NAME_LEN, DRV_NAME "%d", rbd_dev->id); - rc = rbd_get_client(rbd_dev, mon_dev_name, options); - if (rc < 0) - goto err_out_slot; + rc = rbd_add_parse_args(rbd_dev, buf, &mon_addrs, &mon_addrs_size, + options, count); + if (rc) + goto err_put_id; - mutex_unlock(&ctl_mutex); + rbd_dev->rbd_client = rbd_get_client(mon_addrs, mon_addrs_size - 1, + options); + if (IS_ERR(rbd_dev->rbd_client)) { + rc = PTR_ERR(rbd_dev->rbd_client); + goto err_put_id; + } /* pick the pool */ - osdc = &rbd_dev->client->osdc; + osdc = &rbd_dev->rbd_client->client->osdc; rc = ceph_pg_poolid_by_name(osdc->osdmap, rbd_dev->pool_name); if (rc < 0) goto err_out_client; rbd_dev->poolid = rc; /* register our block device */ - irc = register_blkdev(0, rbd_dev->name); - if (irc < 0) { - rc = irc; + rc = register_blkdev(0, rbd_dev->name); + if (rc < 0) goto err_out_client; - } - rbd_dev->major = irc; + rbd_dev->major = rc; rc = rbd_bus_add_dev(rbd_dev); if (rc) goto err_out_blkdev; - /* set up and announce blkdev mapping */ + /* + * At this point cleanup in the event of an error is the job + * of the sysfs code (initiated by rbd_bus_del_dev()). + * + * Set up and announce blkdev mapping. 
+ */ rc = rbd_init_disk(rbd_dev); if (rc) goto err_out_bus; @@ -2263,35 +2438,26 @@ static ssize_t rbd_add(struct bus_type *bus, return count; err_out_bus: - mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); - list_del_init(&rbd_dev->node); - mutex_unlock(&ctl_mutex); - /* this will also clean up rest of rbd_dev stuff */ rbd_bus_del_dev(rbd_dev); kfree(options); - kfree(mon_dev_name); return rc; err_out_blkdev: unregister_blkdev(rbd_dev->major, rbd_dev->name); err_out_client: rbd_put_client(rbd_dev); - mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); -err_out_slot: - list_del_init(&rbd_dev->node); - mutex_unlock(&ctl_mutex); - - kfree(rbd_dev); -err_out_opt: +err_put_id: + rbd_id_put(rbd_dev); +err_nomem: kfree(options); -err_mon_dev: - kfree(mon_dev_name); -err_out_mod: + kfree(rbd_dev); + dout("Error adding device %s\n", buf); module_put(THIS_MODULE); - return rc; + + return (ssize_t) rc; } static struct rbd_device *__rbd_get_dev(unsigned long id) @@ -2299,22 +2465,28 @@ static struct rbd_device *__rbd_get_dev(unsigned long id) struct list_head *tmp; struct rbd_device *rbd_dev; + spin_lock(&rbd_dev_list_lock); list_for_each(tmp, &rbd_dev_list) { rbd_dev = list_entry(tmp, struct rbd_device, node); - if (rbd_dev->id == id) + if (rbd_dev->id == id) { + spin_unlock(&rbd_dev_list_lock); return rbd_dev; + } } + spin_unlock(&rbd_dev_list_lock); return NULL; } static void rbd_dev_release(struct device *dev) { - struct rbd_device *rbd_dev = - container_of(dev, struct rbd_device, dev); + struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); - if (rbd_dev->watch_request) - ceph_osdc_unregister_linger_request(&rbd_dev->client->osdc, + if (rbd_dev->watch_request) { + struct ceph_client *client = rbd_dev->rbd_client->client; + + ceph_osdc_unregister_linger_request(&client->osdc, rbd_dev->watch_request); + } if (rbd_dev->watch_event) rbd_req_sync_unwatch(rbd_dev, rbd_dev->obj_md_name); @@ -2323,6 +2495,9 @@ static void rbd_dev_release(struct device *dev) /* clean up and free blkdev */ rbd_free_disk(rbd_dev); unregister_blkdev(rbd_dev->major, rbd_dev->name); + + /* done with the id, and with the rbd_dev */ + rbd_id_put(rbd_dev); kfree(rbd_dev); /* release module ref */ @@ -2355,8 +2530,6 @@ static ssize_t rbd_remove(struct bus_type *bus, goto done; } - list_del_init(&rbd_dev->node); - __rbd_remove_all_snaps(rbd_dev); rbd_bus_del_dev(rbd_dev); @@ -2370,7 +2543,7 @@ static ssize_t rbd_snap_add(struct device *dev, const char *buf, size_t count) { - struct rbd_device *rbd_dev = dev_to_rbd(dev); + struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); int ret; char *name = kmalloc(count + 1, GFP_KERNEL); if (!name) @@ -2406,12 +2579,6 @@ err_unlock: return ret; } -static struct bus_attribute rbd_bus_attrs[] = { - __ATTR(add, S_IWUSR, NULL, rbd_add), - __ATTR(remove, S_IWUSR, NULL, rbd_remove), - __ATTR_NULL -}; - /* * create control files in sysfs * /sys/bus/rbd/... 
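Editor's note: the rbd_id_get()/rbd_id_put() pair above trades the old "walk the list under ctl_mutex" id allocation for a monotonically increasing atomic64_t, and only rolls rbd_id_max back with atomic64_cmpxchg() when the id being released really was the maximum and nothing newer raced in. The same pattern can be exercised outside the kernel; the sketch below is a userspace analogue using C11 atomics, with a plain array standing in for the spinlock-protected device list. Every name in it is invented for illustration; none of it is rbd code.

/* Userspace analogue of the rbd_id_get()/rbd_id_put() pattern above.
 * Illustrative only -- not kernel code. */
#include <stdatomic.h>
#include <stdio.h>

static atomic_llong id_max;		/* stands in for rbd_id_max */
static long long live[16];		/* stands in for rbd_dev_list */
static int nlive;

/* Analogue of rbd_id_get(): ids are 1-based and strictly increasing. */
static long long id_get(void)
{
	long long id = atomic_fetch_add(&id_max, 1) + 1;

	live[nlive++] = id;
	return id;
}

/* Analogue of rbd_id_put(): drop the id, recompute the maximum of the
 * survivors, and only roll id_max back if no newer id raced in. */
static void id_put(long long id)
{
	long long max = 0, expected = id;
	int i, j = 0;

	for (i = 0; i < nlive; i++) {
		if (live[i] == id)
			continue;
		if (live[i] > max)
			max = live[i];
		live[j++] = live[i];
	}
	nlive = j;
	atomic_compare_exchange_strong(&id_max, &expected, max);
}

int main(void)
{
	long long a = id_get(), b = id_get(), c = id_get();	/* 1 2 3 */

	id_put(c);			/* 3 was the max, so it rolls back */
	printf("%lld %lld next=%lld\n", a, b, id_get());	/* 1 2 next=3 */
	return 0;
}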
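Editor's note: next_token() and copy_token() above are thin wrappers around strspn()/strcspn(), so they compile unchanged in userspace. The test below feeds them an invented /sys/bus/rbd/add-style string (monitor address, options, pool, image) to show how the parser splits it; the input string and the harness are illustrative only, the two helpers are copied from the hunk above.

/* Standalone exercise of the next_token()/copy_token() helpers above.
 * The input is an invented example of what might be echoed into
 * /sys/bus/rbd/add; field meanings follow rbd_add_parse_args(). */
#include <stdio.h>
#include <string.h>

static size_t next_token(const char **buf)
{
	const char *spaces = " \f\n\r\t\v";

	*buf += strspn(*buf, spaces);	/* skip leading whitespace */
	return strcspn(*buf, spaces);	/* length of the token */
}

static size_t copy_token(const char **buf, char *token, size_t token_size)
{
	size_t len = next_token(buf);

	if (len < token_size) {
		memcpy(token, *buf, len);
		*(token + len) = '\0';
	}
	*buf += len;			/* advance even when it didn't fit */
	return len;
}

int main(void)
{
	const char *buf = "1.2.3.4:6789 name=admin rbd myimage";
	char tok[32];

	while (copy_token(&buf, tok, sizeof(tok)))
		printf("token: %s\n", tok);
	return 0;
}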
@@ -2420,21 +2587,21 @@ static int rbd_sysfs_init(void) { int ret; - rbd_bus_type.bus_attrs = rbd_bus_attrs; - - ret = bus_register(&rbd_bus_type); - if (ret < 0) + ret = device_register(&rbd_root_dev); + if (ret < 0) return ret; - ret = device_register(&rbd_root_dev); + ret = bus_register(&rbd_bus_type); + if (ret < 0) + device_unregister(&rbd_root_dev); return ret; } static void rbd_sysfs_cleanup(void) { - device_unregister(&rbd_root_dev); bus_unregister(&rbd_bus_type); + device_unregister(&rbd_root_dev); } int __init rbd_init(void) @@ -2444,8 +2611,7 @@ int __init rbd_init(void) rc = rbd_sysfs_init(); if (rc) return rc; - spin_lock_init(&node_lock); - pr_info("loaded " DRV_NAME_LONG "\n"); + pr_info("loaded " RBD_DRV_NAME_LONG "\n"); return 0; } diff --git a/drivers/block/rbd_types.h b/drivers/block/rbd_types.h index fc6c678aa2cb..950708688f17 100644 --- a/drivers/block/rbd_types.h +++ b/drivers/block/rbd_types.h @@ -41,10 +41,6 @@ #define RBD_HEADER_SIGNATURE "RBD" #define RBD_HEADER_VERSION "001.005" -struct rbd_info { - __le64 max_id; -} __attribute__ ((packed)); - struct rbd_image_snap_ondisk { __le64 id; __le64 image_size; diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c index 48e8fee9f2d4..9dcf76a10bb6 100644 --- a/drivers/block/sunvdc.c +++ b/drivers/block/sunvdc.c @@ -839,10 +839,7 @@ static struct vio_driver vdc_port_driver = { .id_table = vdc_port_match, .probe = vdc_port_probe, .remove = vdc_port_remove, - .driver = { - .name = "vdc_port", - .owner = THIS_MODULE, - } + .name = "vdc_port", }; static int __init vdc_init(void) diff --git a/drivers/block/viodasd.c b/drivers/block/viodasd.c deleted file mode 100644 index 9a5b2a2d616d..000000000000 --- a/drivers/block/viodasd.c +++ /dev/null @@ -1,809 +0,0 @@ -/* -*- linux-c -*- - * viodasd.c - * Authors: Dave Boutcher <boutcher@us.ibm.com> - * Ryan Arnold <ryanarn@us.ibm.com> - * Colin Devilbiss <devilbis@us.ibm.com> - * Stephen Rothwell - * - * (C) Copyright 2000-2004 IBM Corporation - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of the - * License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * This routine provides access to disk space (termed "DASD" in historical - * IBM terms) owned and managed by an OS/400 partition running on the - * same box as this Linux partition. - * - * All disk operations are performed by sending messages back and forth to - * the OS/400 partition. 
- */ - -#define pr_fmt(fmt) "viod: " fmt - -#include <linux/major.h> -#include <linux/fs.h> -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/blkdev.h> -#include <linux/genhd.h> -#include <linux/hdreg.h> -#include <linux/errno.h> -#include <linux/init.h> -#include <linux/string.h> -#include <linux/mutex.h> -#include <linux/dma-mapping.h> -#include <linux/completion.h> -#include <linux/device.h> -#include <linux/scatterlist.h> - -#include <asm/uaccess.h> -#include <asm/vio.h> -#include <asm/iseries/hv_types.h> -#include <asm/iseries/hv_lp_event.h> -#include <asm/iseries/hv_lp_config.h> -#include <asm/iseries/vio.h> -#include <asm/firmware.h> - -MODULE_DESCRIPTION("iSeries Virtual DASD"); -MODULE_AUTHOR("Dave Boutcher"); -MODULE_LICENSE("GPL"); - -/* - * We only support 7 partitions per physical disk....so with minor - * numbers 0-255 we get a maximum of 32 disks. - */ -#define VIOD_GENHD_NAME "iseries/vd" - -#define VIOD_VERS "1.64" - -enum { - PARTITION_SHIFT = 3, - MAX_DISKNO = HVMAXARCHITECTEDVIRTUALDISKS, - MAX_DISK_NAME = FIELD_SIZEOF(struct gendisk, disk_name) -}; - -static DEFINE_MUTEX(viodasd_mutex); -static DEFINE_SPINLOCK(viodasd_spinlock); - -#define VIOMAXREQ 16 - -#define DEVICE_NO(cell) ((struct viodasd_device *)(cell) - &viodasd_devices[0]) - -struct viodasd_waitevent { - struct completion com; - int rc; - u16 sub_result; - int max_disk; /* open */ -}; - -static const struct vio_error_entry viodasd_err_table[] = { - { 0x0201, EINVAL, "Invalid Range" }, - { 0x0202, EINVAL, "Invalid Token" }, - { 0x0203, EIO, "DMA Error" }, - { 0x0204, EIO, "Use Error" }, - { 0x0205, EIO, "Release Error" }, - { 0x0206, EINVAL, "Invalid Disk" }, - { 0x0207, EBUSY, "Can't Lock" }, - { 0x0208, EIO, "Already Locked" }, - { 0x0209, EIO, "Already Unlocked" }, - { 0x020A, EIO, "Invalid Arg" }, - { 0x020B, EIO, "Bad IFS File" }, - { 0x020C, EROFS, "Read Only Device" }, - { 0x02FF, EIO, "Internal Error" }, - { 0x0000, 0, NULL }, -}; - -/* - * Figure out the biggest I/O request (in sectors) we can accept - */ -#define VIODASD_MAXSECTORS (4096 / 512 * VIOMAXBLOCKDMA) - -/* - * Number of disk I/O requests we've sent to OS/400 - */ -static int num_req_outstanding; - -/* - * This is our internal structure for keeping track of disk devices - */ -struct viodasd_device { - u16 cylinders; - u16 tracks; - u16 sectors; - u16 bytes_per_sector; - u64 size; - int read_only; - spinlock_t q_lock; - struct gendisk *disk; - struct device *dev; -} viodasd_devices[MAX_DISKNO]; - -/* - * External open entry point. 
- */ -static int viodasd_open(struct block_device *bdev, fmode_t mode) -{ - struct viodasd_device *d = bdev->bd_disk->private_data; - HvLpEvent_Rc hvrc; - struct viodasd_waitevent we; - u16 flags = 0; - - if (d->read_only) { - if (mode & FMODE_WRITE) - return -EROFS; - flags = vioblockflags_ro; - } - - init_completion(&we.com); - - /* Send the open event to OS/400 */ - hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp, - HvLpEvent_Type_VirtualIo, - viomajorsubtype_blockio | vioblockopen, - HvLpEvent_AckInd_DoAck, HvLpEvent_AckType_ImmediateAck, - viopath_sourceinst(viopath_hostLp), - viopath_targetinst(viopath_hostLp), - (u64)(unsigned long)&we, VIOVERSION << 16, - ((u64)DEVICE_NO(d) << 48) | ((u64)flags << 32), - 0, 0, 0); - if (hvrc != 0) { - pr_warning("HV open failed %d\n", (int)hvrc); - return -EIO; - } - - wait_for_completion(&we.com); - - /* Check the return code */ - if (we.rc != 0) { - const struct vio_error_entry *err = - vio_lookup_rc(viodasd_err_table, we.sub_result); - - pr_warning("bad rc opening disk: %d:0x%04x (%s)\n", - (int)we.rc, we.sub_result, err->msg); - return -EIO; - } - - return 0; -} - -static int viodasd_unlocked_open(struct block_device *bdev, fmode_t mode) -{ - int ret; - - mutex_lock(&viodasd_mutex); - ret = viodasd_open(bdev, mode); - mutex_unlock(&viodasd_mutex); - - return ret; -} - - -/* - * External release entry point. - */ -static int viodasd_release(struct gendisk *disk, fmode_t mode) -{ - struct viodasd_device *d = disk->private_data; - HvLpEvent_Rc hvrc; - - mutex_lock(&viodasd_mutex); - /* Send the event to OS/400. We DON'T expect a response */ - hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp, - HvLpEvent_Type_VirtualIo, - viomajorsubtype_blockio | vioblockclose, - HvLpEvent_AckInd_NoAck, HvLpEvent_AckType_ImmediateAck, - viopath_sourceinst(viopath_hostLp), - viopath_targetinst(viopath_hostLp), - 0, VIOVERSION << 16, - ((u64)DEVICE_NO(d) << 48) /* | ((u64)flags << 32) */, - 0, 0, 0); - if (hvrc != 0) - pr_warning("HV close call failed %d\n", (int)hvrc); - - mutex_unlock(&viodasd_mutex); - - return 0; -} - - -/* External ioctl entry point. - */ -static int viodasd_getgeo(struct block_device *bdev, struct hd_geometry *geo) -{ - struct gendisk *disk = bdev->bd_disk; - struct viodasd_device *d = disk->private_data; - - geo->sectors = d->sectors ? d->sectors : 32; - geo->heads = d->tracks ? d->tracks : 64; - geo->cylinders = d->cylinders ? 
d->cylinders : - get_capacity(disk) / (geo->sectors * geo->heads); - - return 0; -} - -/* - * Our file operations table - */ -static const struct block_device_operations viodasd_fops = { - .owner = THIS_MODULE, - .open = viodasd_unlocked_open, - .release = viodasd_release, - .getgeo = viodasd_getgeo, -}; - -/* - * End a request - */ -static void viodasd_end_request(struct request *req, int error, - int num_sectors) -{ - __blk_end_request(req, error, num_sectors << 9); -} - -/* - * Send an actual I/O request to OS/400 - */ -static int send_request(struct request *req) -{ - u64 start; - int direction; - int nsg; - u16 viocmd; - HvLpEvent_Rc hvrc; - struct vioblocklpevent *bevent; - struct HvLpEvent *hev; - struct scatterlist sg[VIOMAXBLOCKDMA]; - int sgindex; - struct viodasd_device *d; - unsigned long flags; - - start = (u64)blk_rq_pos(req) << 9; - - if (rq_data_dir(req) == READ) { - direction = DMA_FROM_DEVICE; - viocmd = viomajorsubtype_blockio | vioblockread; - } else { - direction = DMA_TO_DEVICE; - viocmd = viomajorsubtype_blockio | vioblockwrite; - } - - d = req->rq_disk->private_data; - - /* Now build the scatter-gather list */ - sg_init_table(sg, VIOMAXBLOCKDMA); - nsg = blk_rq_map_sg(req->q, req, sg); - nsg = dma_map_sg(d->dev, sg, nsg, direction); - - spin_lock_irqsave(&viodasd_spinlock, flags); - num_req_outstanding++; - - /* This optimization handles a single DMA block */ - if (nsg == 1) - hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp, - HvLpEvent_Type_VirtualIo, viocmd, - HvLpEvent_AckInd_DoAck, - HvLpEvent_AckType_ImmediateAck, - viopath_sourceinst(viopath_hostLp), - viopath_targetinst(viopath_hostLp), - (u64)(unsigned long)req, VIOVERSION << 16, - ((u64)DEVICE_NO(d) << 48), start, - ((u64)sg_dma_address(&sg[0])) << 32, - sg_dma_len(&sg[0])); - else { - bevent = (struct vioblocklpevent *) - vio_get_event_buffer(viomajorsubtype_blockio); - if (bevent == NULL) { - pr_warning("error allocating disk event buffer\n"); - goto error_ret; - } - - /* - * Now build up the actual request. 
Note that we store - * the pointer to the request in the correlation - * token so we can match the response up later - */ - memset(bevent, 0, sizeof(struct vioblocklpevent)); - hev = &bevent->event; - hev->flags = HV_LP_EVENT_VALID | HV_LP_EVENT_DO_ACK | - HV_LP_EVENT_INT; - hev->xType = HvLpEvent_Type_VirtualIo; - hev->xSubtype = viocmd; - hev->xSourceLp = HvLpConfig_getLpIndex(); - hev->xTargetLp = viopath_hostLp; - hev->xSizeMinus1 = - offsetof(struct vioblocklpevent, u.rw_data.dma_info) + - (sizeof(bevent->u.rw_data.dma_info[0]) * nsg) - 1; - hev->xSourceInstanceId = viopath_sourceinst(viopath_hostLp); - hev->xTargetInstanceId = viopath_targetinst(viopath_hostLp); - hev->xCorrelationToken = (u64)req; - bevent->version = VIOVERSION; - bevent->disk = DEVICE_NO(d); - bevent->u.rw_data.offset = start; - - /* - * Copy just the dma information from the sg list - * into the request - */ - for (sgindex = 0; sgindex < nsg; sgindex++) { - bevent->u.rw_data.dma_info[sgindex].token = - sg_dma_address(&sg[sgindex]); - bevent->u.rw_data.dma_info[sgindex].len = - sg_dma_len(&sg[sgindex]); - } - - /* Send the request */ - hvrc = HvCallEvent_signalLpEvent(&bevent->event); - vio_free_event_buffer(viomajorsubtype_blockio, bevent); - } - - if (hvrc != HvLpEvent_Rc_Good) { - pr_warning("error sending disk event to OS/400 (rc %d)\n", - (int)hvrc); - goto error_ret; - } - spin_unlock_irqrestore(&viodasd_spinlock, flags); - return 0; - -error_ret: - num_req_outstanding--; - spin_unlock_irqrestore(&viodasd_spinlock, flags); - dma_unmap_sg(d->dev, sg, nsg, direction); - return -1; -} - -/* - * This is the external request processing routine - */ -static void do_viodasd_request(struct request_queue *q) -{ - struct request *req; - - /* - * If we already have the maximum number of requests - * outstanding to OS/400 just bail out. We'll come - * back later. - */ - while (num_req_outstanding < VIOMAXREQ) { - req = blk_fetch_request(q); - if (req == NULL) - return; - /* check that request contains a valid command */ - if (req->cmd_type != REQ_TYPE_FS) { - viodasd_end_request(req, -EIO, blk_rq_sectors(req)); - continue; - } - /* Try sending the request */ - if (send_request(req) != 0) - viodasd_end_request(req, -EIO, blk_rq_sectors(req)); - } -} - -/* - * Probe a single disk and fill in the viodasd_device structure - * for it. - */ -static int probe_disk(struct viodasd_device *d) -{ - HvLpEvent_Rc hvrc; - struct viodasd_waitevent we; - int dev_no = DEVICE_NO(d); - struct gendisk *g; - struct request_queue *q; - u16 flags = 0; - -retry: - init_completion(&we.com); - - /* Send the open event to OS/400 */ - hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp, - HvLpEvent_Type_VirtualIo, - viomajorsubtype_blockio | vioblockopen, - HvLpEvent_AckInd_DoAck, HvLpEvent_AckType_ImmediateAck, - viopath_sourceinst(viopath_hostLp), - viopath_targetinst(viopath_hostLp), - (u64)(unsigned long)&we, VIOVERSION << 16, - ((u64)dev_no << 48) | ((u64)flags<< 32), - 0, 0, 0); - if (hvrc != 0) { - pr_warning("bad rc on HV open %d\n", (int)hvrc); - return 0; - } - - wait_for_completion(&we.com); - - if (we.rc != 0) { - if (flags != 0) - return 0; - /* try again with read only flag set */ - flags = vioblockflags_ro; - goto retry; - } - if (we.max_disk > (MAX_DISKNO - 1)) { - printk_once(KERN_INFO pr_fmt("Only examining the first %d of %d disks connected\n"), - MAX_DISKNO, we.max_disk + 1); - } - - /* Send the close event to OS/400. 
We DON'T expect a response */ - hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp, - HvLpEvent_Type_VirtualIo, - viomajorsubtype_blockio | vioblockclose, - HvLpEvent_AckInd_NoAck, HvLpEvent_AckType_ImmediateAck, - viopath_sourceinst(viopath_hostLp), - viopath_targetinst(viopath_hostLp), - 0, VIOVERSION << 16, - ((u64)dev_no << 48) | ((u64)flags << 32), - 0, 0, 0); - if (hvrc != 0) { - pr_warning("bad rc sending event to OS/400 %d\n", (int)hvrc); - return 0; - } - - if (d->dev == NULL) { - /* this is when we reprobe for new disks */ - if (vio_create_viodasd(dev_no) == NULL) { - pr_warning("cannot allocate virtual device for disk %d\n", - dev_no); - return 0; - } - /* - * The vio_create_viodasd will have recursed into this - * routine with d->dev set to the new vio device and - * will finish the setup of the disk below. - */ - return 1; - } - - /* create the request queue for the disk */ - spin_lock_init(&d->q_lock); - q = blk_init_queue(do_viodasd_request, &d->q_lock); - if (q == NULL) { - pr_warning("cannot allocate queue for disk %d\n", dev_no); - return 0; - } - g = alloc_disk(1 << PARTITION_SHIFT); - if (g == NULL) { - pr_warning("cannot allocate disk structure for disk %d\n", - dev_no); - blk_cleanup_queue(q); - return 0; - } - - d->disk = g; - blk_queue_max_segments(q, VIOMAXBLOCKDMA); - blk_queue_max_hw_sectors(q, VIODASD_MAXSECTORS); - g->major = VIODASD_MAJOR; - g->first_minor = dev_no << PARTITION_SHIFT; - if (dev_no >= 26) - snprintf(g->disk_name, sizeof(g->disk_name), - VIOD_GENHD_NAME "%c%c", - 'a' + (dev_no / 26) - 1, 'a' + (dev_no % 26)); - else - snprintf(g->disk_name, sizeof(g->disk_name), - VIOD_GENHD_NAME "%c", 'a' + (dev_no % 26)); - g->fops = &viodasd_fops; - g->queue = q; - g->private_data = d; - g->driverfs_dev = d->dev; - set_capacity(g, d->size >> 9); - - pr_info("disk %d: %lu sectors (%lu MB) CHS=%d/%d/%d sector size %d%s\n", - dev_no, (unsigned long)(d->size >> 9), - (unsigned long)(d->size >> 20), - (int)d->cylinders, (int)d->tracks, - (int)d->sectors, (int)d->bytes_per_sector, - d->read_only ? " (RO)" : ""); - - /* register us in the global list */ - add_disk(g); - return 1; -} - -/* returns the total number of scatterlist elements converted */ -static int block_event_to_scatterlist(const struct vioblocklpevent *bevent, - struct scatterlist *sg, int *total_len) -{ - int i, numsg; - const struct rw_data *rw_data = &bevent->u.rw_data; - static const int offset = - offsetof(struct vioblocklpevent, u.rw_data.dma_info); - static const int element_size = sizeof(rw_data->dma_info[0]); - - numsg = ((bevent->event.xSizeMinus1 + 1) - offset) / element_size; - if (numsg > VIOMAXBLOCKDMA) - numsg = VIOMAXBLOCKDMA; - - *total_len = 0; - sg_init_table(sg, VIOMAXBLOCKDMA); - for (i = 0; (i < numsg) && (rw_data->dma_info[i].len > 0); ++i) { - sg_dma_address(&sg[i]) = rw_data->dma_info[i].token; - sg_dma_len(&sg[i]) = rw_data->dma_info[i].len; - *total_len += rw_data->dma_info[i].len; - } - return i; -} - -/* - * Restart all queues, starting with the one _after_ the disk given, - * thus reducing the chance of starvation of higher numbered disks. 
- */ -static void viodasd_restart_all_queues_starting_from(int first_index) -{ - int i; - - for (i = first_index + 1; i < MAX_DISKNO; ++i) - if (viodasd_devices[i].disk) - blk_run_queue(viodasd_devices[i].disk->queue); - for (i = 0; i <= first_index; ++i) - if (viodasd_devices[i].disk) - blk_run_queue(viodasd_devices[i].disk->queue); -} - -/* - * For read and write requests, decrement the number of outstanding requests, - * Free the DMA buffers we allocated. - */ -static int viodasd_handle_read_write(struct vioblocklpevent *bevent) -{ - int num_sg, num_sect, pci_direction, total_len; - struct request *req; - struct scatterlist sg[VIOMAXBLOCKDMA]; - struct HvLpEvent *event = &bevent->event; - unsigned long irq_flags; - struct viodasd_device *d; - int error; - spinlock_t *qlock; - - num_sg = block_event_to_scatterlist(bevent, sg, &total_len); - num_sect = total_len >> 9; - if (event->xSubtype == (viomajorsubtype_blockio | vioblockread)) - pci_direction = DMA_FROM_DEVICE; - else - pci_direction = DMA_TO_DEVICE; - req = (struct request *)bevent->event.xCorrelationToken; - d = req->rq_disk->private_data; - - dma_unmap_sg(d->dev, sg, num_sg, pci_direction); - - /* - * Since this is running in interrupt mode, we need to make sure - * we're not stepping on any global I/O operations - */ - spin_lock_irqsave(&viodasd_spinlock, irq_flags); - num_req_outstanding--; - spin_unlock_irqrestore(&viodasd_spinlock, irq_flags); - - error = (event->xRc == HvLpEvent_Rc_Good) ? 0 : -EIO; - if (error) { - const struct vio_error_entry *err; - err = vio_lookup_rc(viodasd_err_table, bevent->sub_result); - pr_warning("read/write error %d:0x%04x (%s)\n", - event->xRc, bevent->sub_result, err->msg); - num_sect = blk_rq_sectors(req); - } - qlock = req->q->queue_lock; - spin_lock_irqsave(qlock, irq_flags); - viodasd_end_request(req, error, num_sect); - spin_unlock_irqrestore(qlock, irq_flags); - - /* Finally, try to get more requests off of this device's queue */ - viodasd_restart_all_queues_starting_from(DEVICE_NO(d)); - - return 0; -} - -/* This routine handles incoming block LP events */ -static void handle_block_event(struct HvLpEvent *event) -{ - struct vioblocklpevent *bevent = (struct vioblocklpevent *)event; - struct viodasd_waitevent *pwe; - - if (event == NULL) - /* Notification that a partition went away! */ - return; - /* First, we should NEVER get an int here...only acks */ - if (hvlpevent_is_int(event)) { - pr_warning("Yikes! got an int in viodasd event handler!\n"); - if (hvlpevent_need_ack(event)) { - event->xRc = HvLpEvent_Rc_InvalidSubtype; - HvCallEvent_ackLpEvent(event); - } - } - - switch (event->xSubtype & VIOMINOR_SUBTYPE_MASK) { - case vioblockopen: - /* - * Handle a response to an open request. We get all the - * disk information in the response, so update it. The - * correlation token contains a pointer to a waitevent - * structure that has a completion in it. 
update the - * return code in the waitevent structure and post the - * completion to wake up the guy who sent the request - */ - pwe = (struct viodasd_waitevent *)event->xCorrelationToken; - pwe->rc = event->xRc; - pwe->sub_result = bevent->sub_result; - if (event->xRc == HvLpEvent_Rc_Good) { - const struct open_data *data = &bevent->u.open_data; - struct viodasd_device *device = - &viodasd_devices[bevent->disk]; - device->read_only = - bevent->flags & vioblockflags_ro; - device->size = data->disk_size; - device->cylinders = data->cylinders; - device->tracks = data->tracks; - device->sectors = data->sectors; - device->bytes_per_sector = data->bytes_per_sector; - pwe->max_disk = data->max_disk; - } - complete(&pwe->com); - break; - case vioblockclose: - break; - case vioblockread: - case vioblockwrite: - viodasd_handle_read_write(bevent); - break; - - default: - pr_warning("invalid subtype!"); - if (hvlpevent_need_ack(event)) { - event->xRc = HvLpEvent_Rc_InvalidSubtype; - HvCallEvent_ackLpEvent(event); - } - } -} - -/* - * Get the driver to reprobe for more disks. - */ -static ssize_t probe_disks(struct device_driver *drv, const char *buf, - size_t count) -{ - struct viodasd_device *d; - - for (d = viodasd_devices; d < &viodasd_devices[MAX_DISKNO]; d++) { - if (d->disk == NULL) - probe_disk(d); - } - return count; -} -static DRIVER_ATTR(probe, S_IWUSR, NULL, probe_disks); - -static int viodasd_probe(struct vio_dev *vdev, const struct vio_device_id *id) -{ - struct viodasd_device *d = &viodasd_devices[vdev->unit_address]; - - d->dev = &vdev->dev; - if (!probe_disk(d)) - return -ENODEV; - return 0; -} - -static int viodasd_remove(struct vio_dev *vdev) -{ - struct viodasd_device *d; - - d = &viodasd_devices[vdev->unit_address]; - if (d->disk) { - del_gendisk(d->disk); - blk_cleanup_queue(d->disk->queue); - put_disk(d->disk); - d->disk = NULL; - } - d->dev = NULL; - return 0; -} - -/** - * viodasd_device_table: Used by vio.c to match devices that we - * support. - */ -static struct vio_device_id viodasd_device_table[] __devinitdata = { - { "block", "IBM,iSeries-viodasd" }, - { "", "" } -}; -MODULE_DEVICE_TABLE(vio, viodasd_device_table); - -static struct vio_driver viodasd_driver = { - .id_table = viodasd_device_table, - .probe = viodasd_probe, - .remove = viodasd_remove, - .driver = { - .name = "viodasd", - .owner = THIS_MODULE, - } -}; - -static int need_delete_probe; - -/* - * Initialize the whole device driver. 
Handle module and non-module - * versions - */ -static int __init viodasd_init(void) -{ - int rc; - - if (!firmware_has_feature(FW_FEATURE_ISERIES)) { - rc = -ENODEV; - goto early_fail; - } - - /* Try to open to our host lp */ - if (viopath_hostLp == HvLpIndexInvalid) - vio_set_hostlp(); - - if (viopath_hostLp == HvLpIndexInvalid) { - pr_warning("invalid hosting partition\n"); - rc = -EIO; - goto early_fail; - } - - pr_info("vers " VIOD_VERS ", hosting partition %d\n", viopath_hostLp); - - /* register the block device */ - rc = register_blkdev(VIODASD_MAJOR, VIOD_GENHD_NAME); - if (rc) { - pr_warning("Unable to get major number %d for %s\n", - VIODASD_MAJOR, VIOD_GENHD_NAME); - goto early_fail; - } - /* Actually open the path to the hosting partition */ - rc = viopath_open(viopath_hostLp, viomajorsubtype_blockio, - VIOMAXREQ + 2); - if (rc) { - pr_warning("error opening path to host partition %d\n", - viopath_hostLp); - goto unregister_blk; - } - - /* Initialize our request handler */ - vio_setHandler(viomajorsubtype_blockio, handle_block_event); - - rc = vio_register_driver(&viodasd_driver); - if (rc) { - pr_warning("vio_register_driver failed\n"); - goto unset_handler; - } - - /* - * If this call fails, it just means that we cannot dynamically - * add virtual disks, but the driver will still work fine for - * all existing disk, so ignore the failure. - */ - if (!driver_create_file(&viodasd_driver.driver, &driver_attr_probe)) - need_delete_probe = 1; - - return 0; - -unset_handler: - vio_clearHandler(viomajorsubtype_blockio); - viopath_close(viopath_hostLp, viomajorsubtype_blockio, VIOMAXREQ + 2); -unregister_blk: - unregister_blkdev(VIODASD_MAJOR, VIOD_GENHD_NAME); -early_fail: - return rc; -} -module_init(viodasd_init); - -void __exit viodasd_exit(void) -{ - if (need_delete_probe) - driver_remove_file(&viodasd_driver.driver, &driver_attr_probe); - vio_unregister_driver(&viodasd_driver); - vio_clearHandler(viomajorsubtype_blockio); - viopath_close(viopath_hostLp, viomajorsubtype_blockio, VIOMAXREQ + 2); - unregister_blkdev(VIODASD_MAJOR, VIOD_GENHD_NAME); -} -module_exit(viodasd_exit); diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index c4a60badf252..0d39f2f4294a 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -351,6 +351,7 @@ static void virtblk_config_changed_work(struct work_struct *work) cap_str_10, cap_str_2); set_capacity(vblk->disk, capacity); + revalidate_disk(vblk->disk); done: mutex_unlock(&vblk->config_lock); } @@ -374,6 +375,34 @@ static int init_vq(struct virtio_blk *vblk) return err; } +/* + * Legacy naming scheme used for virtio devices. We are stuck with it for + * virtio blk but don't ever use it for any new driver. 
+ */ +static int virtblk_name_format(char *prefix, int index, char *buf, int buflen) +{ + const int base = 'z' - 'a' + 1; + char *begin = buf + strlen(prefix); + char *end = buf + buflen; + char *p; + int unit; + + p = end - 1; + *p = '\0'; + unit = base; + do { + if (p == begin) + return -EINVAL; + *--p = 'a' + (index % unit); + index = (index / unit) - 1; + } while (index >= 0); + + memmove(begin, p, end - p); + memcpy(buf, prefix, strlen(prefix)); + + return 0; +} + static int __devinit virtblk_probe(struct virtio_device *vdev) { struct virtio_blk *vblk; @@ -442,18 +471,7 @@ static int __devinit virtblk_probe(struct virtio_device *vdev) q->queuedata = vblk; - if (index < 26) { - sprintf(vblk->disk->disk_name, "vd%c", 'a' + index % 26); - } else if (index < (26 + 1) * 26) { - sprintf(vblk->disk->disk_name, "vd%c%c", - 'a' + index / 26 - 1, 'a' + index % 26); - } else { - const unsigned int m1 = (index / 26 - 1) / 26 - 1; - const unsigned int m2 = (index / 26 - 1) % 26; - const unsigned int m3 = index % 26; - sprintf(vblk->disk->disk_name, "vd%c%c%c", - 'a' + m1, 'a' + m2, 'a' + m3); - } + virtblk_name_format("vd", index, vblk->disk->disk_name, DISK_NAME_LEN); vblk->disk->major = major; vblk->disk->first_minor = index_to_minor(index); diff --git a/drivers/block/xd.c b/drivers/block/xd.c index 51a972704db5..ff540520bada 100644 --- a/drivers/block/xd.c +++ b/drivers/block/xd.c @@ -52,7 +52,6 @@ #include <linux/io.h> #include <linux/gfp.h> -#include <asm/system.h> #include <asm/uaccess.h> #include <asm/dma.h> diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 0088bf60f368..73f196ca713f 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -321,6 +321,7 @@ struct seg_buf { static void xen_blkbk_unmap(struct pending_req *req) { struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST]; unsigned int i, invcount = 0; grant_handle_t handle; int ret; @@ -332,25 +333,12 @@ static void xen_blkbk_unmap(struct pending_req *req) gnttab_set_unmap_op(&unmap[invcount], vaddr(req, i), GNTMAP_host_map, handle); pending_handle(req, i) = BLKBACK_INVALID_HANDLE; + pages[invcount] = virt_to_page(vaddr(req, i)); invcount++; } - ret = HYPERVISOR_grant_table_op( - GNTTABOP_unmap_grant_ref, unmap, invcount); + ret = gnttab_unmap_refs(unmap, pages, invcount, false); BUG_ON(ret); - /* - * Note, we use invcount, so nr->pages, so we can't index - * using vaddr(req, i). 
- */ - for (i = 0; i < invcount; i++) { - ret = m2p_remove_override( - virt_to_page(unmap[i].host_addr), false); - if (ret) { - pr_alert(DRV_PFX "Failed to remove M2P override for %lx\n", - (unsigned long)unmap[i].host_addr); - continue; - } - } } static int xen_blkbk_map(struct blkif_request *req, @@ -378,7 +366,7 @@ static int xen_blkbk_map(struct blkif_request *req, pending_req->blkif->domid); } - ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, nseg); + ret = gnttab_map_refs(map, NULL, &blkbk->pending_page(pending_req, 0), nseg); BUG_ON(ret); /* @@ -398,15 +386,6 @@ static int xen_blkbk_map(struct blkif_request *req, if (ret) continue; - ret = m2p_add_override(PFN_DOWN(map[i].dev_bus_addr), - blkbk->pending_page(pending_req, i), NULL); - if (ret) { - pr_alert(DRV_PFX "Failed to install M2P override for %lx (ret: %d)\n", - (unsigned long)map[i].dev_bus_addr, ret); - /* We could switch over to GNTTABOP_copy */ - continue; - } - seg[i].buf = map[i].dev_bus_addr | (req->u.rw.seg[i].first_sect << 9); } @@ -419,21 +398,18 @@ static int dispatch_discard_io(struct xen_blkif *blkif, int err = 0; int status = BLKIF_RSP_OKAY; struct block_device *bdev = blkif->vbd.bdev; + unsigned long secure; blkif->st_ds_req++; xen_blkif_get(blkif); - if (blkif->blk_backend_type == BLKIF_BACKEND_PHY || - blkif->blk_backend_type == BLKIF_BACKEND_FILE) { - unsigned long secure = (blkif->vbd.discard_secure && - (req->u.discard.flag & BLKIF_DISCARD_SECURE)) ? - BLKDEV_DISCARD_SECURE : 0; - err = blkdev_issue_discard(bdev, - req->u.discard.sector_number, - req->u.discard.nr_sectors, - GFP_KERNEL, secure); - } else - err = -EOPNOTSUPP; + secure = (blkif->vbd.discard_secure && + (req->u.discard.flag & BLKIF_DISCARD_SECURE)) ? + BLKDEV_DISCARD_SECURE : 0; + + err = blkdev_issue_discard(bdev, req->u.discard.sector_number, + req->u.discard.nr_sectors, + GFP_KERNEL, secure); if (err == -EOPNOTSUPP) { pr_debug(DRV_PFX "discard op failed, not supported\n"); @@ -830,7 +806,7 @@ static int __init xen_blkif_init(void) int i, mmap_pages; int rc = 0; - if (!xen_pv_domain()) + if (!xen_domain()) return -ENODEV; blkbk = kzalloc(sizeof(struct xen_blkbk), GFP_KERNEL); diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h index d0ee7edc9be8..773cf27dc23f 100644 --- a/drivers/block/xen-blkback/common.h +++ b/drivers/block/xen-blkback/common.h @@ -146,11 +146,6 @@ enum blkif_protocol { BLKIF_PROTOCOL_X86_64 = 3, }; -enum blkif_backend_type { - BLKIF_BACKEND_PHY = 1, - BLKIF_BACKEND_FILE = 2, -}; - struct xen_vbd { /* What the domain refers to this vbd as. */ blkif_vdev_t handle; @@ -177,7 +172,6 @@ struct xen_blkif { unsigned int irq; /* Comms information. */ enum blkif_protocol blk_protocol; - enum blkif_backend_type blk_backend_type; union blkif_back_rings blk_rings; void *blk_ring; /* The VBD attached to this interface. 
 */
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index 24a2fb57e5d0..89860f34a7ec 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -381,72 +381,49 @@ int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt,
 	err = xenbus_printf(xbt, dev->nodename, "feature-flush-cache",
 			    "%d", state);
 	if (err)
-		xenbus_dev_fatal(dev, err, "writing feature-flush-cache");
+		dev_warn(&dev->dev, "writing feature-flush-cache (%d)", err);
 
 	return err;
 }
 
-int xen_blkbk_discard(struct xenbus_transaction xbt, struct backend_info *be)
+static void xen_blkbk_discard(struct xenbus_transaction xbt, struct backend_info *be)
 {
 	struct xenbus_device *dev = be->dev;
 	struct xen_blkif *blkif = be->blkif;
-	char *type;
 	int err;
 	int state = 0;
+	struct block_device *bdev = be->blkif->vbd.bdev;
+	struct request_queue *q = bdev_get_queue(bdev);
 
-	type = xenbus_read(XBT_NIL, dev->nodename, "type", NULL);
-	if (!IS_ERR(type)) {
-		if (strncmp(type, "file", 4) == 0) {
-			state = 1;
-			blkif->blk_backend_type = BLKIF_BACKEND_FILE;
+	if (blk_queue_discard(q)) {
+		err = xenbus_printf(xbt, dev->nodename,
+			"discard-granularity", "%u",
+			q->limits.discard_granularity);
+		if (err) {
+			dev_warn(&dev->dev, "writing discard-granularity (%d)", err);
+			return;
 		}
-		if (strncmp(type, "phy", 3) == 0) {
-			struct block_device *bdev = be->blkif->vbd.bdev;
-			struct request_queue *q = bdev_get_queue(bdev);
-			if (blk_queue_discard(q)) {
-				err = xenbus_printf(xbt, dev->nodename,
-					"discard-granularity", "%u",
-					q->limits.discard_granularity);
-				if (err) {
-					xenbus_dev_fatal(dev, err,
-						"writing discard-granularity");
-					goto kfree;
-				}
-				err = xenbus_printf(xbt, dev->nodename,
-					"discard-alignment", "%u",
-					q->limits.discard_alignment);
-				if (err) {
-					xenbus_dev_fatal(dev, err,
-						"writing discard-alignment");
-					goto kfree;
-				}
-				state = 1;
-				blkif->blk_backend_type = BLKIF_BACKEND_PHY;
-			}
-			/* Optional. */
-			err = xenbus_printf(xbt, dev->nodename,
-				"discard-secure", "%d",
-				blkif->vbd.discard_secure);
-			if (err) {
-				xenbus_dev_fatal(dev, err,
-					"writting discard-secure");
-				goto kfree;
-			}
+		err = xenbus_printf(xbt, dev->nodename,
+			"discard-alignment", "%u",
+			q->limits.discard_alignment);
+		if (err) {
+			dev_warn(&dev->dev, "writing discard-alignment (%d)", err);
+			return;
+		}
+		state = 1;
+		/* Optional. */
+		err = xenbus_printf(xbt, dev->nodename,
+			"discard-secure", "%d",
+			blkif->vbd.discard_secure);
+		if (err) {
+			dev_warn(&dev->dev, "writing discard-secure (%d)", err);
+			return;
 		}
-	} else {
-		err = PTR_ERR(type);
-		xenbus_dev_fatal(dev, err, "reading type");
-		goto out;
 	}
-
 	err = xenbus_printf(xbt, dev->nodename, "feature-discard", "%d", state);
 	if (err)
-		xenbus_dev_fatal(dev, err, "writing feature-discard");
-kfree:
-	kfree(type);
-out:
-	return err;
+		dev_warn(&dev->dev, "writing feature-discard (%d)", err);
 }
 
 int xen_blkbk_barrier(struct xenbus_transaction xbt,
 		      struct backend_info *be, int state)
@@ -457,7 +434,7 @@ int xen_blkbk_barrier(struct xenbus_transaction xbt,
 	err = xenbus_printf(xbt, dev->nodename, "feature-barrier",
 			    "%d", state);
 	if (err)
-		xenbus_dev_fatal(dev, err, "writing feature-barrier");
+		dev_warn(&dev->dev, "writing feature-barrier (%d)", err);
 
 	return err;
 }
@@ -689,14 +666,12 @@ again:
 		return;
 	}
 
-	err = xen_blkbk_flush_diskcache(xbt, be, be->blkif->vbd.flush_support);
-	if (err)
-		goto abort;
+	/* If we can't advertise it is OK.
*/ + xen_blkbk_flush_diskcache(xbt, be, be->blkif->vbd.flush_support); - err = xen_blkbk_discard(xbt, be); + xen_blkbk_discard(xbt, be); - /* If we can't advertise it is OK. */ - err = xen_blkbk_barrier(xbt, be, be->blkif->vbd.flush_support); + xen_blkbk_barrier(xbt, be, be->blkif->vbd.flush_support); err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu", (unsigned long long)vbd_sz(&be->blkif->vbd)); diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 2f22874c0a37..4e86393a09cf 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -43,6 +43,7 @@ #include <linux/slab.h> #include <linux/mutex.h> #include <linux/scatterlist.h> +#include <linux/bitmap.h> #include <xen/xen.h> #include <xen/xenbus.h> @@ -81,6 +82,7 @@ static const struct block_device_operations xlvbd_block_fops; */ struct blkfront_info { + spinlock_t io_lock; struct mutex mutex; struct xenbus_device *xbdev; struct gendisk *gd; @@ -105,8 +107,6 @@ struct blkfront_info int is_ready; }; -static DEFINE_SPINLOCK(blkif_io_lock); - static unsigned int nr_minors; static unsigned long *minors; static DEFINE_SPINLOCK(minor_lock); @@ -177,8 +177,7 @@ static int xlbd_reserve_minors(unsigned int minor, unsigned int nr) spin_lock(&minor_lock); if (find_next_bit(minors, end, minor) >= end) { - for (; minor < end; ++minor) - __set_bit(minor, minors); + bitmap_set(minors, minor, nr); rc = 0; } else rc = -EBUSY; @@ -193,8 +192,7 @@ static void xlbd_release_minors(unsigned int minor, unsigned int nr) BUG_ON(end > nr_minors); spin_lock(&minor_lock); - for (; minor < end; ++minor) - __clear_bit(minor, minors); + bitmap_clear(minors, minor, nr); spin_unlock(&minor_lock); } @@ -419,7 +417,7 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size) struct request_queue *rq; struct blkfront_info *info = gd->private_data; - rq = blk_init_queue(do_blkif_request, &blkif_io_lock); + rq = blk_init_queue(do_blkif_request, &info->io_lock); if (rq == NULL) return -1; @@ -636,14 +634,14 @@ static void xlvbd_release_gendisk(struct blkfront_info *info) if (info->rq == NULL) return; - spin_lock_irqsave(&blkif_io_lock, flags); + spin_lock_irqsave(&info->io_lock, flags); /* No more blkif_request(). */ blk_stop_queue(info->rq); /* No more gnttab callback work. */ gnttab_cancel_free_callback(&info->callback); - spin_unlock_irqrestore(&blkif_io_lock, flags); + spin_unlock_irqrestore(&info->io_lock, flags); /* Flush gnttab callback work. Must be done with no locks held. */ flush_work_sync(&info->work); @@ -675,16 +673,16 @@ static void blkif_restart_queue(struct work_struct *work) { struct blkfront_info *info = container_of(work, struct blkfront_info, work); - spin_lock_irq(&blkif_io_lock); + spin_lock_irq(&info->io_lock); if (info->connected == BLKIF_STATE_CONNECTED) kick_pending_request_queues(info); - spin_unlock_irq(&blkif_io_lock); + spin_unlock_irq(&info->io_lock); } static void blkif_free(struct blkfront_info *info, int suspend) { /* Prevent new requests being issued until we fix things up. */ - spin_lock_irq(&blkif_io_lock); + spin_lock_irq(&info->io_lock); info->connected = suspend ? BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED; /* No more blkif_request(). */ @@ -692,7 +690,7 @@ static void blkif_free(struct blkfront_info *info, int suspend) blk_stop_queue(info->rq); /* No more gnttab callback work. */ gnttab_cancel_free_callback(&info->callback); - spin_unlock_irq(&blkif_io_lock); + spin_unlock_irq(&info->io_lock); /* Flush gnttab callback work. Must be done with no locks held. 
*/ flush_work_sync(&info->work); @@ -728,10 +726,10 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) struct blkfront_info *info = (struct blkfront_info *)dev_id; int error; - spin_lock_irqsave(&blkif_io_lock, flags); + spin_lock_irqsave(&info->io_lock, flags); if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) { - spin_unlock_irqrestore(&blkif_io_lock, flags); + spin_unlock_irqrestore(&info->io_lock, flags); return IRQ_HANDLED; } @@ -816,7 +814,7 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) kick_pending_request_queues(info); - spin_unlock_irqrestore(&blkif_io_lock, flags); + spin_unlock_irqrestore(&info->io_lock, flags); return IRQ_HANDLED; } @@ -991,6 +989,7 @@ static int blkfront_probe(struct xenbus_device *dev, } mutex_init(&info->mutex); + spin_lock_init(&info->io_lock); info->xbdev = dev; info->vdevice = vdevice; info->connected = BLKIF_STATE_DISCONNECTED; @@ -1068,7 +1067,7 @@ static int blkif_recover(struct blkfront_info *info) xenbus_switch_state(info->xbdev, XenbusStateConnected); - spin_lock_irq(&blkif_io_lock); + spin_lock_irq(&info->io_lock); /* Now safe for us to use the shared ring */ info->connected = BLKIF_STATE_CONNECTED; @@ -1079,7 +1078,7 @@ static int blkif_recover(struct blkfront_info *info) /* Kick any other new requests queued since we resumed */ kick_pending_request_queues(info); - spin_unlock_irq(&blkif_io_lock); + spin_unlock_irq(&info->io_lock); return 0; } @@ -1277,10 +1276,10 @@ static void blkfront_connect(struct blkfront_info *info) xenbus_switch_state(info->xbdev, XenbusStateConnected); /* Kick pending requests. */ - spin_lock_irq(&blkif_io_lock); + spin_lock_irq(&info->io_lock); info->connected = BLKIF_STATE_CONNECTED; kick_pending_request_queues(info); - spin_unlock_irq(&blkif_io_lock); + spin_unlock_irq(&info->io_lock); add_disk(info->gd); @@ -1410,7 +1409,6 @@ static int blkif_release(struct gendisk *disk, fmode_t mode) mutex_lock(&blkfront_mutex); bdev = bdget_disk(disk, 0); - bdput(bdev); if (bdev->bd_openers) goto out; @@ -1441,6 +1439,7 @@ static int blkif_release(struct gendisk *disk, fmode_t mode) } out: + bdput(bdev); mutex_unlock(&blkfront_mutex); return 0; } @@ -1475,6 +1474,9 @@ static int __init xlblk_init(void) if (!xen_domain()) return -ENODEV; + if (xen_hvm_domain() && !xen_platform_pci_unplug) + return -ENODEV; + if (register_blkdev(XENVBD_MAJOR, DEV_NAME)) { printk(KERN_WARNING "xen_blk: can't get major %d with name %s\n", XENVBD_MAJOR, DEV_NAME); |
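Editor's note: virtblk_name_format() in the virtio_blk.c hunk above replaces the three open-coded vd%c / vd%c%c / vd%c%c%c branches with one loop over a bijective base-26 numbering, building the suffix backward from the end of the buffer. The function has no kernel dependencies, so its behaviour is easy to spot-check in userspace; in the harness below the function body is copied from the hunk (with -1 standing in for -EINVAL) and everything else is invented.

/* Userspace check of virtblk_name_format() from the hunk above.
 * The function body is verbatim; the harness is illustrative. */
#include <stdio.h>
#include <string.h>

static int virtblk_name_format(char *prefix, int index, char *buf, int buflen)
{
	const int base = 'z' - 'a' + 1;
	char *begin = buf + strlen(prefix);
	char *end = buf + buflen;
	char *p;
	int unit;

	p = end - 1;
	*p = '\0';
	unit = base;
	do {
		if (p == begin)
			return -1;	/* -EINVAL in the kernel */
		*--p = 'a' + (index % unit);
		index = (index / unit) - 1;
	} while (index >= 0);

	memmove(begin, p, end - p);
	memcpy(buf, prefix, strlen(prefix));

	return 0;
}

int main(void)
{
	int idx[] = { 0, 25, 26, 701, 702 };
	char name[32];
	unsigned int i;

	for (i = 0; i < sizeof(idx) / sizeof(idx[0]); i++) {
		virtblk_name_format("vd", idx[i], name, sizeof(name));
		printf("%d -> %s\n", idx[i], name);	/* vda vdz vdaa vdzz vdaaa */
	}
	return 0;
}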
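Editor's note: the xen-blkfront hunks above replace the open-coded __set_bit()/__clear_bit() loops in xlbd_reserve_minors()/xlbd_release_minors() with the kernel's bitmap_set()/bitmap_clear(), keeping the find_next_bit() probe that rejects overlapping reservations. The sketch below mimics that reserve/release logic in userspace; the byte-per-bit "bitmap" and the naive loops stand in for the real bitmap helpers and are illustrative only.

/* Userspace sketch of the xlbd_reserve_minors()/xlbd_release_minors()
 * logic above: reserve a contiguous range of minors iff every bit in
 * it is currently clear.  bitmap_set()/bitmap_clear()/find_next_bit()
 * are approximated with naive loops over a byte array. */
#include <stdio.h>

#define NR_MINORS 256
static unsigned char minors[NR_MINORS];	/* one byte per "bit", for clarity */

static int reserve_minors(unsigned int minor, unsigned int nr)
{
	unsigned int i;

	for (i = minor; i < minor + nr; i++)	/* find_next_bit() probe */
		if (minors[i])
			return -1;		/* -EBUSY in the kernel */
	for (i = minor; i < minor + nr; i++)	/* bitmap_set() */
		minors[i] = 1;
	return 0;
}

static void release_minors(unsigned int minor, unsigned int nr)
{
	unsigned int i;

	for (i = minor; i < minor + nr; i++)	/* bitmap_clear() */
		minors[i] = 0;
}

int main(void)
{
	printf("first: %d\n", reserve_minors(0, 16));	/* 0: ok */
	printf("clash: %d\n", reserve_minors(8, 16));	/* -1: overlaps */
	release_minors(0, 16);
	printf("retry: %d\n", reserve_minors(8, 16));	/* 0: ok now */
	return 0;
}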