summaryrefslogtreecommitdiff
path: root/drivers/block
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/block')
-rw-r--r--drivers/block/brd.c20
-rw-r--r--drivers/block/cciss_scsi.c3
-rw-r--r--drivers/block/drbd/drbd_bitmap.c50
-rw-r--r--drivers/block/drbd/drbd_nl.c6
-rw-r--r--drivers/block/floppy.c37
-rw-r--r--drivers/block/hd.c1
-rw-r--r--drivers/block/loop.c16
-rw-r--r--drivers/block/mtip32xx/Kconfig2
-rw-r--r--drivers/block/mtip32xx/mtip32xx.c860
-rw-r--r--drivers/block/mtip32xx/mtip32xx.h58
-rw-r--r--drivers/block/nbd.c296
-rw-r--r--drivers/block/nvme.c1
-rw-r--r--drivers/block/pktcdvd.c8
-rw-r--r--drivers/block/rbd.c730
-rw-r--r--drivers/block/rbd_types.h4
-rw-r--r--drivers/block/sunvdc.c5
-rw-r--r--drivers/block/viodasd.c809
-rw-r--r--drivers/block/virtio_blk.c42
-rw-r--r--drivers/block/xd.c1
-rw-r--r--drivers/block/xen-blkback/blkback.c50
-rw-r--r--drivers/block/xen-blkback/common.h6
-rw-r--r--drivers/block/xen-blkback/xenbus.c89
-rw-r--r--drivers/block/xen-blkfront.c44
23 files changed, 1457 insertions, 1681 deletions
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index ec246437f5a4..531ceb31d0ff 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -242,9 +242,9 @@ static void copy_to_brd(struct brd_device *brd, const void *src,
page = brd_lookup_page(brd, sector);
BUG_ON(!page);
- dst = kmap_atomic(page, KM_USER1);
+ dst = kmap_atomic(page);
memcpy(dst + offset, src, copy);
- kunmap_atomic(dst, KM_USER1);
+ kunmap_atomic(dst);
if (copy < n) {
src += copy;
@@ -253,9 +253,9 @@ static void copy_to_brd(struct brd_device *brd, const void *src,
page = brd_lookup_page(brd, sector);
BUG_ON(!page);
- dst = kmap_atomic(page, KM_USER1);
+ dst = kmap_atomic(page);
memcpy(dst, src, copy);
- kunmap_atomic(dst, KM_USER1);
+ kunmap_atomic(dst);
}
}
@@ -273,9 +273,9 @@ static void copy_from_brd(void *dst, struct brd_device *brd,
copy = min_t(size_t, n, PAGE_SIZE - offset);
page = brd_lookup_page(brd, sector);
if (page) {
- src = kmap_atomic(page, KM_USER1);
+ src = kmap_atomic(page);
memcpy(dst, src + offset, copy);
- kunmap_atomic(src, KM_USER1);
+ kunmap_atomic(src);
} else
memset(dst, 0, copy);
@@ -285,9 +285,9 @@ static void copy_from_brd(void *dst, struct brd_device *brd,
copy = n - copy;
page = brd_lookup_page(brd, sector);
if (page) {
- src = kmap_atomic(page, KM_USER1);
+ src = kmap_atomic(page);
memcpy(dst, src, copy);
- kunmap_atomic(src, KM_USER1);
+ kunmap_atomic(src);
} else
memset(dst, 0, copy);
}
@@ -309,7 +309,7 @@ static int brd_do_bvec(struct brd_device *brd, struct page *page,
goto out;
}
- mem = kmap_atomic(page, KM_USER0);
+ mem = kmap_atomic(page);
if (rw == READ) {
copy_from_brd(mem + off, brd, sector, len);
flush_dcache_page(page);
@@ -317,7 +317,7 @@ static int brd_do_bvec(struct brd_device *brd, struct page *page,
flush_dcache_page(page);
copy_to_brd(brd, mem + off, sector, len);
}
- kunmap_atomic(mem, KM_USER0);
+ kunmap_atomic(mem);
out:
return err;
diff --git a/drivers/block/cciss_scsi.c b/drivers/block/cciss_scsi.c
index e820b68d2f6c..acda773b3720 100644
--- a/drivers/block/cciss_scsi.c
+++ b/drivers/block/cciss_scsi.c
@@ -866,6 +866,7 @@ cciss_scsi_detect(ctlr_info_t *h)
sh->can_queue = cciss_tape_cmds;
sh->sg_tablesize = h->maxsgentries;
sh->max_cmd_len = MAX_COMMAND_SIZE;
+ sh->max_sectors = h->cciss_max_sectors;
((struct cciss_scsi_adapter_data_t *)
h->scsi_ctlr)->scsi_host = sh;
@@ -1410,7 +1411,7 @@ static void cciss_scatter_gather(ctlr_info_t *h, CommandList_struct *c,
/* track how many SG entries we are using */
if (request_nsgs > h->maxSG)
h->maxSG = request_nsgs;
- c->Header.SGTotal = (__u8) request_nsgs + chained;
+ c->Header.SGTotal = (u16) request_nsgs + chained;
if (request_nsgs > h->max_cmd_sgentries)
c->Header.SGList = h->max_cmd_sgentries;
else
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
index 912f585a760f..3030201c69d8 100644
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -289,25 +289,25 @@ static unsigned int bm_bit_to_page_idx(struct drbd_bitmap *b, u64 bitnr)
return page_nr;
}
-static unsigned long *__bm_map_pidx(struct drbd_bitmap *b, unsigned int idx, const enum km_type km)
+static unsigned long *__bm_map_pidx(struct drbd_bitmap *b, unsigned int idx)
{
struct page *page = b->bm_pages[idx];
- return (unsigned long *) kmap_atomic(page, km);
+ return (unsigned long *) kmap_atomic(page);
}
static unsigned long *bm_map_pidx(struct drbd_bitmap *b, unsigned int idx)
{
- return __bm_map_pidx(b, idx, KM_IRQ1);
+ return __bm_map_pidx(b, idx);
}
-static void __bm_unmap(unsigned long *p_addr, const enum km_type km)
+static void __bm_unmap(unsigned long *p_addr)
{
- kunmap_atomic(p_addr, km);
+ kunmap_atomic(p_addr);
};
static void bm_unmap(unsigned long *p_addr)
{
- return __bm_unmap(p_addr, KM_IRQ1);
+ return __bm_unmap(p_addr);
}
/* long word offset of _bitmap_ sector */
@@ -543,15 +543,15 @@ static unsigned long bm_count_bits(struct drbd_bitmap *b)
/* all but last page */
for (idx = 0; idx < b->bm_number_of_pages - 1; idx++) {
- p_addr = __bm_map_pidx(b, idx, KM_USER0);
+ p_addr = __bm_map_pidx(b, idx);
for (i = 0; i < LWPP; i++)
bits += hweight_long(p_addr[i]);
- __bm_unmap(p_addr, KM_USER0);
+ __bm_unmap(p_addr);
cond_resched();
}
/* last (or only) page */
last_word = ((b->bm_bits - 1) & BITS_PER_PAGE_MASK) >> LN2_BPL;
- p_addr = __bm_map_pidx(b, idx, KM_USER0);
+ p_addr = __bm_map_pidx(b, idx);
for (i = 0; i < last_word; i++)
bits += hweight_long(p_addr[i]);
p_addr[last_word] &= cpu_to_lel(mask);
@@ -559,7 +559,7 @@ static unsigned long bm_count_bits(struct drbd_bitmap *b)
/* 32bit arch, may have an unused padding long */
if (BITS_PER_LONG == 32 && (last_word & 1) == 0)
p_addr[last_word+1] = 0;
- __bm_unmap(p_addr, KM_USER0);
+ __bm_unmap(p_addr);
return bits;
}
@@ -970,11 +970,11 @@ static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must
* to use pre-allocated page pool */
void *src, *dest;
page = alloc_page(__GFP_HIGHMEM|__GFP_WAIT);
- dest = kmap_atomic(page, KM_USER0);
- src = kmap_atomic(b->bm_pages[page_nr], KM_USER1);
+ dest = kmap_atomic(page);
+ src = kmap_atomic(b->bm_pages[page_nr]);
memcpy(dest, src, PAGE_SIZE);
- kunmap_atomic(src, KM_USER1);
- kunmap_atomic(dest, KM_USER0);
+ kunmap_atomic(src);
+ kunmap_atomic(dest);
bm_store_page_idx(page, page_nr);
} else
page = b->bm_pages[page_nr];
@@ -1163,7 +1163,7 @@ int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(loc
* this returns a bit number, NOT a sector!
*/
static unsigned long __bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo,
- const int find_zero_bit, const enum km_type km)
+ const int find_zero_bit)
{
struct drbd_bitmap *b = mdev->bitmap;
unsigned long *p_addr;
@@ -1178,7 +1178,7 @@ static unsigned long __bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo,
while (bm_fo < b->bm_bits) {
/* bit offset of the first bit in the page */
bit_offset = bm_fo & ~BITS_PER_PAGE_MASK;
- p_addr = __bm_map_pidx(b, bm_bit_to_page_idx(b, bm_fo), km);
+ p_addr = __bm_map_pidx(b, bm_bit_to_page_idx(b, bm_fo));
if (find_zero_bit)
i = find_next_zero_bit_le(p_addr,
@@ -1187,7 +1187,7 @@ static unsigned long __bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo,
i = find_next_bit_le(p_addr,
PAGE_SIZE*8, bm_fo & BITS_PER_PAGE_MASK);
- __bm_unmap(p_addr, km);
+ __bm_unmap(p_addr);
if (i < PAGE_SIZE*8) {
bm_fo = bit_offset + i;
if (bm_fo >= b->bm_bits)
@@ -1215,7 +1215,7 @@ static unsigned long bm_find_next(struct drbd_conf *mdev,
if (BM_DONT_TEST & b->bm_flags)
bm_print_lock_info(mdev);
- i = __bm_find_next(mdev, bm_fo, find_zero_bit, KM_IRQ1);
+ i = __bm_find_next(mdev, bm_fo, find_zero_bit);
spin_unlock_irq(&b->bm_lock);
return i;
@@ -1239,13 +1239,13 @@ unsigned long drbd_bm_find_next_zero(struct drbd_conf *mdev, unsigned long bm_fo
unsigned long _drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo)
{
/* WARN_ON(!(BM_DONT_SET & mdev->b->bm_flags)); */
- return __bm_find_next(mdev, bm_fo, 0, KM_USER1);
+ return __bm_find_next(mdev, bm_fo, 0);
}
unsigned long _drbd_bm_find_next_zero(struct drbd_conf *mdev, unsigned long bm_fo)
{
/* WARN_ON(!(BM_DONT_SET & mdev->b->bm_flags)); */
- return __bm_find_next(mdev, bm_fo, 1, KM_USER1);
+ return __bm_find_next(mdev, bm_fo, 1);
}
/* returns number of bits actually changed.
@@ -1273,14 +1273,14 @@ static int __bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s,
unsigned int page_nr = bm_bit_to_page_idx(b, bitnr);
if (page_nr != last_page_nr) {
if (p_addr)
- __bm_unmap(p_addr, KM_IRQ1);
+ __bm_unmap(p_addr);
if (c < 0)
bm_set_page_lazy_writeout(b->bm_pages[last_page_nr]);
else if (c > 0)
bm_set_page_need_writeout(b->bm_pages[last_page_nr]);
changed_total += c;
c = 0;
- p_addr = __bm_map_pidx(b, page_nr, KM_IRQ1);
+ p_addr = __bm_map_pidx(b, page_nr);
last_page_nr = page_nr;
}
if (val)
@@ -1289,7 +1289,7 @@ static int __bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s,
c -= (0 != __test_and_clear_bit_le(bitnr & BITS_PER_PAGE_MASK, p_addr));
}
if (p_addr)
- __bm_unmap(p_addr, KM_IRQ1);
+ __bm_unmap(p_addr);
if (c < 0)
bm_set_page_lazy_writeout(b->bm_pages[last_page_nr]);
else if (c > 0)
@@ -1342,13 +1342,13 @@ static inline void bm_set_full_words_within_one_page(struct drbd_bitmap *b,
{
int i;
int bits;
- unsigned long *paddr = kmap_atomic(b->bm_pages[page_nr], KM_IRQ1);
+ unsigned long *paddr = kmap_atomic(b->bm_pages[page_nr]);
for (i = first_word; i < last_word; i++) {
bits = hweight_long(paddr[i]);
paddr[i] = ~0UL;
b->bm_set += BITS_PER_LONG - bits;
}
- kunmap_atomic(paddr, KM_IRQ1);
+ kunmap_atomic(paddr);
}
/* Same thing as drbd_bm_set_bits,
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index af2a25049bce..abfaacaaf346 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -179,7 +179,7 @@ int drbd_khelper(struct drbd_conf *mdev, char *cmd)
dev_info(DEV, "helper command: %s %s %s\n", usermode_helper, cmd, mb);
drbd_bcast_ev_helper(mdev, cmd);
- ret = call_usermodehelper(usermode_helper, argv, envp, 1);
+ ret = call_usermodehelper(usermode_helper, argv, envp, UMH_WAIT_PROC);
if (ret)
dev_warn(DEV, "helper command: %s %s %s exit code %u (0x%x)\n",
usermode_helper, cmd, mb,
@@ -2526,10 +2526,10 @@ void drbd_bcast_ee(struct drbd_conf *mdev,
page = e->pages;
page_chain_for_each(page) {
- void *d = kmap_atomic(page, KM_USER0);
+ void *d = kmap_atomic(page);
unsigned l = min_t(unsigned, len, PAGE_SIZE);
memcpy(tl, d, l);
- kunmap_atomic(d, KM_USER0);
+ kunmap_atomic(d);
tl = (unsigned short*)((char*)tl + l);
len -= l;
if (len == 0)
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index 744f078f4dd8..b0b00d70c166 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -202,7 +202,6 @@ static int slow_floppy;
#include <asm/dma.h>
#include <asm/irq.h>
-#include <asm/system.h>
static int FLOPPY_IRQ = 6;
static int FLOPPY_DMA = 2;
@@ -1031,37 +1030,6 @@ static int fd_wait_for_completion(unsigned long delay, timeout_fn function)
return 0;
}
-static DEFINE_SPINLOCK(floppy_hlt_lock);
-static int hlt_disabled;
-static void floppy_disable_hlt(void)
-{
- unsigned long flags;
-
- WARN_ONCE(1, "floppy_disable_hlt() scheduled for removal in 2012");
- spin_lock_irqsave(&floppy_hlt_lock, flags);
- if (!hlt_disabled) {
- hlt_disabled = 1;
-#ifdef HAVE_DISABLE_HLT
- disable_hlt();
-#endif
- }
- spin_unlock_irqrestore(&floppy_hlt_lock, flags);
-}
-
-static void floppy_enable_hlt(void)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&floppy_hlt_lock, flags);
- if (hlt_disabled) {
- hlt_disabled = 0;
-#ifdef HAVE_DISABLE_HLT
- enable_hlt();
-#endif
- }
- spin_unlock_irqrestore(&floppy_hlt_lock, flags);
-}
-
static void setup_DMA(void)
{
unsigned long f;
@@ -1106,7 +1074,6 @@ static void setup_DMA(void)
fd_enable_dma();
release_dma_lock(f);
#endif
- floppy_disable_hlt();
}
static void show_floppy(void);
@@ -1708,7 +1675,6 @@ irqreturn_t floppy_interrupt(int irq, void *dev_id)
fd_disable_dma();
release_dma_lock(f);
- floppy_enable_hlt();
do_floppy = NULL;
if (fdc >= N_FDC || FDCS->address == -1) {
/* we don't even know which FDC is the culprit */
@@ -1857,8 +1823,6 @@ static void floppy_shutdown(unsigned long data)
show_floppy();
cancel_activity();
- floppy_enable_hlt();
-
flags = claim_dma_lock();
fd_disable_dma();
release_dma_lock(flags);
@@ -4509,7 +4473,6 @@ static void floppy_release_irq_and_dma(void)
#if N_FDC > 1
set_dor(1, ~8, 0);
#endif
- floppy_enable_hlt();
if (floppy_track_buffer && max_buffer_sectors) {
tmpsize = max_buffer_sectors * 1024;
diff --git a/drivers/block/hd.c b/drivers/block/hd.c
index b52c9ca146fc..bf397bf108b7 100644
--- a/drivers/block/hd.c
+++ b/drivers/block/hd.c
@@ -44,7 +44,6 @@
#define HD_IRQ 14
#define REALLY_SLOW_IO
-#include <asm/system.h>
#include <asm/io.h>
#include <asm/uaccess.h>
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index cd504353b278..bbca966f8f66 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -93,16 +93,16 @@ static int transfer_none(struct loop_device *lo, int cmd,
struct page *loop_page, unsigned loop_off,
int size, sector_t real_block)
{
- char *raw_buf = kmap_atomic(raw_page, KM_USER0) + raw_off;
- char *loop_buf = kmap_atomic(loop_page, KM_USER1) + loop_off;
+ char *raw_buf = kmap_atomic(raw_page) + raw_off;
+ char *loop_buf = kmap_atomic(loop_page) + loop_off;
if (cmd == READ)
memcpy(loop_buf, raw_buf, size);
else
memcpy(raw_buf, loop_buf, size);
- kunmap_atomic(loop_buf, KM_USER1);
- kunmap_atomic(raw_buf, KM_USER0);
+ kunmap_atomic(loop_buf);
+ kunmap_atomic(raw_buf);
cond_resched();
return 0;
}
@@ -112,8 +112,8 @@ static int transfer_xor(struct loop_device *lo, int cmd,
struct page *loop_page, unsigned loop_off,
int size, sector_t real_block)
{
- char *raw_buf = kmap_atomic(raw_page, KM_USER0) + raw_off;
- char *loop_buf = kmap_atomic(loop_page, KM_USER1) + loop_off;
+ char *raw_buf = kmap_atomic(raw_page) + raw_off;
+ char *loop_buf = kmap_atomic(loop_page) + loop_off;
char *in, *out, *key;
int i, keysize;
@@ -130,8 +130,8 @@ static int transfer_xor(struct loop_device *lo, int cmd,
for (i = 0; i < size; i++)
*out++ = *in++ ^ key[(i & 511) % keysize];
- kunmap_atomic(loop_buf, KM_USER1);
- kunmap_atomic(raw_buf, KM_USER0);
+ kunmap_atomic(loop_buf);
+ kunmap_atomic(raw_buf);
cond_resched();
return 0;
}
diff --git a/drivers/block/mtip32xx/Kconfig b/drivers/block/mtip32xx/Kconfig
index b5dd14e072f2..0ba837fc62a8 100644
--- a/drivers/block/mtip32xx/Kconfig
+++ b/drivers/block/mtip32xx/Kconfig
@@ -4,6 +4,6 @@
config BLK_DEV_PCIESSD_MTIP32XX
tristate "Block Device Driver for Micron PCIe SSDs"
- depends on HOTPLUG_PCI_PCIE
+ depends on PCI
help
This enables the block driver for Micron PCIe SSDs.
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index 8eb81c96608f..00f9fc992090 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -36,6 +36,7 @@
#include <linux/idr.h>
#include <linux/kthread.h>
#include <../drivers/ata/ahci.h>
+#include <linux/export.h>
#include "mtip32xx.h"
#define HW_CMD_SLOT_SZ (MTIP_MAX_COMMAND_SLOTS * 32)
@@ -44,6 +45,7 @@
#define HW_PORT_PRIV_DMA_SZ \
(HW_CMD_SLOT_SZ + HW_CMD_TBL_AR_SZ + AHCI_RX_FIS_SZ)
+#define HOST_CAP_NZDMA (1 << 19)
#define HOST_HSORG 0xFC
#define HSORG_DISABLE_SLOTGRP_INTR (1<<24)
#define HSORG_DISABLE_SLOTGRP_PXIS (1<<16)
@@ -139,6 +141,12 @@ static void mtip_command_cleanup(struct driver_data *dd)
int group = 0, commandslot = 0, commandindex = 0;
struct mtip_cmd *command;
struct mtip_port *port = dd->port;
+ static int in_progress;
+
+ if (in_progress)
+ return;
+
+ in_progress = 1;
for (group = 0; group < 4; group++) {
for (commandslot = 0; commandslot < 32; commandslot++) {
@@ -165,7 +173,8 @@ static void mtip_command_cleanup(struct driver_data *dd)
up(&port->cmd_slot);
- atomic_set(&dd->drv_cleanup_done, true);
+ set_bit(MTIP_DDF_CLEANUP_BIT, &dd->dd_flag);
+ in_progress = 0;
}
/*
@@ -262,6 +271,9 @@ static int hba_reset_nosleep(struct driver_data *dd)
&& time_before(jiffies, timeout))
mdelay(1);
+ if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag))
+ return -1;
+
if (readl(dd->mmio + HOST_CTL) & HOST_RESET)
return -1;
@@ -294,6 +306,10 @@ static inline void mtip_issue_ncq_command(struct mtip_port *port, int tag)
port->cmd_issue[MTIP_TAG_INDEX(tag)]);
spin_unlock_irqrestore(&port->cmd_issue_lock, flags);
+
+ /* Set the command's timeout value.*/
+ port->commands[tag].comp_time = jiffies + msecs_to_jiffies(
+ MTIP_NCQ_COMMAND_TIMEOUT_MS);
}
/*
@@ -420,7 +436,12 @@ static void mtip_init_port(struct mtip_port *port)
writel(0xFFFFFFFF, port->completed[i]);
/* Clear any pending interrupts for this port */
- writel(readl(port->mmio + PORT_IRQ_STAT), port->mmio + PORT_IRQ_STAT);
+ writel(readl(port->dd->mmio + PORT_IRQ_STAT),
+ port->dd->mmio + PORT_IRQ_STAT);
+
+ /* Clear any pending interrupts on the HBA. */
+ writel(readl(port->dd->mmio + HOST_IRQ_STAT),
+ port->dd->mmio + HOST_IRQ_STAT);
/* Enable port interrupts */
writel(DEF_PORT_IRQ, port->mmio + PORT_IRQ_MASK);
@@ -447,6 +468,9 @@ static void mtip_restart_port(struct mtip_port *port)
&& time_before(jiffies, timeout))
;
+ if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &port->dd->dd_flag))
+ return;
+
/*
* Chip quirk: escalate to hba reset if
* PxCMD.CR not clear after 500 ms
@@ -475,6 +499,9 @@ static void mtip_restart_port(struct mtip_port *port)
while (time_before(jiffies, timeout))
;
+ if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &port->dd->dd_flag))
+ return;
+
/* Clear PxSCTL.DET */
writel(readl(port->mmio + PORT_SCR_CTL) & ~1,
port->mmio + PORT_SCR_CTL);
@@ -486,15 +513,35 @@ static void mtip_restart_port(struct mtip_port *port)
&& time_before(jiffies, timeout))
;
+ if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &port->dd->dd_flag))
+ return;
+
if ((readl(port->mmio + PORT_SCR_STAT) & 0x01) == 0)
dev_warn(&port->dd->pdev->dev,
"COM reset failed\n");
- /* Clear SError, the PxSERR.DIAG.x should be set so clear it */
- writel(readl(port->mmio + PORT_SCR_ERR), port->mmio + PORT_SCR_ERR);
+ mtip_init_port(port);
+ mtip_start_port(port);
- /* Enable the DMA engine */
- mtip_enable_engine(port, 1);
+}
+
+/*
+ * Helper function for tag logging
+ */
+static void print_tags(struct driver_data *dd,
+ char *msg,
+ unsigned long *tagbits,
+ int cnt)
+{
+ unsigned char tagmap[128];
+ int group, tagmap_len = 0;
+
+ memset(tagmap, 0, sizeof(tagmap));
+ for (group = SLOTBITS_IN_LONGS; group > 0; group--)
+ tagmap_len = sprintf(tagmap + tagmap_len, "%016lX ",
+ tagbits[group-1]);
+ dev_warn(&dd->pdev->dev,
+ "%d command(s) %s: tagmap [%s]", cnt, msg, tagmap);
}
/*
@@ -514,15 +561,18 @@ static void mtip_timeout_function(unsigned long int data)
int tag, cmdto_cnt = 0;
unsigned int bit, group;
unsigned int num_command_slots = port->dd->slot_groups * 32;
+ unsigned long to, tagaccum[SLOTBITS_IN_LONGS];
if (unlikely(!port))
return;
- if (atomic_read(&port->dd->resumeflag) == true) {
+ if (test_bit(MTIP_DDF_RESUME_BIT, &port->dd->dd_flag)) {
mod_timer(&port->cmd_timer,
jiffies + msecs_to_jiffies(30000));
return;
}
+ /* clear the tag accumulator */
+ memset(tagaccum, 0, SLOTBITS_IN_LONGS * sizeof(long));
for (tag = 0; tag < num_command_slots; tag++) {
/*
@@ -540,12 +590,10 @@ static void mtip_timeout_function(unsigned long int data)
command = &port->commands[tag];
fis = (struct host_to_dev_fis *) command->command;
- dev_warn(&port->dd->pdev->dev,
- "Timeout for command tag %d\n", tag);
-
+ set_bit(tag, tagaccum);
cmdto_cnt++;
if (cmdto_cnt == 1)
- set_bit(MTIP_FLAG_EH_ACTIVE_BIT, &port->flags);
+ set_bit(MTIP_PF_EH_ACTIVE_BIT, &port->flags);
/*
* Clear the completed bit. This should prevent
@@ -578,15 +626,29 @@ static void mtip_timeout_function(unsigned long int data)
}
}
- if (cmdto_cnt) {
- dev_warn(&port->dd->pdev->dev,
- "%d commands timed out: restarting port",
- cmdto_cnt);
+ if (cmdto_cnt && !test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags)) {
+ print_tags(port->dd, "timed out", tagaccum, cmdto_cnt);
+
mtip_restart_port(port);
- clear_bit(MTIP_FLAG_EH_ACTIVE_BIT, &port->flags);
+ clear_bit(MTIP_PF_EH_ACTIVE_BIT, &port->flags);
wake_up_interruptible(&port->svc_wait);
}
+ if (port->ic_pause_timer) {
+ to = port->ic_pause_timer + msecs_to_jiffies(1000);
+ if (time_after(jiffies, to)) {
+ if (!test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags)) {
+ port->ic_pause_timer = 0;
+ clear_bit(MTIP_PF_SE_ACTIVE_BIT, &port->flags);
+ clear_bit(MTIP_PF_DM_ACTIVE_BIT, &port->flags);
+ clear_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags);
+ wake_up_interruptible(&port->svc_wait);
+ }
+
+
+ }
+ }
+
/* Restart the timer */
mod_timer(&port->cmd_timer,
jiffies + msecs_to_jiffies(MTIP_TIMEOUT_CHECK_PERIOD));
@@ -681,23 +743,18 @@ static void mtip_completion(struct mtip_port *port,
complete(waiting);
}
-/*
- * Helper function for tag logging
- */
-static void print_tags(struct driver_data *dd,
- char *msg,
- unsigned long *tagbits)
+static void mtip_null_completion(struct mtip_port *port,
+ int tag,
+ void *data,
+ int status)
{
- unsigned int tag, count = 0;
-
- for (tag = 0; tag < (dd->slot_groups) * 32; tag++) {
- if (test_bit(tag, tagbits))
- count++;
- }
- if (count)
- dev_info(&dd->pdev->dev, "%s [%i tags]\n", msg, count);
+ return;
}
+static int mtip_read_log_page(struct mtip_port *port, u8 page, u16 *buffer,
+ dma_addr_t buffer_dma, unsigned int sectors);
+static int mtip_get_smart_attr(struct mtip_port *port, unsigned int id,
+ struct smart_attr *attrib);
/*
* Handle an error.
*
@@ -708,12 +765,16 @@ static void print_tags(struct driver_data *dd,
*/
static void mtip_handle_tfe(struct driver_data *dd)
{
- int group, tag, bit, reissue;
+ int group, tag, bit, reissue, rv;
struct mtip_port *port;
- struct mtip_cmd *command;
+ struct mtip_cmd *cmd;
u32 completed;
struct host_to_dev_fis *fis;
unsigned long tagaccum[SLOTBITS_IN_LONGS];
+ unsigned int cmd_cnt = 0;
+ unsigned char *buf;
+ char *fail_reason = NULL;
+ int fail_all_ncq_write = 0, fail_all_ncq_cmds = 0;
dev_warn(&dd->pdev->dev, "Taskfile error\n");
@@ -722,8 +783,11 @@ static void mtip_handle_tfe(struct driver_data *dd)
/* Stop the timer to prevent command timeouts. */
del_timer(&port->cmd_timer);
+ /* clear the tag accumulator */
+ memset(tagaccum, 0, SLOTBITS_IN_LONGS * sizeof(long));
+
/* Set eh_active */
- set_bit(MTIP_FLAG_EH_ACTIVE_BIT, &port->flags);
+ set_bit(MTIP_PF_EH_ACTIVE_BIT, &port->flags);
/* Loop through all the groups */
for (group = 0; group < dd->slot_groups; group++) {
@@ -732,9 +796,6 @@ static void mtip_handle_tfe(struct driver_data *dd)
/* clear completed status register in the hardware.*/
writel(completed, port->completed[group]);
- /* clear the tag accumulator */
- memset(tagaccum, 0, SLOTBITS_IN_LONGS * sizeof(long));
-
/* Process successfully completed commands */
for (bit = 0; bit < 32 && completed; bit++) {
if (!(completed & (1<<bit)))
@@ -745,13 +806,14 @@ static void mtip_handle_tfe(struct driver_data *dd)
if (tag == MTIP_TAG_INTERNAL)
continue;
- command = &port->commands[tag];
- if (likely(command->comp_func)) {
+ cmd = &port->commands[tag];
+ if (likely(cmd->comp_func)) {
set_bit(tag, tagaccum);
- atomic_set(&port->commands[tag].active, 0);
- command->comp_func(port,
+ cmd_cnt++;
+ atomic_set(&cmd->active, 0);
+ cmd->comp_func(port,
tag,
- command->comp_data,
+ cmd->comp_data,
0);
} else {
dev_err(&port->dd->pdev->dev,
@@ -765,12 +827,45 @@ static void mtip_handle_tfe(struct driver_data *dd)
}
}
}
- print_tags(dd, "TFE tags completed:", tagaccum);
+
+ print_tags(dd, "completed (TFE)", tagaccum, cmd_cnt);
/* Restart the port */
mdelay(20);
mtip_restart_port(port);
+ /* Trying to determine the cause of the error */
+ rv = mtip_read_log_page(dd->port, ATA_LOG_SATA_NCQ,
+ dd->port->log_buf,
+ dd->port->log_buf_dma, 1);
+ if (rv) {
+ dev_warn(&dd->pdev->dev,
+ "Error in READ LOG EXT (10h) command\n");
+ /* non-critical error, don't fail the load */
+ } else {
+ buf = (unsigned char *)dd->port->log_buf;
+ if (buf[259] & 0x1) {
+ dev_info(&dd->pdev->dev,
+ "Write protect bit is set.\n");
+ set_bit(MTIP_DDF_WRITE_PROTECT_BIT, &dd->dd_flag);
+ fail_all_ncq_write = 1;
+ fail_reason = "write protect";
+ }
+ if (buf[288] == 0xF7) {
+ dev_info(&dd->pdev->dev,
+ "Exceeded Tmax, drive in thermal shutdown.\n");
+ set_bit(MTIP_DDF_OVER_TEMP_BIT, &dd->dd_flag);
+ fail_all_ncq_cmds = 1;
+ fail_reason = "thermal shutdown";
+ }
+ if (buf[288] == 0xBF) {
+ dev_info(&dd->pdev->dev,
+ "Drive indicates rebuild has failed.\n");
+ fail_all_ncq_cmds = 1;
+ fail_reason = "rebuild failed";
+ }
+ }
+
/* clear the tag accumulator */
memset(tagaccum, 0, SLOTBITS_IN_LONGS * sizeof(long));
@@ -779,32 +874,47 @@ static void mtip_handle_tfe(struct driver_data *dd)
for (bit = 0; bit < 32; bit++) {
reissue = 1;
tag = (group << 5) + bit;
+ cmd = &port->commands[tag];
/* If the active bit is set re-issue the command */
- if (atomic_read(&port->commands[tag].active) == 0)
+ if (atomic_read(&cmd->active) == 0)
continue;
- fis = (struct host_to_dev_fis *)
- port->commands[tag].command;
+ fis = (struct host_to_dev_fis *)cmd->command;
/* Should re-issue? */
if (tag == MTIP_TAG_INTERNAL ||
fis->command == ATA_CMD_SET_FEATURES)
reissue = 0;
+ else {
+ if (fail_all_ncq_cmds ||
+ (fail_all_ncq_write &&
+ fis->command == ATA_CMD_FPDMA_WRITE)) {
+ dev_warn(&dd->pdev->dev,
+ " Fail: %s w/tag %d [%s].\n",
+ fis->command == ATA_CMD_FPDMA_WRITE ?
+ "write" : "read",
+ tag,
+ fail_reason != NULL ?
+ fail_reason : "unknown");
+ atomic_set(&cmd->active, 0);
+ if (cmd->comp_func) {
+ cmd->comp_func(port, tag,
+ cmd->comp_data,
+ -ENODATA);
+ }
+ continue;
+ }
+ }
/*
* First check if this command has
* exceeded its retries.
*/
- if (reissue &&
- (port->commands[tag].retries-- > 0)) {
+ if (reissue && (cmd->retries-- > 0)) {
set_bit(tag, tagaccum);
- /* Update the timeout value. */
- port->commands[tag].comp_time =
- jiffies + msecs_to_jiffies(
- MTIP_NCQ_COMMAND_TIMEOUT_MS);
/* Re-issue the command. */
mtip_issue_ncq_command(port, tag);
@@ -814,13 +924,13 @@ static void mtip_handle_tfe(struct driver_data *dd)
/* Retire a command that will not be reissued */
dev_warn(&port->dd->pdev->dev,
"retiring tag %d\n", tag);
- atomic_set(&port->commands[tag].active, 0);
+ atomic_set(&cmd->active, 0);
- if (port->commands[tag].comp_func)
- port->commands[tag].comp_func(
+ if (cmd->comp_func)
+ cmd->comp_func(
port,
tag,
- port->commands[tag].comp_data,
+ cmd->comp_data,
PORT_IRQ_TF_ERR);
else
dev_warn(&port->dd->pdev->dev,
@@ -828,10 +938,10 @@ static void mtip_handle_tfe(struct driver_data *dd)
tag);
}
}
- print_tags(dd, "TFE tags reissued:", tagaccum);
+ print_tags(dd, "reissued (TFE)", tagaccum, cmd_cnt);
/* clear eh_active */
- clear_bit(MTIP_FLAG_EH_ACTIVE_BIT, &port->flags);
+ clear_bit(MTIP_PF_EH_ACTIVE_BIT, &port->flags);
wake_up_interruptible(&port->svc_wait);
mod_timer(&port->cmd_timer,
@@ -899,7 +1009,7 @@ static inline void mtip_process_legacy(struct driver_data *dd, u32 port_stat)
struct mtip_port *port = dd->port;
struct mtip_cmd *cmd = &port->commands[MTIP_TAG_INTERNAL];
- if (test_bit(MTIP_FLAG_IC_ACTIVE_BIT, &port->flags) &&
+ if (test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags) &&
(cmd != NULL) && !(readl(port->cmd_issue[MTIP_TAG_INTERNAL])
& (1 << MTIP_TAG_INTERNAL))) {
if (cmd->comp_func) {
@@ -911,8 +1021,6 @@ static inline void mtip_process_legacy(struct driver_data *dd, u32 port_stat)
}
}
- dev_warn(&dd->pdev->dev, "IRQ status 0x%x ignored.\n", port_stat);
-
return;
}
@@ -968,6 +1076,9 @@ static inline irqreturn_t mtip_handle_irq(struct driver_data *data)
/* don't proceed further */
return IRQ_HANDLED;
}
+ if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT,
+ &dd->dd_flag))
+ return rv;
mtip_process_errors(dd, port_stat & PORT_IRQ_ERR);
}
@@ -1015,6 +1126,39 @@ static void mtip_issue_non_ncq_command(struct mtip_port *port, int tag)
port->cmd_issue[MTIP_TAG_INDEX(tag)]);
}
+static bool mtip_pause_ncq(struct mtip_port *port,
+ struct host_to_dev_fis *fis)
+{
+ struct host_to_dev_fis *reply;
+ unsigned long task_file_data;
+
+ reply = port->rxfis + RX_FIS_D2H_REG;
+ task_file_data = readl(port->mmio+PORT_TFDATA);
+
+ if ((task_file_data & 1) || (fis->command == ATA_CMD_SEC_ERASE_UNIT))
+ return false;
+
+ if (fis->command == ATA_CMD_SEC_ERASE_PREP) {
+ set_bit(MTIP_PF_SE_ACTIVE_BIT, &port->flags);
+ port->ic_pause_timer = jiffies;
+ return true;
+ } else if ((fis->command == ATA_CMD_DOWNLOAD_MICRO) &&
+ (fis->features == 0x03)) {
+ set_bit(MTIP_PF_DM_ACTIVE_BIT, &port->flags);
+ port->ic_pause_timer = jiffies;
+ return true;
+ } else if ((fis->command == ATA_CMD_SEC_ERASE_UNIT) ||
+ ((fis->command == 0xFC) &&
+ (fis->features == 0x27 || fis->features == 0x72 ||
+ fis->features == 0x62 || fis->features == 0x26))) {
+ /* Com reset after secure erase or lowlevel format */
+ mtip_restart_port(port);
+ return false;
+ }
+
+ return false;
+}
+
/*
* Wait for port to quiesce
*
@@ -1033,11 +1177,13 @@ static int mtip_quiesce_io(struct mtip_port *port, unsigned long timeout)
to = jiffies + msecs_to_jiffies(timeout);
do {
- if (test_bit(MTIP_FLAG_SVC_THD_ACTIVE_BIT, &port->flags) &&
- test_bit(MTIP_FLAG_ISSUE_CMDS_BIT, &port->flags)) {
+ if (test_bit(MTIP_PF_SVC_THD_ACTIVE_BIT, &port->flags) &&
+ test_bit(MTIP_PF_ISSUE_CMDS_BIT, &port->flags)) {
msleep(20);
continue; /* svc thd is actively issuing commands */
}
+ if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &port->dd->dd_flag))
+ return -EFAULT;
/*
* Ignore s_active bit 0 of array element 0.
* This bit will always be set
@@ -1074,7 +1220,7 @@ static int mtip_quiesce_io(struct mtip_port *port, unsigned long timeout)
* -EAGAIN Time out waiting for command to complete.
*/
static int mtip_exec_internal_command(struct mtip_port *port,
- void *fis,
+ struct host_to_dev_fis *fis,
int fis_len,
dma_addr_t buffer,
int buf_len,
@@ -1084,8 +1230,9 @@ static int mtip_exec_internal_command(struct mtip_port *port,
{
struct mtip_cmd_sg *command_sg;
DECLARE_COMPLETION_ONSTACK(wait);
- int rv = 0;
+ int rv = 0, ready2go = 1;
struct mtip_cmd *int_cmd = &port->commands[MTIP_TAG_INTERNAL];
+ unsigned long to;
/* Make sure the buffer is 8 byte aligned. This is asic specific. */
if (buffer & 0x00000007) {
@@ -1094,23 +1241,38 @@ static int mtip_exec_internal_command(struct mtip_port *port,
return -EFAULT;
}
- /* Only one internal command should be running at a time */
- if (test_and_set_bit(MTIP_TAG_INTERNAL, port->allocated)) {
+ to = jiffies + msecs_to_jiffies(timeout);
+ do {
+ ready2go = !test_and_set_bit(MTIP_TAG_INTERNAL,
+ port->allocated);
+ if (ready2go)
+ break;
+ mdelay(100);
+ } while (time_before(jiffies, to));
+ if (!ready2go) {
dev_warn(&port->dd->pdev->dev,
- "Internal command already active\n");
+ "Internal cmd active. new cmd [%02X]\n", fis->command);
return -EBUSY;
}
- set_bit(MTIP_FLAG_IC_ACTIVE_BIT, &port->flags);
+ set_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags);
+ port->ic_pause_timer = 0;
+
+ if (fis->command == ATA_CMD_SEC_ERASE_UNIT)
+ clear_bit(MTIP_PF_SE_ACTIVE_BIT, &port->flags);
+ else if (fis->command == ATA_CMD_DOWNLOAD_MICRO)
+ clear_bit(MTIP_PF_DM_ACTIVE_BIT, &port->flags);
if (atomic == GFP_KERNEL) {
- /* wait for io to complete if non atomic */
- if (mtip_quiesce_io(port, 5000) < 0) {
- dev_warn(&port->dd->pdev->dev,
- "Failed to quiesce IO\n");
- release_slot(port, MTIP_TAG_INTERNAL);
- clear_bit(MTIP_FLAG_IC_ACTIVE_BIT, &port->flags);
- wake_up_interruptible(&port->svc_wait);
- return -EBUSY;
+ if (fis->command != ATA_CMD_STANDBYNOW1) {
+ /* wait for io to complete if non atomic */
+ if (mtip_quiesce_io(port, 5000) < 0) {
+ dev_warn(&port->dd->pdev->dev,
+ "Failed to quiesce IO\n");
+ release_slot(port, MTIP_TAG_INTERNAL);
+ clear_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags);
+ wake_up_interruptible(&port->svc_wait);
+ return -EBUSY;
+ }
}
/* Set the completion function and data for the command. */
@@ -1120,7 +1282,7 @@ static int mtip_exec_internal_command(struct mtip_port *port,
} else {
/* Clear completion - we're going to poll */
int_cmd->comp_data = NULL;
- int_cmd->comp_func = NULL;
+ int_cmd->comp_func = mtip_null_completion;
}
/* Copy the command to the command table */
@@ -1159,6 +1321,12 @@ static int mtip_exec_internal_command(struct mtip_port *port,
"Internal command did not complete [%d] "
"within timeout of %lu ms\n",
atomic, timeout);
+ if (mtip_check_surprise_removal(port->dd->pdev) ||
+ test_bit(MTIP_DDF_REMOVE_PENDING_BIT,
+ &port->dd->dd_flag)) {
+ rv = -ENXIO;
+ goto exec_ic_exit;
+ }
rv = -EAGAIN;
}
@@ -1166,31 +1334,59 @@ static int mtip_exec_internal_command(struct mtip_port *port,
& (1 << MTIP_TAG_INTERNAL)) {
dev_warn(&port->dd->pdev->dev,
"Retiring internal command but CI is 1.\n");
+ if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT,
+ &port->dd->dd_flag)) {
+ hba_reset_nosleep(port->dd);
+ rv = -ENXIO;
+ } else {
+ mtip_restart_port(port);
+ rv = -EAGAIN;
+ }
+ goto exec_ic_exit;
}
} else {
/* Spin for <timeout> checking if command still outstanding */
timeout = jiffies + msecs_to_jiffies(timeout);
-
- while ((readl(
- port->cmd_issue[MTIP_TAG_INTERNAL])
- & (1 << MTIP_TAG_INTERNAL))
- && time_before(jiffies, timeout))
- ;
+ while ((readl(port->cmd_issue[MTIP_TAG_INTERNAL])
+ & (1 << MTIP_TAG_INTERNAL))
+ && time_before(jiffies, timeout)) {
+ if (mtip_check_surprise_removal(port->dd->pdev)) {
+ rv = -ENXIO;
+ goto exec_ic_exit;
+ }
+ if ((fis->command != ATA_CMD_STANDBYNOW1) &&
+ test_bit(MTIP_DDF_REMOVE_PENDING_BIT,
+ &port->dd->dd_flag)) {
+ rv = -ENXIO;
+ goto exec_ic_exit;
+ }
+ }
if (readl(port->cmd_issue[MTIP_TAG_INTERNAL])
& (1 << MTIP_TAG_INTERNAL)) {
dev_err(&port->dd->pdev->dev,
- "Internal command did not complete [%d]\n",
- atomic);
+ "Internal command did not complete [atomic]\n");
rv = -EAGAIN;
+ if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT,
+ &port->dd->dd_flag)) {
+ hba_reset_nosleep(port->dd);
+ rv = -ENXIO;
+ } else {
+ mtip_restart_port(port);
+ rv = -EAGAIN;
+ }
}
}
-
+exec_ic_exit:
/* Clear the allocated and active bits for the internal command. */
atomic_set(&int_cmd->active, 0);
release_slot(port, MTIP_TAG_INTERNAL);
- clear_bit(MTIP_FLAG_IC_ACTIVE_BIT, &port->flags);
+ if (rv >= 0 && mtip_pause_ncq(port, fis)) {
+ /* NCQ paused */
+ return rv;
+ }
+ clear_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags);
wake_up_interruptible(&port->svc_wait);
return rv;
@@ -1240,6 +1436,9 @@ static int mtip_get_identify(struct mtip_port *port, void __user *user_buffer)
int rv = 0;
struct host_to_dev_fis fis;
+ if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &port->dd->dd_flag))
+ return -EFAULT;
+
/* Build the FIS. */
memset(&fis, 0, sizeof(struct host_to_dev_fis));
fis.type = 0x27;
@@ -1313,6 +1512,7 @@ static int mtip_standby_immediate(struct mtip_port *port)
{
int rv;
struct host_to_dev_fis fis;
+ unsigned long start;
/* Build the FIS. */
memset(&fis, 0, sizeof(struct host_to_dev_fis));
@@ -1320,15 +1520,150 @@ static int mtip_standby_immediate(struct mtip_port *port)
fis.opts = 1 << 7;
fis.command = ATA_CMD_STANDBYNOW1;
- /* Execute the command. Use a 15-second timeout for large drives. */
+ start = jiffies;
rv = mtip_exec_internal_command(port,
&fis,
5,
0,
0,
0,
- GFP_KERNEL,
+ GFP_ATOMIC,
15000);
+ dbg_printk(MTIP_DRV_NAME "Time taken to complete standby cmd: %d ms\n",
+ jiffies_to_msecs(jiffies - start));
+ if (rv)
+ dev_warn(&port->dd->pdev->dev,
+ "STANDBY IMMEDIATE command failed.\n");
+
+ return rv;
+}
+
+/*
+ * Issue a READ LOG EXT command to the device.
+ *
+ * @port pointer to the port structure.
+ * @page page number to fetch
+ * @buffer pointer to buffer
+ * @buffer_dma dma address corresponding to @buffer
+ * @sectors page length to fetch, in sectors
+ *
+ * return value
+ * @rv return value from mtip_exec_internal_command()
+ */
+static int mtip_read_log_page(struct mtip_port *port, u8 page, u16 *buffer,
+ dma_addr_t buffer_dma, unsigned int sectors)
+{
+ struct host_to_dev_fis fis;
+
+ memset(&fis, 0, sizeof(struct host_to_dev_fis));
+ fis.type = 0x27;
+ fis.opts = 1 << 7;
+ fis.command = ATA_CMD_READ_LOG_EXT;
+ fis.sect_count = sectors & 0xFF;
+ fis.sect_cnt_ex = (sectors >> 8) & 0xFF;
+ fis.lba_low = page;
+ fis.lba_mid = 0;
+ fis.device = ATA_DEVICE_OBS;
+
+ memset(buffer, 0, sectors * ATA_SECT_SIZE);
+
+ return mtip_exec_internal_command(port,
+ &fis,
+ 5,
+ buffer_dma,
+ sectors * ATA_SECT_SIZE,
+ 0,
+ GFP_ATOMIC,
+ MTIP_INTERNAL_COMMAND_TIMEOUT_MS);
+}
+
+/*
+ * Issue a SMART READ DATA command to the device.
+ *
+ * @port pointer to the port structure.
+ * @buffer pointer to buffer
+ * @buffer_dma dma address corresponding to @buffer
+ *
+ * return value
+ * @rv return value from mtip_exec_internal_command()
+ */
+static int mtip_get_smart_data(struct mtip_port *port, u8 *buffer,
+ dma_addr_t buffer_dma)
+{
+ struct host_to_dev_fis fis;
+
+ memset(&fis, 0, sizeof(struct host_to_dev_fis));
+ fis.type = 0x27;
+ fis.opts = 1 << 7;
+ fis.command = ATA_CMD_SMART;
+ fis.features = 0xD0;
+ fis.sect_count = 1;
+ fis.lba_mid = 0x4F;
+ fis.lba_hi = 0xC2;
+ fis.device = ATA_DEVICE_OBS;
+
+ return mtip_exec_internal_command(port,
+ &fis,
+ 5,
+ buffer_dma,
+ ATA_SECT_SIZE,
+ 0,
+ GFP_ATOMIC,
+ 15000);
+}
+
+/*
+ * Get the value of a smart attribute
+ *
+ * @port pointer to the port structure
+ * @id attribute number
+ * @attrib pointer to return attrib information corresponding to @id
+ *
+ * return value
+ * -EINVAL NULL buffer passed or unsupported attribute @id.
+ * -EPERM Identify data not valid, SMART not supported or not enabled
+ */
+static int mtip_get_smart_attr(struct mtip_port *port, unsigned int id,
+ struct smart_attr *attrib)
+{
+ int rv, i;
+ struct smart_attr *pattr;
+
+ if (!attrib)
+ return -EINVAL;
+
+ if (!port->identify_valid) {
+ dev_warn(&port->dd->pdev->dev, "IDENTIFY DATA not valid\n");
+ return -EPERM;
+ }
+ if (!(port->identify[82] & 0x1)) {
+ dev_warn(&port->dd->pdev->dev, "SMART not supported\n");
+ return -EPERM;
+ }
+ if (!(port->identify[85] & 0x1)) {
+ dev_warn(&port->dd->pdev->dev, "SMART not enabled\n");
+ return -EPERM;
+ }
+
+ memset(port->smart_buf, 0, ATA_SECT_SIZE);
+ rv = mtip_get_smart_data(port, port->smart_buf, port->smart_buf_dma);
+ if (rv) {
+ dev_warn(&port->dd->pdev->dev, "Failed to ge SMART data\n");
+ return rv;
+ }
+
+ pattr = (struct smart_attr *)(port->smart_buf + 2);
+ for (i = 0; i < 29; i++, pattr++)
+ if (pattr->attr_id == id) {
+ memcpy(attrib, pattr, sizeof(struct smart_attr));
+ break;
+ }
+
+ if (i == 29) {
+ dev_warn(&port->dd->pdev->dev,
+ "Query for invalid SMART attribute ID\n");
+ rv = -EINVAL;
+ }
return rv;
}
@@ -1504,10 +1839,7 @@ static int exec_drive_task(struct mtip_port *port, u8 *command)
fis.cyl_hi = command[5];
fis.device = command[6] & ~0x10; /* Clear the dev bit*/
-
- dbg_printk(MTIP_DRV_NAME "%s: User Command: cmd %x, feat %x, "
- "nsect %x, sect %x, lcyl %x, "
- "hcyl %x, sel %x\n",
+ dbg_printk(MTIP_DRV_NAME " %s: User Command: cmd %x, feat %x, nsect %x, sect %x, lcyl %x, hcyl %x, sel %x\n",
__func__,
command[0],
command[1],
@@ -1534,8 +1866,7 @@ static int exec_drive_task(struct mtip_port *port, u8 *command)
command[4] = reply->cyl_low;
command[5] = reply->cyl_hi;
- dbg_printk(MTIP_DRV_NAME "%s: Completion Status: stat %x, "
- "err %x , cyl_lo %x cyl_hi %x\n",
+ dbg_printk(MTIP_DRV_NAME " %s: Completion Status: stat %x, err %x , cyl_lo %x cyl_hi %x\n",
__func__,
command[0],
command[1],
@@ -1578,7 +1909,7 @@ static int exec_drive_command(struct mtip_port *port, u8 *command,
}
dbg_printk(MTIP_DRV_NAME
- "%s: User Command: cmd %x, sect %x, "
+ " %s: User Command: cmd %x, sect %x, "
"feat %x, sectcnt %x\n",
__func__,
command[0],
@@ -1607,7 +1938,7 @@ static int exec_drive_command(struct mtip_port *port, u8 *command,
command[2] = command[3];
dbg_printk(MTIP_DRV_NAME
- "%s: Completion Status: stat %x, "
+ " %s: Completion Status: stat %x, "
"err %x, cmd %x\n",
__func__,
command[0],
@@ -1810,9 +2141,10 @@ static int exec_drive_taskfile(struct driver_data *dd,
}
dbg_printk(MTIP_DRV_NAME
- "taskfile: cmd %x, feat %x, nsect %x,"
+ " %s: cmd %x, feat %x, nsect %x,"
" sect/lbal %x, lcyl/lbam %x, hcyl/lbah %x,"
" head/dev %x\n",
+ __func__,
fis.command,
fis.features,
fis.sect_count,
@@ -1823,8 +2155,8 @@ static int exec_drive_taskfile(struct driver_data *dd,
switch (fis.command) {
case ATA_CMD_DOWNLOAD_MICRO:
- /* Change timeout for Download Microcode to 60 seconds.*/
- timeout = 60000;
+ /* Change timeout for Download Microcode to 2 minutes */
+ timeout = 120000;
break;
case ATA_CMD_SEC_ERASE_UNIT:
/* Change timeout for Security Erase Unit to 4 minutes.*/
@@ -1840,8 +2172,8 @@ static int exec_drive_taskfile(struct driver_data *dd,
timeout = 10000;
break;
case ATA_CMD_SMART:
- /* Change timeout for vendor unique command to 10 secs */
- timeout = 10000;
+ /* Change timeout for vendor unique command to 15 secs */
+ timeout = 15000;
break;
default:
timeout = MTIP_IOCTL_COMMAND_TIMEOUT_MS;
@@ -1903,18 +2235,8 @@ static int exec_drive_taskfile(struct driver_data *dd,
req_task->hob_ports[1] = reply->features_ex;
req_task->hob_ports[2] = reply->sect_cnt_ex;
}
-
- /* Com rest after secure erase or lowlevel format */
- if (((fis.command == ATA_CMD_SEC_ERASE_UNIT) ||
- ((fis.command == 0xFC) &&
- (fis.features == 0x27 || fis.features == 0x72 ||
- fis.features == 0x62 || fis.features == 0x26))) &&
- !(reply->command & 1)) {
- mtip_restart_port(dd->port);
- }
-
dbg_printk(MTIP_DRV_NAME
- "%s: Completion: stat %x,"
+ " %s: Completion: stat %x,"
"err %x, sect_cnt %x, lbalo %x,"
"lbamid %x, lbahi %x, dev %x\n",
__func__,
@@ -2080,14 +2402,10 @@ static void mtip_hw_submit_io(struct driver_data *dd, sector_t start,
struct host_to_dev_fis *fis;
struct mtip_port *port = dd->port;
struct mtip_cmd *command = &port->commands[tag];
+ int dma_dir = (dir == READ) ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
/* Map the scatter list for DMA access */
- if (dir == READ)
- nents = dma_map_sg(&dd->pdev->dev, command->sg,
- nents, DMA_FROM_DEVICE);
- else
- nents = dma_map_sg(&dd->pdev->dev, command->sg,
- nents, DMA_TO_DEVICE);
+ nents = dma_map_sg(&dd->pdev->dev, command->sg, nents, dma_dir);
command->scatter_ents = nents;
@@ -2127,7 +2445,7 @@ static void mtip_hw_submit_io(struct driver_data *dd, sector_t start,
*/
command->comp_data = dd;
command->comp_func = mtip_async_complete;
- command->direction = (dir == READ ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
+ command->direction = dma_dir;
/*
* Set the completion function and data for the command passed
@@ -2140,19 +2458,16 @@ static void mtip_hw_submit_io(struct driver_data *dd, sector_t start,
* To prevent this command from being issued
* if an internal command is in progress or error handling is active.
*/
- if (unlikely(test_bit(MTIP_FLAG_IC_ACTIVE_BIT, &port->flags) ||
- test_bit(MTIP_FLAG_EH_ACTIVE_BIT, &port->flags))) {
+ if (port->flags & MTIP_PF_PAUSE_IO) {
set_bit(tag, port->cmds_to_issue);
- set_bit(MTIP_FLAG_ISSUE_CMDS_BIT, &port->flags);
+ set_bit(MTIP_PF_ISSUE_CMDS_BIT, &port->flags);
return;
}
/* Issue the command to the hardware */
mtip_issue_ncq_command(port, tag);
- /* Set the command's timeout value.*/
- port->commands[tag].comp_time = jiffies + msecs_to_jiffies(
- MTIP_NCQ_COMMAND_TIMEOUT_MS);
+ return;
}
/*
@@ -2191,6 +2506,10 @@ static struct scatterlist *mtip_hw_get_scatterlist(struct driver_data *dd,
down(&dd->port->cmd_slot);
*tag = get_slot(dd->port);
+ if (unlikely(test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag))) {
+ up(&dd->port->cmd_slot);
+ return NULL;
+ }
if (unlikely(*tag < 0))
return NULL;
@@ -2207,7 +2526,7 @@ static struct scatterlist *mtip_hw_get_scatterlist(struct driver_data *dd,
* return value
* The size, in bytes, of the data copied into buf.
*/
-static ssize_t hw_show_registers(struct device *dev,
+static ssize_t mtip_hw_show_registers(struct device *dev,
struct device_attribute *attr,
char *buf)
{
@@ -2216,7 +2535,7 @@ static ssize_t hw_show_registers(struct device *dev,
int size = 0;
int n;
- size += sprintf(&buf[size], "%s:\ns_active:\n", __func__);
+ size += sprintf(&buf[size], "S ACTive:\n");
for (n = 0; n < dd->slot_groups; n++)
size += sprintf(&buf[size], "0x%08x\n",
@@ -2240,20 +2559,39 @@ static ssize_t hw_show_registers(struct device *dev,
group_allocated);
}
- size += sprintf(&buf[size], "completed:\n");
+ size += sprintf(&buf[size], "Completed:\n");
for (n = 0; n < dd->slot_groups; n++)
size += sprintf(&buf[size], "0x%08x\n",
readl(dd->port->completed[n]));
- size += sprintf(&buf[size], "PORT_IRQ_STAT 0x%08x\n",
+ size += sprintf(&buf[size], "PORT IRQ STAT : 0x%08x\n",
readl(dd->port->mmio + PORT_IRQ_STAT));
- size += sprintf(&buf[size], "HOST_IRQ_STAT 0x%08x\n",
+ size += sprintf(&buf[size], "HOST IRQ STAT : 0x%08x\n",
readl(dd->mmio + HOST_IRQ_STAT));
return size;
}
-static DEVICE_ATTR(registers, S_IRUGO, hw_show_registers, NULL);
+
+static ssize_t mtip_hw_show_status(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct driver_data *dd = dev_to_disk(dev)->private_data;
+ int size = 0;
+
+ if (test_bit(MTIP_DDF_OVER_TEMP_BIT, &dd->dd_flag))
+ size += sprintf(buf, "%s", "thermal_shutdown\n");
+ else if (test_bit(MTIP_DDF_WRITE_PROTECT_BIT, &dd->dd_flag))
+ size += sprintf(buf, "%s", "write_protect\n");
+ else
+ size += sprintf(buf, "%s", "online\n");
+
+ return size;
+}
+
+static DEVICE_ATTR(registers, S_IRUGO, mtip_hw_show_registers, NULL);
+static DEVICE_ATTR(status, S_IRUGO, mtip_hw_show_status, NULL);
/*
* Create the sysfs related attributes.
@@ -2272,7 +2610,10 @@ static int mtip_hw_sysfs_init(struct driver_data *dd, struct kobject *kobj)
if (sysfs_create_file(kobj, &dev_attr_registers.attr))
dev_warn(&dd->pdev->dev,
- "Error creating registers sysfs entry\n");
+ "Error creating 'registers' sysfs entry\n");
+ if (sysfs_create_file(kobj, &dev_attr_status.attr))
+ dev_warn(&dd->pdev->dev,
+ "Error creating 'status' sysfs entry\n");
return 0;
}
@@ -2292,6 +2633,7 @@ static int mtip_hw_sysfs_exit(struct driver_data *dd, struct kobject *kobj)
return -EINVAL;
sysfs_remove_file(kobj, &dev_attr_registers.attr);
+ sysfs_remove_file(kobj, &dev_attr_status.attr);
return 0;
}
@@ -2384,10 +2726,12 @@ static int mtip_ftl_rebuild_poll(struct driver_data *dd)
"FTL rebuild in progress. Polling for completion.\n");
start = jiffies;
- dd->ftlrebuildflag = 1;
timeout = jiffies + msecs_to_jiffies(MTIP_FTL_REBUILD_TIMEOUT_MS);
do {
+ if (unlikely(test_bit(MTIP_DDF_REMOVE_PENDING_BIT,
+ &dd->dd_flag)))
+ return -EFAULT;
if (mtip_check_surprise_removal(dd->pdev))
return -EFAULT;
@@ -2408,22 +2752,17 @@ static int mtip_ftl_rebuild_poll(struct driver_data *dd)
dev_warn(&dd->pdev->dev,
"FTL rebuild complete (%d secs).\n",
jiffies_to_msecs(jiffies - start) / 1000);
- dd->ftlrebuildflag = 0;
mtip_block_initialize(dd);
- break;
+ return 0;
}
ssleep(10);
} while (time_before(jiffies, timeout));
/* Check for timeout */
- if (dd->ftlrebuildflag) {
- dev_err(&dd->pdev->dev,
+ dev_err(&dd->pdev->dev,
"Timed out waiting for FTL rebuild to complete (%d secs).\n",
jiffies_to_msecs(jiffies - start) / 1000);
- return -EFAULT;
- }
-
- return 0;
+ return -EFAULT;
}
/*
@@ -2448,14 +2787,17 @@ static int mtip_service_thread(void *data)
* is in progress nor error handling is active
*/
wait_event_interruptible(port->svc_wait, (port->flags) &&
- !test_bit(MTIP_FLAG_IC_ACTIVE_BIT, &port->flags) &&
- !test_bit(MTIP_FLAG_EH_ACTIVE_BIT, &port->flags));
+ !(port->flags & MTIP_PF_PAUSE_IO));
if (kthread_should_stop())
break;
- set_bit(MTIP_FLAG_SVC_THD_ACTIVE_BIT, &port->flags);
- if (test_bit(MTIP_FLAG_ISSUE_CMDS_BIT, &port->flags)) {
+ if (unlikely(test_bit(MTIP_DDF_REMOVE_PENDING_BIT,
+ &dd->dd_flag)))
+ break;
+
+ set_bit(MTIP_PF_SVC_THD_ACTIVE_BIT, &port->flags);
+ if (test_bit(MTIP_PF_ISSUE_CMDS_BIT, &port->flags)) {
slot = 1;
/* used to restrict the loop to one iteration */
slot_start = num_cmd_slots;
@@ -2480,21 +2822,19 @@ static int mtip_service_thread(void *data)
/* Issue the command to the hardware */
mtip_issue_ncq_command(port, slot);
- /* Set the command's timeout value.*/
- port->commands[slot].comp_time = jiffies +
- msecs_to_jiffies(MTIP_NCQ_COMMAND_TIMEOUT_MS);
-
clear_bit(slot, port->cmds_to_issue);
}
- clear_bit(MTIP_FLAG_ISSUE_CMDS_BIT, &port->flags);
- } else if (test_bit(MTIP_FLAG_REBUILD_BIT, &port->flags)) {
- mtip_ftl_rebuild_poll(dd);
- clear_bit(MTIP_FLAG_REBUILD_BIT, &port->flags);
+ clear_bit(MTIP_PF_ISSUE_CMDS_BIT, &port->flags);
+ } else if (test_bit(MTIP_PF_REBUILD_BIT, &port->flags)) {
+ if (!mtip_ftl_rebuild_poll(dd))
+ set_bit(MTIP_DDF_REBUILD_FAILED_BIT,
+ &dd->dd_flag);
+ clear_bit(MTIP_PF_REBUILD_BIT, &port->flags);
}
- clear_bit(MTIP_FLAG_SVC_THD_ACTIVE_BIT, &port->flags);
+ clear_bit(MTIP_PF_SVC_THD_ACTIVE_BIT, &port->flags);
- if (test_bit(MTIP_FLAG_SVC_THD_SHOULD_STOP_BIT, &port->flags))
+ if (test_bit(MTIP_PF_SVC_THD_STOP_BIT, &port->flags))
break;
}
return 0;
@@ -2513,6 +2853,9 @@ static int mtip_hw_init(struct driver_data *dd)
int i;
int rv;
unsigned int num_command_slots;
+ unsigned long timeout, timetaken;
+ unsigned char *buf;
+ struct smart_attr attr242;
dd->mmio = pcim_iomap_table(dd->pdev)[MTIP_ABAR];
@@ -2547,7 +2890,7 @@ static int mtip_hw_init(struct driver_data *dd)
/* Allocate memory for the command list. */
dd->port->command_list =
dmam_alloc_coherent(&dd->pdev->dev,
- HW_PORT_PRIV_DMA_SZ + (ATA_SECT_SIZE * 2),
+ HW_PORT_PRIV_DMA_SZ + (ATA_SECT_SIZE * 4),
&dd->port->command_list_dma,
GFP_KERNEL);
if (!dd->port->command_list) {
@@ -2560,7 +2903,7 @@ static int mtip_hw_init(struct driver_data *dd)
/* Clear the memory we have allocated. */
memset(dd->port->command_list,
0,
- HW_PORT_PRIV_DMA_SZ + (ATA_SECT_SIZE * 2));
+ HW_PORT_PRIV_DMA_SZ + (ATA_SECT_SIZE * 4));
/* Setup the addresse of the RX FIS. */
dd->port->rxfis = dd->port->command_list + HW_CMD_SLOT_SZ;
@@ -2576,10 +2919,19 @@ static int mtip_hw_init(struct driver_data *dd)
dd->port->identify_dma = dd->port->command_tbl_dma +
HW_CMD_TBL_AR_SZ;
- /* Setup the address of the sector buffer. */
+ /* Setup the address of the sector buffer - for some non-ncq cmds */
dd->port->sector_buffer = (void *) dd->port->identify + ATA_SECT_SIZE;
dd->port->sector_buffer_dma = dd->port->identify_dma + ATA_SECT_SIZE;
+ /* Setup the address of the log buf - for read log command */
+ dd->port->log_buf = (void *)dd->port->sector_buffer + ATA_SECT_SIZE;
+ dd->port->log_buf_dma = dd->port->sector_buffer_dma + ATA_SECT_SIZE;
+
+ /* Setup the address of the smart buf - for smart read data command */
+ dd->port->smart_buf = (void *)dd->port->log_buf + ATA_SECT_SIZE;
+ dd->port->smart_buf_dma = dd->port->log_buf_dma + ATA_SECT_SIZE;
+
+
/* Point the command headers at the command tables. */
for (i = 0; i < num_command_slots; i++) {
dd->port->commands[i].command_header =
@@ -2623,14 +2975,43 @@ static int mtip_hw_init(struct driver_data *dd)
dd->port->mmio + i*0x80 + PORT_SDBV;
}
- /* Reset the HBA. */
- if (mtip_hba_reset(dd) < 0) {
- dev_err(&dd->pdev->dev,
- "Card did not reset within timeout\n");
- rv = -EIO;
+ timetaken = jiffies;
+ timeout = jiffies + msecs_to_jiffies(30000);
+ while (((readl(dd->port->mmio + PORT_SCR_STAT) & 0x0F) != 0x03) &&
+ time_before(jiffies, timeout)) {
+ mdelay(100);
+ }
+ if (unlikely(mtip_check_surprise_removal(dd->pdev))) {
+ timetaken = jiffies - timetaken;
+ dev_warn(&dd->pdev->dev,
+ "Surprise removal detected at %u ms\n",
+ jiffies_to_msecs(timetaken));
+ rv = -ENODEV;
+ goto out2 ;
+ }
+ if (unlikely(test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag))) {
+ timetaken = jiffies - timetaken;
+ dev_warn(&dd->pdev->dev,
+ "Removal detected at %u ms\n",
+ jiffies_to_msecs(timetaken));
+ rv = -EFAULT;
goto out2;
}
+ /* Conditionally reset the HBA. */
+ if (!(readl(dd->mmio + HOST_CAP) & HOST_CAP_NZDMA)) {
+ if (mtip_hba_reset(dd) < 0) {
+ dev_err(&dd->pdev->dev,
+ "Card did not reset within timeout\n");
+ rv = -EIO;
+ goto out2;
+ }
+ } else {
+ /* Clear any pending interrupts on the HBA */
+ writel(readl(dd->mmio + HOST_IRQ_STAT),
+ dd->mmio + HOST_IRQ_STAT);
+ }
+
mtip_init_port(dd->port);
mtip_start_port(dd->port);
@@ -2660,6 +3041,12 @@ static int mtip_hw_init(struct driver_data *dd)
mod_timer(&dd->port->cmd_timer,
jiffies + msecs_to_jiffies(MTIP_TIMEOUT_CHECK_PERIOD));
+
+ if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag)) {
+ rv = -EFAULT;
+ goto out3;
+ }
+
if (mtip_get_identify(dd->port, NULL) < 0) {
rv = -EFAULT;
goto out3;
@@ -2667,10 +3054,47 @@ static int mtip_hw_init(struct driver_data *dd)
if (*(dd->port->identify + MTIP_FTL_REBUILD_OFFSET) ==
MTIP_FTL_REBUILD_MAGIC) {
- set_bit(MTIP_FLAG_REBUILD_BIT, &dd->port->flags);
+ set_bit(MTIP_PF_REBUILD_BIT, &dd->port->flags);
return MTIP_FTL_REBUILD_MAGIC;
}
mtip_dump_identify(dd->port);
+
+ /* check write protect, over temp and rebuild statuses */
+ rv = mtip_read_log_page(dd->port, ATA_LOG_SATA_NCQ,
+ dd->port->log_buf,
+ dd->port->log_buf_dma, 1);
+ if (rv) {
+ dev_warn(&dd->pdev->dev,
+ "Error in READ LOG EXT (10h) command\n");
+ /* non-critical error, don't fail the load */
+ } else {
+ buf = (unsigned char *)dd->port->log_buf;
+ if (buf[259] & 0x1) {
+ dev_info(&dd->pdev->dev,
+ "Write protect bit is set.\n");
+ set_bit(MTIP_DDF_WRITE_PROTECT_BIT, &dd->dd_flag);
+ }
+ if (buf[288] == 0xF7) {
+ dev_info(&dd->pdev->dev,
+ "Exceeded Tmax, drive in thermal shutdown.\n");
+ set_bit(MTIP_DDF_OVER_TEMP_BIT, &dd->dd_flag);
+ }
+ if (buf[288] == 0xBF) {
+ dev_info(&dd->pdev->dev,
+ "Drive indicates rebuild has failed.\n");
+ /* TODO */
+ }
+ }
+
+ /* get write protect progess */
+ memset(&attr242, 0, sizeof(struct smart_attr));
+ if (mtip_get_smart_attr(dd->port, 242, &attr242))
+ dev_warn(&dd->pdev->dev,
+ "Unable to check write protect progress\n");
+ else
+ dev_info(&dd->pdev->dev,
+ "Write protect progress: %d%% (%d blocks)\n",
+ attr242.cur, attr242.data);
return rv;
out3:
@@ -2688,7 +3112,7 @@ out2:
/* Free the command/command header memory. */
dmam_free_coherent(&dd->pdev->dev,
- HW_PORT_PRIV_DMA_SZ + (ATA_SECT_SIZE * 2),
+ HW_PORT_PRIV_DMA_SZ + (ATA_SECT_SIZE * 4),
dd->port->command_list,
dd->port->command_list_dma);
out1:
@@ -2712,9 +3136,12 @@ static int mtip_hw_exit(struct driver_data *dd)
* Send standby immediate (E0h) to the drive so that it
* saves its state.
*/
- if (atomic_read(&dd->drv_cleanup_done) != true) {
+ if (!test_bit(MTIP_DDF_CLEANUP_BIT, &dd->dd_flag)) {
- mtip_standby_immediate(dd->port);
+ if (!test_bit(MTIP_PF_REBUILD_BIT, &dd->port->flags))
+ if (mtip_standby_immediate(dd->port))
+ dev_warn(&dd->pdev->dev,
+ "STANDBY IMMEDIATE failed\n");
/* de-initialize the port. */
mtip_deinit_port(dd->port);
@@ -2734,7 +3161,7 @@ static int mtip_hw_exit(struct driver_data *dd)
/* Free the command/command header memory. */
dmam_free_coherent(&dd->pdev->dev,
- HW_PORT_PRIV_DMA_SZ + (ATA_SECT_SIZE * 2),
+ HW_PORT_PRIV_DMA_SZ + (ATA_SECT_SIZE * 4),
dd->port->command_list,
dd->port->command_list_dma);
/* Free the memory allocated for the for structure. */
@@ -2892,6 +3319,9 @@ static int mtip_block_ioctl(struct block_device *dev,
if (!dd)
return -ENOTTY;
+ if (unlikely(test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag)))
+ return -ENOTTY;
+
switch (cmd) {
case BLKFLSBUF:
return -ENOTTY;
@@ -2927,6 +3357,9 @@ static int mtip_block_compat_ioctl(struct block_device *dev,
if (!dd)
return -ENOTTY;
+ if (unlikely(test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag)))
+ return -ENOTTY;
+
switch (cmd) {
case BLKFLSBUF:
return -ENOTTY;
@@ -3049,6 +3482,24 @@ static void mtip_make_request(struct request_queue *queue, struct bio *bio)
int nents = 0;
int tag = 0;
+ if (unlikely(dd->dd_flag & MTIP_DDF_STOP_IO)) {
+ if (unlikely(test_bit(MTIP_DDF_REMOVE_PENDING_BIT,
+ &dd->dd_flag))) {
+ bio_endio(bio, -ENXIO);
+ return;
+ }
+ if (unlikely(test_bit(MTIP_DDF_OVER_TEMP_BIT, &dd->dd_flag))) {
+ bio_endio(bio, -ENODATA);
+ return;
+ }
+ if (unlikely(test_bit(MTIP_DDF_WRITE_PROTECT_BIT,
+ &dd->dd_flag) &&
+ bio_data_dir(bio))) {
+ bio_endio(bio, -ENODATA);
+ return;
+ }
+ }
+
if (unlikely(!bio_has_data(bio))) {
blk_queue_flush(queue, 0);
bio_endio(bio, 0);
@@ -3061,7 +3512,7 @@ static void mtip_make_request(struct request_queue *queue, struct bio *bio)
if (unlikely((bio)->bi_vcnt > MTIP_MAX_SG)) {
dev_warn(&dd->pdev->dev,
- "Maximum number of SGL entries exceeded");
+ "Maximum number of SGL entries exceeded\n");
bio_io_error(bio);
mtip_hw_release_scatterlist(dd, tag);
return;
@@ -3210,8 +3661,10 @@ skip_create_disk:
kobject_put(kobj);
}
- if (dd->mtip_svc_handler)
+ if (dd->mtip_svc_handler) {
+ set_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag);
return rv; /* service thread created for handling rebuild */
+ }
start_service_thread:
sprintf(thd_name, "mtip_svc_thd_%02d", index);
@@ -3220,12 +3673,15 @@ start_service_thread:
dd, thd_name);
if (IS_ERR(dd->mtip_svc_handler)) {
- printk(KERN_ERR "mtip32xx: service thread failed to start\n");
+ dev_err(&dd->pdev->dev, "service thread failed to start\n");
dd->mtip_svc_handler = NULL;
rv = -EFAULT;
goto kthread_run_error;
}
+ if (wait_for_rebuild == MTIP_FTL_REBUILD_MAGIC)
+ rv = wait_for_rebuild;
+
return rv;
kthread_run_error:
@@ -3266,16 +3722,18 @@ static int mtip_block_remove(struct driver_data *dd)
struct kobject *kobj;
if (dd->mtip_svc_handler) {
- set_bit(MTIP_FLAG_SVC_THD_SHOULD_STOP_BIT, &dd->port->flags);
+ set_bit(MTIP_PF_SVC_THD_STOP_BIT, &dd->port->flags);
wake_up_interruptible(&dd->port->svc_wait);
kthread_stop(dd->mtip_svc_handler);
}
- /* Clean up the sysfs attributes managed by the protocol layer. */
- kobj = kobject_get(&disk_to_dev(dd->disk)->kobj);
- if (kobj) {
- mtip_hw_sysfs_exit(dd, kobj);
- kobject_put(kobj);
+ /* Clean up the sysfs attributes, if created */
+ if (test_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag)) {
+ kobj = kobject_get(&disk_to_dev(dd->disk)->kobj);
+ if (kobj) {
+ mtip_hw_sysfs_exit(dd, kobj);
+ kobject_put(kobj);
+ }
}
/*
@@ -3283,6 +3741,11 @@ static int mtip_block_remove(struct driver_data *dd)
* from /dev
*/
del_gendisk(dd->disk);
+
+ spin_lock(&rssd_index_lock);
+ ida_remove(&rssd_index_ida, dd->index);
+ spin_unlock(&rssd_index_lock);
+
blk_cleanup_queue(dd->queue);
dd->disk = NULL;
dd->queue = NULL;
@@ -3312,6 +3775,11 @@ static int mtip_block_shutdown(struct driver_data *dd)
/* Delete our gendisk structure, and cleanup the blk queue. */
del_gendisk(dd->disk);
+
+ spin_lock(&rssd_index_lock);
+ ida_remove(&rssd_index_ida, dd->index);
+ spin_unlock(&rssd_index_lock);
+
blk_cleanup_queue(dd->queue);
dd->disk = NULL;
dd->queue = NULL;
@@ -3359,11 +3827,6 @@ static int mtip_pci_probe(struct pci_dev *pdev,
return -ENOMEM;
}
- /* Set the atomic variable as 1 in case of SRSI */
- atomic_set(&dd->drv_cleanup_done, true);
-
- atomic_set(&dd->resumeflag, false);
-
/* Attach the private data to this PCI device. */
pci_set_drvdata(pdev, dd);
@@ -3420,7 +3883,8 @@ static int mtip_pci_probe(struct pci_dev *pdev,
* instance number.
*/
instance++;
-
+ if (rv != MTIP_FTL_REBUILD_MAGIC)
+ set_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag);
goto done;
block_initialize_err:
@@ -3434,9 +3898,6 @@ iomap_err:
pci_set_drvdata(pdev, NULL);
return rv;
done:
- /* Set the atomic variable as 0 in case of SRSI */
- atomic_set(&dd->drv_cleanup_done, true);
-
return rv;
}
@@ -3452,8 +3913,10 @@ static void mtip_pci_remove(struct pci_dev *pdev)
struct driver_data *dd = pci_get_drvdata(pdev);
int counter = 0;
+ set_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag);
+
if (mtip_check_surprise_removal(pdev)) {
- while (atomic_read(&dd->drv_cleanup_done) == false) {
+ while (!test_bit(MTIP_DDF_CLEANUP_BIT, &dd->dd_flag)) {
counter++;
msleep(20);
if (counter == 10) {
@@ -3463,8 +3926,6 @@ static void mtip_pci_remove(struct pci_dev *pdev)
}
}
}
- /* Set the atomic variable as 1 in case of SRSI */
- atomic_set(&dd->drv_cleanup_done, true);
/* Clean up the block layer. */
mtip_block_remove(dd);
@@ -3493,7 +3954,7 @@ static int mtip_pci_suspend(struct pci_dev *pdev, pm_message_t mesg)
return -EFAULT;
}
- atomic_set(&dd->resumeflag, true);
+ set_bit(MTIP_DDF_RESUME_BIT, &dd->dd_flag);
/* Disable ports & interrupts then send standby immediate */
rv = mtip_block_suspend(dd);
@@ -3559,7 +4020,7 @@ static int mtip_pci_resume(struct pci_dev *pdev)
dev_err(&pdev->dev, "Unable to resume\n");
err:
- atomic_set(&dd->resumeflag, false);
+ clear_bit(MTIP_DDF_RESUME_BIT, &dd->dd_flag);
return rv;
}
@@ -3608,18 +4069,25 @@ MODULE_DEVICE_TABLE(pci, mtip_pci_tbl);
*/
static int __init mtip_init(void)
{
+ int error;
+
printk(KERN_INFO MTIP_DRV_NAME " Version " MTIP_DRV_VERSION "\n");
/* Allocate a major block device number to use with this driver. */
- mtip_major = register_blkdev(0, MTIP_DRV_NAME);
- if (mtip_major < 0) {
+ error = register_blkdev(0, MTIP_DRV_NAME);
+ if (error <= 0) {
printk(KERN_ERR "Unable to register block device (%d)\n",
- mtip_major);
+ error);
return -EBUSY;
}
+ mtip_major = error;
/* Register our PCI operations. */
- return pci_register_driver(&mtip_pci_driver);
+ error = pci_register_driver(&mtip_pci_driver);
+ if (error)
+ unregister_blkdev(mtip_major, MTIP_DRV_NAME);
+
+ return error;
}
/*
diff --git a/drivers/block/mtip32xx/mtip32xx.h b/drivers/block/mtip32xx/mtip32xx.h
index e0554a8f2233..4ef58336310a 100644
--- a/drivers/block/mtip32xx/mtip32xx.h
+++ b/drivers/block/mtip32xx/mtip32xx.h
@@ -34,8 +34,8 @@
/* offset of Device Control register in PCIe extended capabilites space */
#define PCIE_CONFIG_EXT_DEVICE_CONTROL_OFFSET 0x48
-/* # of times to retry timed out IOs */
-#define MTIP_MAX_RETRIES 5
+/* # of times to retry timed out/failed IOs */
+#define MTIP_MAX_RETRIES 2
/* Various timeout values in ms */
#define MTIP_NCQ_COMMAND_TIMEOUT_MS 5000
@@ -114,12 +114,41 @@
#define __force_bit2int (unsigned int __force)
/* below are bit numbers in 'flags' defined in mtip_port */
-#define MTIP_FLAG_IC_ACTIVE_BIT 0
-#define MTIP_FLAG_EH_ACTIVE_BIT 1
-#define MTIP_FLAG_SVC_THD_ACTIVE_BIT 2
-#define MTIP_FLAG_ISSUE_CMDS_BIT 4
-#define MTIP_FLAG_REBUILD_BIT 5
-#define MTIP_FLAG_SVC_THD_SHOULD_STOP_BIT 8
+#define MTIP_PF_IC_ACTIVE_BIT 0 /* pio/ioctl */
+#define MTIP_PF_EH_ACTIVE_BIT 1 /* error handling */
+#define MTIP_PF_SE_ACTIVE_BIT 2 /* secure erase */
+#define MTIP_PF_DM_ACTIVE_BIT 3 /* download microcde */
+#define MTIP_PF_PAUSE_IO ((1 << MTIP_PF_IC_ACTIVE_BIT) | \
+ (1 << MTIP_PF_EH_ACTIVE_BIT) | \
+ (1 << MTIP_PF_SE_ACTIVE_BIT) | \
+ (1 << MTIP_PF_DM_ACTIVE_BIT))
+
+#define MTIP_PF_SVC_THD_ACTIVE_BIT 4
+#define MTIP_PF_ISSUE_CMDS_BIT 5
+#define MTIP_PF_REBUILD_BIT 6
+#define MTIP_PF_SVC_THD_STOP_BIT 8
+
+/* below are bit numbers in 'dd_flag' defined in driver_data */
+#define MTIP_DDF_REMOVE_PENDING_BIT 1
+#define MTIP_DDF_OVER_TEMP_BIT 2
+#define MTIP_DDF_WRITE_PROTECT_BIT 3
+#define MTIP_DDF_STOP_IO ((1 << MTIP_DDF_REMOVE_PENDING_BIT) | \
+ (1 << MTIP_DDF_OVER_TEMP_BIT) | \
+ (1 << MTIP_DDF_WRITE_PROTECT_BIT))
+
+#define MTIP_DDF_CLEANUP_BIT 5
+#define MTIP_DDF_RESUME_BIT 6
+#define MTIP_DDF_INIT_DONE_BIT 7
+#define MTIP_DDF_REBUILD_FAILED_BIT 8
+
+__packed struct smart_attr{
+ u8 attr_id;
+ u16 flags;
+ u8 cur;
+ u8 worst;
+ u32 data;
+ u8 res[3];
+};
/* Register Frame Information Structure (FIS), host to device. */
struct host_to_dev_fis {
@@ -345,6 +374,12 @@ struct mtip_port {
* when the command slot and all associated data structures
* are no longer needed.
*/
+ u16 *log_buf;
+ dma_addr_t log_buf_dma;
+
+ u8 *smart_buf;
+ dma_addr_t smart_buf_dma;
+
unsigned long allocated[SLOTBITS_IN_LONGS];
/*
* used to queue commands when an internal command is in progress
@@ -368,6 +403,7 @@ struct mtip_port {
* Timer used to complete commands that have been active for too long.
*/
struct timer_list cmd_timer;
+ unsigned long ic_pause_timer;
/*
* Semaphore used to block threads if there are no
* command slots available.
@@ -404,13 +440,9 @@ struct driver_data {
unsigned slot_groups; /* number of slot groups the product supports */
- atomic_t drv_cleanup_done; /* Atomic variable for SRSI */
-
unsigned long index; /* Index to determine the disk name */
- unsigned int ftlrebuildflag; /* FTL rebuild flag */
-
- atomic_t resumeflag; /* Atomic variable to track suspend/resume */
+ unsigned long dd_flag; /* NOTE: use atomic bit operations on this */
struct task_struct *mtip_svc_handler; /* task_struct of svc thd */
};
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index c3f0ee16594d..061427a75d37 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -34,12 +34,11 @@
#include <linux/kthread.h>
#include <asm/uaccess.h>
-#include <asm/system.h>
#include <asm/types.h>
#include <linux/nbd.h>
-#define LO_MAGIC 0x68797548
+#define NBD_MAGIC 0x68797548
#ifdef NDEBUG
#define dprintk(flags, fmt...)
@@ -116,7 +115,7 @@ static void nbd_end_request(struct request *req)
spin_unlock_irqrestore(q->queue_lock, flags);
}
-static void sock_shutdown(struct nbd_device *lo, int lock)
+static void sock_shutdown(struct nbd_device *nbd, int lock)
{
/* Forcibly shutdown the socket causing all listeners
* to error
@@ -125,14 +124,14 @@ static void sock_shutdown(struct nbd_device *lo, int lock)
* there should be a more generic interface rather than
* calling socket ops directly here */
if (lock)
- mutex_lock(&lo->tx_lock);
- if (lo->sock) {
- dev_warn(disk_to_dev(lo->disk), "shutting down socket\n");
- kernel_sock_shutdown(lo->sock, SHUT_RDWR);
- lo->sock = NULL;
+ mutex_lock(&nbd->tx_lock);
+ if (nbd->sock) {
+ dev_warn(disk_to_dev(nbd->disk), "shutting down socket\n");
+ kernel_sock_shutdown(nbd->sock, SHUT_RDWR);
+ nbd->sock = NULL;
}
if (lock)
- mutex_unlock(&lo->tx_lock);
+ mutex_unlock(&nbd->tx_lock);
}
static void nbd_xmit_timeout(unsigned long arg)
@@ -147,17 +146,17 @@ static void nbd_xmit_timeout(unsigned long arg)
/*
* Send or receive packet.
*/
-static int sock_xmit(struct nbd_device *lo, int send, void *buf, int size,
+static int sock_xmit(struct nbd_device *nbd, int send, void *buf, int size,
int msg_flags)
{
- struct socket *sock = lo->sock;
+ struct socket *sock = nbd->sock;
int result;
struct msghdr msg;
struct kvec iov;
sigset_t blocked, oldset;
if (unlikely(!sock)) {
- dev_err(disk_to_dev(lo->disk),
+ dev_err(disk_to_dev(nbd->disk),
"Attempted %s on closed socket in sock_xmit\n",
(send ? "send" : "recv"));
return -EINVAL;
@@ -181,15 +180,15 @@ static int sock_xmit(struct nbd_device *lo, int send, void *buf, int size,
if (send) {
struct timer_list ti;
- if (lo->xmit_timeout) {
+ if (nbd->xmit_timeout) {
init_timer(&ti);
ti.function = nbd_xmit_timeout;
ti.data = (unsigned long)current;
- ti.expires = jiffies + lo->xmit_timeout;
+ ti.expires = jiffies + nbd->xmit_timeout;
add_timer(&ti);
}
result = kernel_sendmsg(sock, &msg, &iov, 1, size);
- if (lo->xmit_timeout)
+ if (nbd->xmit_timeout)
del_timer_sync(&ti);
} else
result = kernel_recvmsg(sock, &msg, &iov, 1, size,
@@ -201,7 +200,7 @@ static int sock_xmit(struct nbd_device *lo, int send, void *buf, int size,
task_pid_nr(current), current->comm,
dequeue_signal_lock(current, &current->blocked, &info));
result = -EINTR;
- sock_shutdown(lo, !send);
+ sock_shutdown(nbd, !send);
break;
}
@@ -219,18 +218,19 @@ static int sock_xmit(struct nbd_device *lo, int send, void *buf, int size,
return result;
}
-static inline int sock_send_bvec(struct nbd_device *lo, struct bio_vec *bvec,
+static inline int sock_send_bvec(struct nbd_device *nbd, struct bio_vec *bvec,
int flags)
{
int result;
void *kaddr = kmap(bvec->bv_page);
- result = sock_xmit(lo, 1, kaddr + bvec->bv_offset, bvec->bv_len, flags);
+ result = sock_xmit(nbd, 1, kaddr + bvec->bv_offset,
+ bvec->bv_len, flags);
kunmap(bvec->bv_page);
return result;
}
/* always call with the tx_lock held */
-static int nbd_send_req(struct nbd_device *lo, struct request *req)
+static int nbd_send_req(struct nbd_device *nbd, struct request *req)
{
int result, flags;
struct nbd_request request;
@@ -243,14 +243,14 @@ static int nbd_send_req(struct nbd_device *lo, struct request *req)
memcpy(request.handle, &req, sizeof(req));
dprintk(DBG_TX, "%s: request %p: sending control (%s@%llu,%uB)\n",
- lo->disk->disk_name, req,
+ nbd->disk->disk_name, req,
nbdcmd_to_ascii(nbd_cmd(req)),
(unsigned long long)blk_rq_pos(req) << 9,
blk_rq_bytes(req));
- result = sock_xmit(lo, 1, &request, sizeof(request),
+ result = sock_xmit(nbd, 1, &request, sizeof(request),
(nbd_cmd(req) == NBD_CMD_WRITE) ? MSG_MORE : 0);
if (result <= 0) {
- dev_err(disk_to_dev(lo->disk),
+ dev_err(disk_to_dev(nbd->disk),
"Send control failed (result %d)\n", result);
goto error_out;
}
@@ -267,10 +267,10 @@ static int nbd_send_req(struct nbd_device *lo, struct request *req)
if (!rq_iter_last(req, iter))
flags = MSG_MORE;
dprintk(DBG_TX, "%s: request %p: sending %d bytes data\n",
- lo->disk->disk_name, req, bvec->bv_len);
- result = sock_send_bvec(lo, bvec, flags);
+ nbd->disk->disk_name, req, bvec->bv_len);
+ result = sock_send_bvec(nbd, bvec, flags);
if (result <= 0) {
- dev_err(disk_to_dev(lo->disk),
+ dev_err(disk_to_dev(nbd->disk),
"Send data failed (result %d)\n",
result);
goto error_out;
@@ -283,25 +283,25 @@ error_out:
return -EIO;
}
-static struct request *nbd_find_request(struct nbd_device *lo,
+static struct request *nbd_find_request(struct nbd_device *nbd,
struct request *xreq)
{
struct request *req, *tmp;
int err;
- err = wait_event_interruptible(lo->active_wq, lo->active_req != xreq);
+ err = wait_event_interruptible(nbd->active_wq, nbd->active_req != xreq);
if (unlikely(err))
goto out;
- spin_lock(&lo->queue_lock);
- list_for_each_entry_safe(req, tmp, &lo->queue_head, queuelist) {
+ spin_lock(&nbd->queue_lock);
+ list_for_each_entry_safe(req, tmp, &nbd->queue_head, queuelist) {
if (req != xreq)
continue;
list_del_init(&req->queuelist);
- spin_unlock(&lo->queue_lock);
+ spin_unlock(&nbd->queue_lock);
return req;
}
- spin_unlock(&lo->queue_lock);
+ spin_unlock(&nbd->queue_lock);
err = -ENOENT;
@@ -309,78 +309,78 @@ out:
return ERR_PTR(err);
}
-static inline int sock_recv_bvec(struct nbd_device *lo, struct bio_vec *bvec)
+static inline int sock_recv_bvec(struct nbd_device *nbd, struct bio_vec *bvec)
{
int result;
void *kaddr = kmap(bvec->bv_page);
- result = sock_xmit(lo, 0, kaddr + bvec->bv_offset, bvec->bv_len,
+ result = sock_xmit(nbd, 0, kaddr + bvec->bv_offset, bvec->bv_len,
MSG_WAITALL);
kunmap(bvec->bv_page);
return result;
}
/* NULL returned = something went wrong, inform userspace */
-static struct request *nbd_read_stat(struct nbd_device *lo)
+static struct request *nbd_read_stat(struct nbd_device *nbd)
{
int result;
struct nbd_reply reply;
struct request *req;
reply.magic = 0;
- result = sock_xmit(lo, 0, &reply, sizeof(reply), MSG_WAITALL);
+ result = sock_xmit(nbd, 0, &reply, sizeof(reply), MSG_WAITALL);
if (result <= 0) {
- dev_err(disk_to_dev(lo->disk),
+ dev_err(disk_to_dev(nbd->disk),
"Receive control failed (result %d)\n", result);
goto harderror;
}
if (ntohl(reply.magic) != NBD_REPLY_MAGIC) {
- dev_err(disk_to_dev(lo->disk), "Wrong magic (0x%lx)\n",
+ dev_err(disk_to_dev(nbd->disk), "Wrong magic (0x%lx)\n",
(unsigned long)ntohl(reply.magic));
result = -EPROTO;
goto harderror;
}
- req = nbd_find_request(lo, *(struct request **)reply.handle);
+ req = nbd_find_request(nbd, *(struct request **)reply.handle);
if (IS_ERR(req)) {
result = PTR_ERR(req);
if (result != -ENOENT)
goto harderror;
- dev_err(disk_to_dev(lo->disk), "Unexpected reply (%p)\n",
+ dev_err(disk_to_dev(nbd->disk), "Unexpected reply (%p)\n",
reply.handle);
result = -EBADR;
goto harderror;
}
if (ntohl(reply.error)) {
- dev_err(disk_to_dev(lo->disk), "Other side returned error (%d)\n",
+ dev_err(disk_to_dev(nbd->disk), "Other side returned error (%d)\n",
ntohl(reply.error));
req->errors++;
return req;
}
dprintk(DBG_RX, "%s: request %p: got reply\n",
- lo->disk->disk_name, req);
+ nbd->disk->disk_name, req);
if (nbd_cmd(req) == NBD_CMD_READ) {
struct req_iterator iter;
struct bio_vec *bvec;
rq_for_each_segment(bvec, req, iter) {
- result = sock_recv_bvec(lo, bvec);
+ result = sock_recv_bvec(nbd, bvec);
if (result <= 0) {
- dev_err(disk_to_dev(lo->disk), "Receive data failed (result %d)\n",
+ dev_err(disk_to_dev(nbd->disk), "Receive data failed (result %d)\n",
result);
req->errors++;
return req;
}
dprintk(DBG_RX, "%s: request %p: got %d bytes data\n",
- lo->disk->disk_name, req, bvec->bv_len);
+ nbd->disk->disk_name, req, bvec->bv_len);
}
}
return req;
harderror:
- lo->harderror = result;
+ nbd->harderror = result;
return NULL;
}
@@ -398,48 +398,48 @@ static struct device_attribute pid_attr = {
.show = pid_show,
};
-static int nbd_do_it(struct nbd_device *lo)
+static int nbd_do_it(struct nbd_device *nbd)
{
struct request *req;
int ret;
- BUG_ON(lo->magic != LO_MAGIC);
+ BUG_ON(nbd->magic != NBD_MAGIC);
- lo->pid = task_pid_nr(current);
- ret = device_create_file(disk_to_dev(lo->disk), &pid_attr);
+ nbd->pid = task_pid_nr(current);
+ ret = device_create_file(disk_to_dev(nbd->disk), &pid_attr);
if (ret) {
- dev_err(disk_to_dev(lo->disk), "device_create_file failed!\n");
- lo->pid = 0;
+ dev_err(disk_to_dev(nbd->disk), "device_create_file failed!\n");
+ nbd->pid = 0;
return ret;
}
- while ((req = nbd_read_stat(lo)) != NULL)
+ while ((req = nbd_read_stat(nbd)) != NULL)
nbd_end_request(req);
- device_remove_file(disk_to_dev(lo->disk), &pid_attr);
- lo->pid = 0;
+ device_remove_file(disk_to_dev(nbd->disk), &pid_attr);
+ nbd->pid = 0;
return 0;
}
-static void nbd_clear_que(struct nbd_device *lo)
+static void nbd_clear_que(struct nbd_device *nbd)
{
struct request *req;
- BUG_ON(lo->magic != LO_MAGIC);
+ BUG_ON(nbd->magic != NBD_MAGIC);
/*
- * Because we have set lo->sock to NULL under the tx_lock, all
+ * Because we have set nbd->sock to NULL under the tx_lock, all
* modifications to the list must have completed by now. For
* the same reason, the active_req must be NULL.
*
* As a consequence, we don't need to take the spin lock while
* purging the list here.
*/
- BUG_ON(lo->sock);
- BUG_ON(lo->active_req);
+ BUG_ON(nbd->sock);
+ BUG_ON(nbd->active_req);
- while (!list_empty(&lo->queue_head)) {
- req = list_entry(lo->queue_head.next, struct request,
+ while (!list_empty(&nbd->queue_head)) {
+ req = list_entry(nbd->queue_head.next, struct request,
queuelist);
list_del_init(&req->queuelist);
req->errors++;
@@ -448,7 +448,7 @@ static void nbd_clear_que(struct nbd_device *lo)
}
-static void nbd_handle_req(struct nbd_device *lo, struct request *req)
+static void nbd_handle_req(struct nbd_device *nbd, struct request *req)
{
if (req->cmd_type != REQ_TYPE_FS)
goto error_out;
@@ -456,8 +456,8 @@ static void nbd_handle_req(struct nbd_device *lo, struct request *req)
nbd_cmd(req) = NBD_CMD_READ;
if (rq_data_dir(req) == WRITE) {
nbd_cmd(req) = NBD_CMD_WRITE;
- if (lo->flags & NBD_READ_ONLY) {
- dev_err(disk_to_dev(lo->disk),
+ if (nbd->flags & NBD_READ_ONLY) {
+ dev_err(disk_to_dev(nbd->disk),
"Write on read-only\n");
goto error_out;
}
@@ -465,29 +465,29 @@ static void nbd_handle_req(struct nbd_device *lo, struct request *req)
req->errors = 0;
- mutex_lock(&lo->tx_lock);
- if (unlikely(!lo->sock)) {
- mutex_unlock(&lo->tx_lock);
- dev_err(disk_to_dev(lo->disk),
+ mutex_lock(&nbd->tx_lock);
+ if (unlikely(!nbd->sock)) {
+ mutex_unlock(&nbd->tx_lock);
+ dev_err(disk_to_dev(nbd->disk),
"Attempted send on closed socket\n");
goto error_out;
}
- lo->active_req = req;
+ nbd->active_req = req;
- if (nbd_send_req(lo, req) != 0) {
- dev_err(disk_to_dev(lo->disk), "Request send failed\n");
+ if (nbd_send_req(nbd, req) != 0) {
+ dev_err(disk_to_dev(nbd->disk), "Request send failed\n");
req->errors++;
nbd_end_request(req);
} else {
- spin_lock(&lo->queue_lock);
- list_add(&req->queuelist, &lo->queue_head);
- spin_unlock(&lo->queue_lock);
+ spin_lock(&nbd->queue_lock);
+ list_add(&req->queuelist, &nbd->queue_head);
+ spin_unlock(&nbd->queue_lock);
}
- lo->active_req = NULL;
- mutex_unlock(&lo->tx_lock);
- wake_up_all(&lo->active_wq);
+ nbd->active_req = NULL;
+ mutex_unlock(&nbd->tx_lock);
+ wake_up_all(&nbd->active_wq);
return;
@@ -498,28 +498,28 @@ error_out:
static int nbd_thread(void *data)
{
- struct nbd_device *lo = data;
+ struct nbd_device *nbd = data;
struct request *req;
set_user_nice(current, -20);
- while (!kthread_should_stop() || !list_empty(&lo->waiting_queue)) {
+ while (!kthread_should_stop() || !list_empty(&nbd->waiting_queue)) {
/* wait for something to do */
- wait_event_interruptible(lo->waiting_wq,
+ wait_event_interruptible(nbd->waiting_wq,
kthread_should_stop() ||
- !list_empty(&lo->waiting_queue));
+ !list_empty(&nbd->waiting_queue));
/* extract request */
- if (list_empty(&lo->waiting_queue))
+ if (list_empty(&nbd->waiting_queue))
continue;
- spin_lock_irq(&lo->queue_lock);
- req = list_entry(lo->waiting_queue.next, struct request,
+ spin_lock_irq(&nbd->queue_lock);
+ req = list_entry(nbd->waiting_queue.next, struct request,
queuelist);
list_del_init(&req->queuelist);
- spin_unlock_irq(&lo->queue_lock);
+ spin_unlock_irq(&nbd->queue_lock);
/* handle request */
- nbd_handle_req(lo, req);
+ nbd_handle_req(nbd, req);
}
return 0;
}
@@ -527,7 +527,7 @@ static int nbd_thread(void *data)
/*
* We always wait for result of write, for now. It would be nice to make it optional
* in future
- * if ((rq_data_dir(req) == WRITE) && (lo->flags & NBD_WRITE_NOCHK))
+ * if ((rq_data_dir(req) == WRITE) && (nbd->flags & NBD_WRITE_NOCHK))
* { printk( "Warning: Ignoring result!\n"); nbd_end_request( req ); }
*/
@@ -536,19 +536,19 @@ static void do_nbd_request(struct request_queue *q)
struct request *req;
while ((req = blk_fetch_request(q)) != NULL) {
- struct nbd_device *lo;
+ struct nbd_device *nbd;
spin_unlock_irq(q->queue_lock);
dprintk(DBG_BLKDEV, "%s: request %p: dequeued (flags=%x)\n",
req->rq_disk->disk_name, req, req->cmd_type);
- lo = req->rq_disk->private_data;
+ nbd = req->rq_disk->private_data;
- BUG_ON(lo->magic != LO_MAGIC);
+ BUG_ON(nbd->magic != NBD_MAGIC);
- if (unlikely(!lo->sock)) {
- dev_err(disk_to_dev(lo->disk),
+ if (unlikely(!nbd->sock)) {
+ dev_err(disk_to_dev(nbd->disk),
"Attempted send on closed socket\n");
req->errors++;
nbd_end_request(req);
@@ -556,11 +556,11 @@ static void do_nbd_request(struct request_queue *q)
continue;
}
- spin_lock_irq(&lo->queue_lock);
- list_add_tail(&req->queuelist, &lo->waiting_queue);
- spin_unlock_irq(&lo->queue_lock);
+ spin_lock_irq(&nbd->queue_lock);
+ list_add_tail(&req->queuelist, &nbd->waiting_queue);
+ spin_unlock_irq(&nbd->queue_lock);
- wake_up(&lo->waiting_wq);
+ wake_up(&nbd->waiting_wq);
spin_lock_irq(q->queue_lock);
}
@@ -568,32 +568,32 @@ static void do_nbd_request(struct request_queue *q)
/* Must be called with tx_lock held */
-static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *lo,
+static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
unsigned int cmd, unsigned long arg)
{
switch (cmd) {
case NBD_DISCONNECT: {
struct request sreq;
- dev_info(disk_to_dev(lo->disk), "NBD_DISCONNECT\n");
+ dev_info(disk_to_dev(nbd->disk), "NBD_DISCONNECT\n");
blk_rq_init(NULL, &sreq);
sreq.cmd_type = REQ_TYPE_SPECIAL;
nbd_cmd(&sreq) = NBD_CMD_DISC;
- if (!lo->sock)
+ if (!nbd->sock)
return -EINVAL;
- nbd_send_req(lo, &sreq);
+ nbd_send_req(nbd, &sreq);
return 0;
}
case NBD_CLEAR_SOCK: {
struct file *file;
- lo->sock = NULL;
- file = lo->file;
- lo->file = NULL;
- nbd_clear_que(lo);
- BUG_ON(!list_empty(&lo->queue_head));
+ nbd->sock = NULL;
+ file = nbd->file;
+ nbd->file = NULL;
+ nbd_clear_que(nbd);
+ BUG_ON(!list_empty(&nbd->queue_head));
if (file)
fput(file);
return 0;
@@ -601,14 +601,14 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *lo,
case NBD_SET_SOCK: {
struct file *file;
- if (lo->file)
+ if (nbd->file)
return -EBUSY;
file = fget(arg);
if (file) {
struct inode *inode = file->f_path.dentry->d_inode;
if (S_ISSOCK(inode->i_mode)) {
- lo->file = file;
- lo->sock = SOCKET_I(inode);
+ nbd->file = file;
+ nbd->sock = SOCKET_I(inode);
if (max_part > 0)
bdev->bd_invalidated = 1;
return 0;
@@ -620,29 +620,29 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *lo,
}
case NBD_SET_BLKSIZE:
- lo->blksize = arg;
- lo->bytesize &= ~(lo->blksize-1);
- bdev->bd_inode->i_size = lo->bytesize;
- set_blocksize(bdev, lo->blksize);
- set_capacity(lo->disk, lo->bytesize >> 9);
+ nbd->blksize = arg;
+ nbd->bytesize &= ~(nbd->blksize-1);
+ bdev->bd_inode->i_size = nbd->bytesize;
+ set_blocksize(bdev, nbd->blksize);
+ set_capacity(nbd->disk, nbd->bytesize >> 9);
return 0;
case NBD_SET_SIZE:
- lo->bytesize = arg & ~(lo->blksize-1);
- bdev->bd_inode->i_size = lo->bytesize;
- set_blocksize(bdev, lo->blksize);
- set_capacity(lo->disk, lo->bytesize >> 9);
+ nbd->bytesize = arg & ~(nbd->blksize-1);
+ bdev->bd_inode->i_size = nbd->bytesize;
+ set_blocksize(bdev, nbd->blksize);
+ set_capacity(nbd->disk, nbd->bytesize >> 9);
return 0;
case NBD_SET_TIMEOUT:
- lo->xmit_timeout = arg * HZ;
+ nbd->xmit_timeout = arg * HZ;
return 0;
case NBD_SET_SIZE_BLOCKS:
- lo->bytesize = ((u64) arg) * lo->blksize;
- bdev->bd_inode->i_size = lo->bytesize;
- set_blocksize(bdev, lo->blksize);
- set_capacity(lo->disk, lo->bytesize >> 9);
+ nbd->bytesize = ((u64) arg) * nbd->blksize;
+ bdev->bd_inode->i_size = nbd->bytesize;
+ set_blocksize(bdev, nbd->blksize);
+ set_capacity(nbd->disk, nbd->bytesize >> 9);
return 0;
case NBD_DO_IT: {
@@ -650,38 +650,38 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *lo,
struct file *file;
int error;
- if (lo->pid)
+ if (nbd->pid)
return -EBUSY;
- if (!lo->file)
+ if (!nbd->file)
return -EINVAL;
- mutex_unlock(&lo->tx_lock);
+ mutex_unlock(&nbd->tx_lock);
- thread = kthread_create(nbd_thread, lo, lo->disk->disk_name);
+ thread = kthread_create(nbd_thread, nbd, nbd->disk->disk_name);
if (IS_ERR(thread)) {
- mutex_lock(&lo->tx_lock);
+ mutex_lock(&nbd->tx_lock);
return PTR_ERR(thread);
}
wake_up_process(thread);
- error = nbd_do_it(lo);
+ error = nbd_do_it(nbd);
kthread_stop(thread);
- mutex_lock(&lo->tx_lock);
+ mutex_lock(&nbd->tx_lock);
if (error)
return error;
- sock_shutdown(lo, 0);
- file = lo->file;
- lo->file = NULL;
- nbd_clear_que(lo);
- dev_warn(disk_to_dev(lo->disk), "queue cleared\n");
+ sock_shutdown(nbd, 0);
+ file = nbd->file;
+ nbd->file = NULL;
+ nbd_clear_que(nbd);
+ dev_warn(disk_to_dev(nbd->disk), "queue cleared\n");
if (file)
fput(file);
- lo->bytesize = 0;
+ nbd->bytesize = 0;
bdev->bd_inode->i_size = 0;
- set_capacity(lo->disk, 0);
+ set_capacity(nbd->disk, 0);
if (max_part > 0)
ioctl_by_bdev(bdev, BLKRRPART, 0);
- return lo->harderror;
+ return nbd->harderror;
}
case NBD_CLEAR_QUE:
@@ -689,14 +689,14 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *lo,
* This is for compatibility only. The queue is always cleared
* by NBD_DO_IT or NBD_CLEAR_SOCK.
*/
- BUG_ON(!lo->sock && !list_empty(&lo->queue_head));
+ BUG_ON(!nbd->sock && !list_empty(&nbd->queue_head));
return 0;
case NBD_PRINT_DEBUG:
- dev_info(disk_to_dev(lo->disk),
+ dev_info(disk_to_dev(nbd->disk),
"next = %p, prev = %p, head = %p\n",
- lo->queue_head.next, lo->queue_head.prev,
- &lo->queue_head);
+ nbd->queue_head.next, nbd->queue_head.prev,
+ &nbd->queue_head);
return 0;
}
return -ENOTTY;
@@ -705,21 +705,21 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *lo,
static int nbd_ioctl(struct block_device *bdev, fmode_t mode,
unsigned int cmd, unsigned long arg)
{
- struct nbd_device *lo = bdev->bd_disk->private_data;
+ struct nbd_device *nbd = bdev->bd_disk->private_data;
int error;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- BUG_ON(lo->magic != LO_MAGIC);
+ BUG_ON(nbd->magic != NBD_MAGIC);
/* Anyone capable of this syscall can do *real bad* things */
dprintk(DBG_IOCTL, "%s: nbd_ioctl cmd=%s(0x%x) arg=%lu\n",
- lo->disk->disk_name, ioctl_cmd_to_ascii(cmd), cmd, arg);
+ nbd->disk->disk_name, ioctl_cmd_to_ascii(cmd), cmd, arg);
- mutex_lock(&lo->tx_lock);
- error = __nbd_ioctl(bdev, lo, cmd, arg);
- mutex_unlock(&lo->tx_lock);
+ mutex_lock(&nbd->tx_lock);
+ error = __nbd_ioctl(bdev, nbd, cmd, arg);
+ mutex_unlock(&nbd->tx_lock);
return error;
}
@@ -805,7 +805,7 @@ static int __init nbd_init(void)
for (i = 0; i < nbds_max; i++) {
struct gendisk *disk = nbd_dev[i].disk;
nbd_dev[i].file = NULL;
- nbd_dev[i].magic = LO_MAGIC;
+ nbd_dev[i].magic = NBD_MAGIC;
nbd_dev[i].flags = 0;
INIT_LIST_HEAD(&nbd_dev[i].waiting_queue);
spin_lock_init(&nbd_dev[i].queue_lock);
diff --git a/drivers/block/nvme.c b/drivers/block/nvme.c
index 1f3c1a7d132a..38a2d0631882 100644
--- a/drivers/block/nvme.c
+++ b/drivers/block/nvme.c
@@ -39,7 +39,6 @@
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/types.h>
-#include <linux/version.h>
#include <asm-generic/io-64-nonatomic-lo-hi.h>
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index d59edeabd93f..ba66e4445f41 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -987,14 +987,14 @@ static void pkt_copy_bio_data(struct bio *src_bio, int seg, int offs, struct pag
while (copy_size > 0) {
struct bio_vec *src_bvl = bio_iovec_idx(src_bio, seg);
- void *vfrom = kmap_atomic(src_bvl->bv_page, KM_USER0) +
+ void *vfrom = kmap_atomic(src_bvl->bv_page) +
src_bvl->bv_offset + offs;
void *vto = page_address(dst_page) + dst_offs;
int len = min_t(int, copy_size, src_bvl->bv_len - offs);
BUG_ON(len < 0);
memcpy(vto, vfrom, len);
- kunmap_atomic(vfrom, KM_USER0);
+ kunmap_atomic(vfrom);
seg++;
offs = 0;
@@ -1019,10 +1019,10 @@ static void pkt_make_local_copy(struct packet_data *pkt, struct bio_vec *bvec)
offs = 0;
for (f = 0; f < pkt->frames; f++) {
if (bvec[f].bv_page != pkt->pages[p]) {
- void *vfrom = kmap_atomic(bvec[f].bv_page, KM_USER0) + bvec[f].bv_offset;
+ void *vfrom = kmap_atomic(bvec[f].bv_page) + bvec[f].bv_offset;
void *vto = page_address(pkt->pages[p]) + offs;
memcpy(vto, vfrom, CD_FRAMESIZE);
- kunmap_atomic(vfrom, KM_USER0);
+ kunmap_atomic(vfrom);
bvec[f].bv_page = pkt->pages[p];
bvec[f].bv_offset = offs;
} else {
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index a6278e7e61a0..013c7a549fb6 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -41,19 +41,35 @@
#include "rbd_types.h"
-#define DRV_NAME "rbd"
-#define DRV_NAME_LONG "rbd (rados block device)"
+/*
+ * The basic unit of block I/O is a sector. It is interpreted in a
+ * number of contexts in Linux (blk, bio, genhd), but the default is
+ * universally 512 bytes. These symbols are just slightly more
+ * meaningful than the bare numbers they represent.
+ */
+#define SECTOR_SHIFT 9
+#define SECTOR_SIZE (1ULL << SECTOR_SHIFT)
+
+#define RBD_DRV_NAME "rbd"
+#define RBD_DRV_NAME_LONG "rbd (rados block device)"
#define RBD_MINORS_PER_MAJOR 256 /* max minors per blkdev */
-#define RBD_MAX_MD_NAME_LEN (96 + sizeof(RBD_SUFFIX))
+#define RBD_MAX_MD_NAME_LEN (RBD_MAX_OBJ_NAME_LEN + sizeof(RBD_SUFFIX))
#define RBD_MAX_POOL_NAME_LEN 64
#define RBD_MAX_SNAP_NAME_LEN 32
#define RBD_MAX_OPT_LEN 1024
#define RBD_SNAP_HEAD_NAME "-"
+/*
+ * An RBD device name will be "rbd#", where the "rbd" comes from
+ * RBD_DRV_NAME above, and # is a unique integer identifier.
+ * MAX_INT_FORMAT_WIDTH is used in ensuring DEV_NAME_LEN is big
+ * enough to hold all possible device names.
+ */
#define DEV_NAME_LEN 32
+#define MAX_INT_FORMAT_WIDTH ((5 * sizeof (int)) / 2 + 1)
#define RBD_NOTIFY_TIMEOUT_DEFAULT 10
@@ -66,7 +82,6 @@ struct rbd_image_header {
__u8 obj_order;
__u8 crypt_type;
__u8 comp_type;
- struct rw_semaphore snap_rwsem;
struct ceph_snap_context *snapc;
size_t snap_names_len;
u64 snap_seq;
@@ -83,7 +98,7 @@ struct rbd_options {
};
/*
- * an instance of the client. multiple devices may share a client.
+ * an instance of the client. multiple devices may share an rbd client.
*/
struct rbd_client {
struct ceph_client *client;
@@ -92,20 +107,9 @@ struct rbd_client {
struct list_head node;
};
-struct rbd_req_coll;
-
/*
- * a single io request
+ * a request completion status
*/
-struct rbd_request {
- struct request *rq; /* blk layer request */
- struct bio *bio; /* cloned bio */
- struct page **pages; /* list of used pages */
- u64 len;
- int coll_index;
- struct rbd_req_coll *coll;
-};
-
struct rbd_req_status {
int done;
int rc;
@@ -122,6 +126,18 @@ struct rbd_req_coll {
struct rbd_req_status status[0];
};
+/*
+ * a single io request
+ */
+struct rbd_request {
+ struct request *rq; /* blk layer request */
+ struct bio *bio; /* cloned bio */
+ struct page **pages; /* list of used pages */
+ u64 len;
+ int coll_index;
+ struct rbd_req_coll *coll;
+};
+
struct rbd_snap {
struct device dev;
const char *name;
@@ -140,7 +156,6 @@ struct rbd_device {
struct gendisk *disk; /* blkdev's gendisk and rq */
struct request_queue *q;
- struct ceph_client *client;
struct rbd_client *rbd_client;
char name[DEV_NAME_LEN]; /* blkdev name, e.g. rbd3 */
@@ -157,6 +172,8 @@ struct rbd_device {
struct ceph_osd_event *watch_event;
struct ceph_osd_request *watch_request;
+ /* protects updating the header */
+ struct rw_semaphore header_rwsem;
char snap_name[RBD_MAX_SNAP_NAME_LEN];
u32 cur_snap; /* index+1 of current snapshot within snap context
0 - for the head */
@@ -171,15 +188,13 @@ struct rbd_device {
struct device dev;
};
-static struct bus_type rbd_bus_type = {
- .name = "rbd",
-};
-
-static spinlock_t node_lock; /* protects client get/put */
-
static DEFINE_MUTEX(ctl_mutex); /* Serialize open/close/setup/teardown */
+
static LIST_HEAD(rbd_dev_list); /* devices */
-static LIST_HEAD(rbd_client_list); /* clients */
+static DEFINE_SPINLOCK(rbd_dev_list_lock);
+
+static LIST_HEAD(rbd_client_list); /* clients */
+static DEFINE_SPINLOCK(rbd_client_list_lock);
static int __rbd_init_snaps_header(struct rbd_device *rbd_dev);
static void rbd_dev_release(struct device *dev);
@@ -190,12 +205,32 @@ static ssize_t rbd_snap_add(struct device *dev,
static void __rbd_remove_snap_dev(struct rbd_device *rbd_dev,
struct rbd_snap *snap);
+static ssize_t rbd_add(struct bus_type *bus, const char *buf,
+ size_t count);
+static ssize_t rbd_remove(struct bus_type *bus, const char *buf,
+ size_t count);
-static struct rbd_device *dev_to_rbd(struct device *dev)
+static struct bus_attribute rbd_bus_attrs[] = {
+ __ATTR(add, S_IWUSR, NULL, rbd_add),
+ __ATTR(remove, S_IWUSR, NULL, rbd_remove),
+ __ATTR_NULL
+};
+
+static struct bus_type rbd_bus_type = {
+ .name = "rbd",
+ .bus_attrs = rbd_bus_attrs,
+};
+
+static void rbd_root_dev_release(struct device *dev)
{
- return container_of(dev, struct rbd_device, dev);
}
+static struct device rbd_root_dev = {
+ .init_name = "rbd",
+ .release = rbd_root_dev_release,
+};
+
+
static struct device *rbd_get_dev(struct rbd_device *rbd_dev)
{
return get_device(&rbd_dev->dev);
@@ -210,8 +245,7 @@ static int __rbd_update_snaps(struct rbd_device *rbd_dev);
static int rbd_open(struct block_device *bdev, fmode_t mode)
{
- struct gendisk *disk = bdev->bd_disk;
- struct rbd_device *rbd_dev = disk->private_data;
+ struct rbd_device *rbd_dev = bdev->bd_disk->private_data;
rbd_get_dev(rbd_dev);
@@ -256,9 +290,11 @@ static struct rbd_client *rbd_client_create(struct ceph_options *opt,
kref_init(&rbdc->kref);
INIT_LIST_HEAD(&rbdc->node);
+ mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
+
rbdc->client = ceph_create_client(opt, rbdc, 0, 0);
if (IS_ERR(rbdc->client))
- goto out_rbdc;
+ goto out_mutex;
opt = NULL; /* Now rbdc->client is responsible for opt */
ret = ceph_open_session(rbdc->client);
@@ -267,16 +303,19 @@ static struct rbd_client *rbd_client_create(struct ceph_options *opt,
rbdc->rbd_opts = rbd_opts;
- spin_lock(&node_lock);
+ spin_lock(&rbd_client_list_lock);
list_add_tail(&rbdc->node, &rbd_client_list);
- spin_unlock(&node_lock);
+ spin_unlock(&rbd_client_list_lock);
+
+ mutex_unlock(&ctl_mutex);
dout("rbd_client_create created %p\n", rbdc);
return rbdc;
out_err:
ceph_destroy_client(rbdc->client);
-out_rbdc:
+out_mutex:
+ mutex_unlock(&ctl_mutex);
kfree(rbdc);
out_opt:
if (opt)
@@ -324,7 +363,7 @@ static int parse_rbd_opts_token(char *c, void *private)
substring_t argstr[MAX_OPT_ARGS];
int token, intval, ret;
- token = match_token((char *)c, rbdopt_tokens, argstr);
+ token = match_token(c, rbdopt_tokens, argstr);
if (token < 0)
return -EINVAL;
@@ -357,58 +396,54 @@ static int parse_rbd_opts_token(char *c, void *private)
* Get a ceph client with specific addr and configuration, if one does
* not exist create it.
*/
-static int rbd_get_client(struct rbd_device *rbd_dev, const char *mon_addr,
- char *options)
+static struct rbd_client *rbd_get_client(const char *mon_addr,
+ size_t mon_addr_len,
+ char *options)
{
struct rbd_client *rbdc;
struct ceph_options *opt;
- int ret;
struct rbd_options *rbd_opts;
rbd_opts = kzalloc(sizeof(*rbd_opts), GFP_KERNEL);
if (!rbd_opts)
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
rbd_opts->notify_timeout = RBD_NOTIFY_TIMEOUT_DEFAULT;
- ret = ceph_parse_options(&opt, options, mon_addr,
- mon_addr + strlen(mon_addr), parse_rbd_opts_token, rbd_opts);
- if (ret < 0)
- goto done_err;
+ opt = ceph_parse_options(options, mon_addr,
+ mon_addr + mon_addr_len,
+ parse_rbd_opts_token, rbd_opts);
+ if (IS_ERR(opt)) {
+ kfree(rbd_opts);
+ return ERR_CAST(opt);
+ }
- spin_lock(&node_lock);
+ spin_lock(&rbd_client_list_lock);
rbdc = __rbd_client_find(opt);
if (rbdc) {
+ /* using an existing client */
+ kref_get(&rbdc->kref);
+ spin_unlock(&rbd_client_list_lock);
+
ceph_destroy_options(opt);
kfree(rbd_opts);
- /* using an existing client */
- kref_get(&rbdc->kref);
- rbd_dev->rbd_client = rbdc;
- rbd_dev->client = rbdc->client;
- spin_unlock(&node_lock);
- return 0;
+ return rbdc;
}
- spin_unlock(&node_lock);
+ spin_unlock(&rbd_client_list_lock);
rbdc = rbd_client_create(opt, rbd_opts);
- if (IS_ERR(rbdc)) {
- ret = PTR_ERR(rbdc);
- goto done_err;
- }
- rbd_dev->rbd_client = rbdc;
- rbd_dev->client = rbdc->client;
- return 0;
-done_err:
- kfree(rbd_opts);
- return ret;
+ if (IS_ERR(rbdc))
+ kfree(rbd_opts);
+
+ return rbdc;
}
/*
* Destroy ceph client
*
- * Caller must hold node_lock.
+ * Caller must hold rbd_client_list_lock.
*/
static void rbd_client_release(struct kref *kref)
{
@@ -428,11 +463,10 @@ static void rbd_client_release(struct kref *kref)
*/
static void rbd_put_client(struct rbd_device *rbd_dev)
{
- spin_lock(&node_lock);
+ spin_lock(&rbd_client_list_lock);
kref_put(&rbd_dev->rbd_client->kref, rbd_client_release);
- spin_unlock(&node_lock);
+ spin_unlock(&rbd_client_list_lock);
rbd_dev->rbd_client = NULL;
- rbd_dev->client = NULL;
}
/*
@@ -457,21 +491,19 @@ static int rbd_header_from_disk(struct rbd_image_header *header,
gfp_t gfp_flags)
{
int i;
- u32 snap_count = le32_to_cpu(ondisk->snap_count);
- int ret = -ENOMEM;
+ u32 snap_count;
- if (memcmp(ondisk, RBD_HEADER_TEXT, sizeof(RBD_HEADER_TEXT))) {
+ if (memcmp(ondisk, RBD_HEADER_TEXT, sizeof(RBD_HEADER_TEXT)))
return -ENXIO;
- }
- init_rwsem(&header->snap_rwsem);
- header->snap_names_len = le64_to_cpu(ondisk->snap_names_len);
+ snap_count = le32_to_cpu(ondisk->snap_count);
header->snapc = kmalloc(sizeof(struct ceph_snap_context) +
- snap_count *
- sizeof(struct rbd_image_snap_ondisk),
+ snap_count * sizeof (*ondisk),
gfp_flags);
if (!header->snapc)
return -ENOMEM;
+
+ header->snap_names_len = le64_to_cpu(ondisk->snap_names_len);
if (snap_count) {
header->snap_names = kmalloc(header->snap_names_len,
GFP_KERNEL);
@@ -498,8 +530,7 @@ static int rbd_header_from_disk(struct rbd_image_header *header,
header->snapc->num_snaps = snap_count;
header->total_snaps = snap_count;
- if (snap_count &&
- allocated_snaps == snap_count) {
+ if (snap_count && allocated_snaps == snap_count) {
for (i = 0; i < snap_count; i++) {
header->snapc->snaps[i] =
le64_to_cpu(ondisk->snaps[i].id);
@@ -518,7 +549,7 @@ err_names:
kfree(header->snap_names);
err_snapc:
kfree(header->snapc);
- return ret;
+ return -ENOMEM;
}
static int snap_index(struct rbd_image_header *header, int snap_num)
@@ -542,35 +573,34 @@ static int snap_by_name(struct rbd_image_header *header, const char *snap_name,
int i;
char *p = header->snap_names;
- for (i = 0; i < header->total_snaps; i++, p += strlen(p) + 1) {
- if (strcmp(snap_name, p) == 0)
- break;
- }
- if (i == header->total_snaps)
- return -ENOENT;
- if (seq)
- *seq = header->snapc->snaps[i];
+ for (i = 0; i < header->total_snaps; i++) {
+ if (!strcmp(snap_name, p)) {
- if (size)
- *size = header->snap_sizes[i];
+ /* Found it. Pass back its id and/or size */
- return i;
+ if (seq)
+ *seq = header->snapc->snaps[i];
+ if (size)
+ *size = header->snap_sizes[i];
+ return i;
+ }
+ p += strlen(p) + 1; /* Skip ahead to the next name */
+ }
+ return -ENOENT;
}
-static int rbd_header_set_snap(struct rbd_device *dev,
- const char *snap_name,
- u64 *size)
+static int rbd_header_set_snap(struct rbd_device *dev, u64 *size)
{
struct rbd_image_header *header = &dev->header;
struct ceph_snap_context *snapc = header->snapc;
int ret = -ENOENT;
- down_write(&header->snap_rwsem);
+ BUILD_BUG_ON(sizeof (dev->snap_name) < sizeof (RBD_SNAP_HEAD_NAME));
- if (!snap_name ||
- !*snap_name ||
- strcmp(snap_name, "-") == 0 ||
- strcmp(snap_name, RBD_SNAP_HEAD_NAME) == 0) {
+ down_write(&dev->header_rwsem);
+
+ if (!memcmp(dev->snap_name, RBD_SNAP_HEAD_NAME,
+ sizeof (RBD_SNAP_HEAD_NAME))) {
if (header->total_snaps)
snapc->seq = header->snap_seq;
else
@@ -580,7 +610,7 @@ static int rbd_header_set_snap(struct rbd_device *dev,
if (size)
*size = header->image_size;
} else {
- ret = snap_by_name(header, snap_name, &snapc->seq, size);
+ ret = snap_by_name(header, dev->snap_name, &snapc->seq, size);
if (ret < 0)
goto done;
@@ -590,7 +620,7 @@ static int rbd_header_set_snap(struct rbd_device *dev,
ret = 0;
done:
- up_write(&header->snap_rwsem);
+ up_write(&dev->header_rwsem);
return ret;
}
@@ -717,7 +747,7 @@ static struct bio *bio_chain_clone(struct bio **old, struct bio **next,
/* split the bio. We'll release it either in the next
call, or it will have to be released outside */
- bp = bio_split(old_chain, (len - total) / 512ULL);
+ bp = bio_split(old_chain, (len - total) / SECTOR_SIZE);
if (!bp)
goto err_out;
@@ -857,7 +887,7 @@ static int rbd_do_request(struct request *rq,
struct timespec mtime = CURRENT_TIME;
struct rbd_request *req_data;
struct ceph_osd_request_head *reqhead;
- struct rbd_image_header *header = &dev->header;
+ struct ceph_osd_client *osdc;
req_data = kzalloc(sizeof(*req_data), GFP_NOIO);
if (!req_data) {
@@ -874,15 +904,13 @@ static int rbd_do_request(struct request *rq,
dout("rbd_do_request obj=%s ofs=%lld len=%lld\n", obj, len, ofs);
- down_read(&header->snap_rwsem);
+ down_read(&dev->header_rwsem);
- req = ceph_osdc_alloc_request(&dev->client->osdc, flags,
- snapc,
- ops,
- false,
- GFP_NOIO, pages, bio);
+ osdc = &dev->rbd_client->client->osdc;
+ req = ceph_osdc_alloc_request(osdc, flags, snapc, ops,
+ false, GFP_NOIO, pages, bio);
if (!req) {
- up_read(&header->snap_rwsem);
+ up_read(&dev->header_rwsem);
ret = -ENOMEM;
goto done_pages;
}
@@ -909,27 +937,27 @@ static int rbd_do_request(struct request *rq,
layout->fl_object_size = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER);
layout->fl_pg_preferred = cpu_to_le32(-1);
layout->fl_pg_pool = cpu_to_le32(dev->poolid);
- ceph_calc_raw_layout(&dev->client->osdc, layout, snapid,
- ofs, &len, &bno, req, ops);
+ ceph_calc_raw_layout(osdc, layout, snapid, ofs, &len, &bno,
+ req, ops);
ceph_osdc_build_request(req, ofs, &len,
ops,
snapc,
&mtime,
req->r_oid, req->r_oid_len);
- up_read(&header->snap_rwsem);
+ up_read(&dev->header_rwsem);
if (linger_req) {
- ceph_osdc_set_request_linger(&dev->client->osdc, req);
+ ceph_osdc_set_request_linger(osdc, req);
*linger_req = req;
}
- ret = ceph_osdc_start_request(&dev->client->osdc, req, false);
+ ret = ceph_osdc_start_request(osdc, req, false);
if (ret < 0)
goto done_err;
if (!rbd_cb) {
- ret = ceph_osdc_wait_request(&dev->client->osdc, req);
+ ret = ceph_osdc_wait_request(osdc, req);
if (ver)
*ver = le64_to_cpu(req->r_reassert_version.version);
dout("reassert_ver=%lld\n",
@@ -1213,8 +1241,8 @@ static void rbd_watch_cb(u64 ver, u64 notify_id, u8 opcode, void *data)
rc = __rbd_update_snaps(dev);
mutex_unlock(&ctl_mutex);
if (rc)
- pr_warning(DRV_NAME "%d got notification but failed to update"
- " snaps: %d\n", dev->major, rc);
+ pr_warning(RBD_DRV_NAME "%d got notification but failed to "
+ " update snaps: %d\n", dev->major, rc);
rbd_req_sync_notify_ack(dev, ver, notify_id, dev->obj_md_name);
}
@@ -1227,7 +1255,7 @@ static int rbd_req_sync_watch(struct rbd_device *dev,
u64 ver)
{
struct ceph_osd_req_op *ops;
- struct ceph_osd_client *osdc = &dev->client->osdc;
+ struct ceph_osd_client *osdc = &dev->rbd_client->client->osdc;
int ret = rbd_create_rw_ops(&ops, 1, CEPH_OSD_OP_WATCH, 0);
if (ret < 0)
@@ -1314,7 +1342,7 @@ static int rbd_req_sync_notify(struct rbd_device *dev,
const char *obj)
{
struct ceph_osd_req_op *ops;
- struct ceph_osd_client *osdc = &dev->client->osdc;
+ struct ceph_osd_client *osdc = &dev->rbd_client->client->osdc;
struct ceph_osd_event *event;
struct rbd_notify_info info;
int payload_len = sizeof(u32) + sizeof(u32);
@@ -1421,9 +1449,7 @@ static void rbd_rq_fn(struct request_queue *q)
struct request *rq;
struct bio_pair *bp = NULL;
- rq = blk_fetch_request(q);
-
- while (1) {
+ while ((rq = blk_fetch_request(q))) {
struct bio *bio;
struct bio *rq_bio, *next_bio = NULL;
bool do_write;
@@ -1441,32 +1467,32 @@ static void rbd_rq_fn(struct request_queue *q)
/* filter out block requests we don't understand */
if ((rq->cmd_type != REQ_TYPE_FS)) {
__blk_end_request_all(rq, 0);
- goto next;
+ continue;
}
/* deduce our operation (read, write) */
do_write = (rq_data_dir(rq) == WRITE);
size = blk_rq_bytes(rq);
- ofs = blk_rq_pos(rq) * 512ULL;
+ ofs = blk_rq_pos(rq) * SECTOR_SIZE;
rq_bio = rq->bio;
if (do_write && rbd_dev->read_only) {
__blk_end_request_all(rq, -EROFS);
- goto next;
+ continue;
}
spin_unlock_irq(q->queue_lock);
dout("%s 0x%x bytes at 0x%llx\n",
do_write ? "write" : "read",
- size, blk_rq_pos(rq) * 512ULL);
+ size, blk_rq_pos(rq) * SECTOR_SIZE);
num_segs = rbd_get_num_segments(&rbd_dev->header, ofs, size);
coll = rbd_alloc_coll(num_segs);
if (!coll) {
spin_lock_irq(q->queue_lock);
__blk_end_request_all(rq, -ENOMEM);
- goto next;
+ continue;
}
do {
@@ -1512,8 +1538,6 @@ next_seg:
if (bp)
bio_pair_release(bp);
spin_lock_irq(q->queue_lock);
-next:
- rq = blk_fetch_request(q);
}
}
@@ -1526,13 +1550,17 @@ static int rbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bmd,
struct bio_vec *bvec)
{
struct rbd_device *rbd_dev = q->queuedata;
- unsigned int chunk_sectors = 1 << (rbd_dev->header.obj_order - 9);
- sector_t sector = bmd->bi_sector + get_start_sect(bmd->bi_bdev);
- unsigned int bio_sectors = bmd->bi_size >> 9;
+ unsigned int chunk_sectors;
+ sector_t sector;
+ unsigned int bio_sectors;
int max;
+ chunk_sectors = 1 << (rbd_dev->header.obj_order - SECTOR_SHIFT);
+ sector = bmd->bi_sector + get_start_sect(bmd->bi_bdev);
+ bio_sectors = bmd->bi_size >> SECTOR_SHIFT;
+
max = (chunk_sectors - ((sector & (chunk_sectors - 1))
- + bio_sectors)) << 9;
+ + bio_sectors)) << SECTOR_SHIFT;
if (max < 0)
max = 0; /* bio_add cannot handle a negative return */
if (max <= bvec->bv_len && bio_sectors == 0)
@@ -1565,15 +1593,16 @@ static int rbd_read_header(struct rbd_device *rbd_dev,
ssize_t rc;
struct rbd_image_header_ondisk *dh;
int snap_count = 0;
- u64 snap_names_len = 0;
u64 ver;
+ size_t len;
+ /*
+ * First reads the fixed-size header to determine the number
+ * of snapshots, then re-reads it, along with all snapshot
+ * records as well as their stored names.
+ */
+ len = sizeof (*dh);
while (1) {
- int len = sizeof(*dh) +
- snap_count * sizeof(struct rbd_image_snap_ondisk) +
- snap_names_len;
-
- rc = -ENOMEM;
dh = kmalloc(len, GFP_KERNEL);
if (!dh)
return -ENOMEM;
@@ -1588,21 +1617,22 @@ static int rbd_read_header(struct rbd_device *rbd_dev,
rc = rbd_header_from_disk(header, dh, snap_count, GFP_KERNEL);
if (rc < 0) {
- if (rc == -ENXIO) {
+ if (rc == -ENXIO)
pr_warning("unrecognized header format"
" for image %s", rbd_dev->obj);
- }
goto out_dh;
}
- if (snap_count != header->total_snaps) {
- snap_count = header->total_snaps;
- snap_names_len = header->snap_names_len;
- rbd_header_free(header);
- kfree(dh);
- continue;
- }
- break;
+ if (snap_count == header->total_snaps)
+ break;
+
+ snap_count = header->total_snaps;
+ len = sizeof (*dh) +
+ snap_count * sizeof(struct rbd_image_snap_ondisk) +
+ header->snap_names_len;
+
+ rbd_header_free(header);
+ kfree(dh);
}
header->obj_version = ver;
@@ -1623,13 +1653,14 @@ static int rbd_header_add_snap(struct rbd_device *dev,
int ret;
void *data, *p, *e;
u64 ver;
+ struct ceph_mon_client *monc;
/* we should create a snapshot only if we're pointing at the head */
if (dev->cur_snap)
return -EINVAL;
- ret = ceph_monc_create_snapid(&dev->client->monc, dev->poolid,
- &new_snapid);
+ monc = &dev->rbd_client->client->monc;
+ ret = ceph_monc_create_snapid(monc, dev->poolid, &new_snapid);
dout("created snapid=%lld\n", new_snapid);
if (ret < 0)
return ret;
@@ -1684,9 +1715,9 @@ static int __rbd_update_snaps(struct rbd_device *rbd_dev)
return ret;
/* resized? */
- set_capacity(rbd_dev->disk, h.image_size / 512ULL);
+ set_capacity(rbd_dev->disk, h.image_size / SECTOR_SIZE);
- down_write(&rbd_dev->header.snap_rwsem);
+ down_write(&rbd_dev->header_rwsem);
snap_seq = rbd_dev->header.snapc->seq;
if (rbd_dev->header.total_snaps &&
@@ -1711,7 +1742,7 @@ static int __rbd_update_snaps(struct rbd_device *rbd_dev)
ret = __rbd_init_snaps_header(rbd_dev);
- up_write(&rbd_dev->header.snap_rwsem);
+ up_write(&rbd_dev->header_rwsem);
return ret;
}
@@ -1721,6 +1752,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
struct gendisk *disk;
struct request_queue *q;
int rc;
+ u64 segment_size;
u64 total_size = 0;
/* contact OSD, request size info about the object being mapped */
@@ -1733,7 +1765,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
if (rc)
return rc;
- rc = rbd_header_set_snap(rbd_dev, rbd_dev->snap_name, &total_size);
+ rc = rbd_header_set_snap(rbd_dev, &total_size);
if (rc)
return rc;
@@ -1743,7 +1775,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
if (!disk)
goto out;
- snprintf(disk->disk_name, sizeof(disk->disk_name), DRV_NAME "%d",
+ snprintf(disk->disk_name, sizeof(disk->disk_name), RBD_DRV_NAME "%d",
rbd_dev->id);
disk->major = rbd_dev->major;
disk->first_minor = 0;
@@ -1756,11 +1788,15 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
if (!q)
goto out_disk;
+ /* We use the default size, but let's be explicit about it. */
+ blk_queue_physical_block_size(q, SECTOR_SIZE);
+
/* set io sizes to object size */
- blk_queue_max_hw_sectors(q, rbd_obj_bytes(&rbd_dev->header) / 512ULL);
- blk_queue_max_segment_size(q, rbd_obj_bytes(&rbd_dev->header));
- blk_queue_io_min(q, rbd_obj_bytes(&rbd_dev->header));
- blk_queue_io_opt(q, rbd_obj_bytes(&rbd_dev->header));
+ segment_size = rbd_obj_bytes(&rbd_dev->header);
+ blk_queue_max_hw_sectors(q, segment_size / SECTOR_SIZE);
+ blk_queue_max_segment_size(q, segment_size);
+ blk_queue_io_min(q, segment_size);
+ blk_queue_io_opt(q, segment_size);
blk_queue_merge_bvec(q, rbd_merge_bvec);
disk->queue = q;
@@ -1771,7 +1807,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
rbd_dev->q = q;
/* finally, announce the disk to the world */
- set_capacity(disk, total_size / 512ULL);
+ set_capacity(disk, total_size / SECTOR_SIZE);
add_disk(disk);
pr_info("%s: added with size 0x%llx\n",
@@ -1788,10 +1824,15 @@ out:
sysfs
*/
+static struct rbd_device *dev_to_rbd_dev(struct device *dev)
+{
+ return container_of(dev, struct rbd_device, dev);
+}
+
static ssize_t rbd_size_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
- struct rbd_device *rbd_dev = dev_to_rbd(dev);
+ struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
return sprintf(buf, "%llu\n", (unsigned long long)rbd_dev->header.image_size);
}
@@ -1799,7 +1840,7 @@ static ssize_t rbd_size_show(struct device *dev,
static ssize_t rbd_major_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
- struct rbd_device *rbd_dev = dev_to_rbd(dev);
+ struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
return sprintf(buf, "%d\n", rbd_dev->major);
}
@@ -1807,15 +1848,16 @@ static ssize_t rbd_major_show(struct device *dev,
static ssize_t rbd_client_id_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
- struct rbd_device *rbd_dev = dev_to_rbd(dev);
+ struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
- return sprintf(buf, "client%lld\n", ceph_client_id(rbd_dev->client));
+ return sprintf(buf, "client%lld\n",
+ ceph_client_id(rbd_dev->rbd_client->client));
}
static ssize_t rbd_pool_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
- struct rbd_device *rbd_dev = dev_to_rbd(dev);
+ struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
return sprintf(buf, "%s\n", rbd_dev->pool_name);
}
@@ -1823,7 +1865,7 @@ static ssize_t rbd_pool_show(struct device *dev,
static ssize_t rbd_name_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
- struct rbd_device *rbd_dev = dev_to_rbd(dev);
+ struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
return sprintf(buf, "%s\n", rbd_dev->obj);
}
@@ -1832,7 +1874,7 @@ static ssize_t rbd_snap_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
- struct rbd_device *rbd_dev = dev_to_rbd(dev);
+ struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
return sprintf(buf, "%s\n", rbd_dev->snap_name);
}
@@ -1842,7 +1884,7 @@ static ssize_t rbd_image_refresh(struct device *dev,
const char *buf,
size_t size)
{
- struct rbd_device *rbd_dev = dev_to_rbd(dev);
+ struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
int rc;
int ret = size;
@@ -1907,7 +1949,7 @@ static ssize_t rbd_snap_size_show(struct device *dev,
{
struct rbd_snap *snap = container_of(dev, struct rbd_snap, dev);
- return sprintf(buf, "%lld\n", (long long)snap->size);
+ return sprintf(buf, "%zd\n", snap->size);
}
static ssize_t rbd_snap_id_show(struct device *dev,
@@ -1916,7 +1958,7 @@ static ssize_t rbd_snap_id_show(struct device *dev,
{
struct rbd_snap *snap = container_of(dev, struct rbd_snap, dev);
- return sprintf(buf, "%lld\n", (long long)snap->id);
+ return sprintf(buf, "%llu\n", (unsigned long long) snap->id);
}
static DEVICE_ATTR(snap_size, S_IRUGO, rbd_snap_size_show, NULL);
@@ -2088,19 +2130,9 @@ static int __rbd_init_snaps_header(struct rbd_device *rbd_dev)
return 0;
}
-
-static void rbd_root_dev_release(struct device *dev)
-{
-}
-
-static struct device rbd_root_dev = {
- .init_name = "rbd",
- .release = rbd_root_dev_release,
-};
-
static int rbd_bus_add_dev(struct rbd_device *rbd_dev)
{
- int ret = -ENOMEM;
+ int ret;
struct device *dev;
struct rbd_snap *snap;
@@ -2114,7 +2146,7 @@ static int rbd_bus_add_dev(struct rbd_device *rbd_dev)
dev_set_name(dev, "%d", rbd_dev->id);
ret = device_register(dev);
if (ret < 0)
- goto done_free;
+ goto out;
list_for_each_entry(snap, &rbd_dev->snaps, node) {
ret = rbd_register_snap_dev(rbd_dev, snap,
@@ -2122,10 +2154,7 @@ static int rbd_bus_add_dev(struct rbd_device *rbd_dev)
if (ret < 0)
break;
}
-
- mutex_unlock(&ctl_mutex);
- return 0;
-done_free:
+out:
mutex_unlock(&ctl_mutex);
return ret;
}
@@ -2154,104 +2183,250 @@ static int rbd_init_watch_dev(struct rbd_device *rbd_dev)
return ret;
}
+static atomic64_t rbd_id_max = ATOMIC64_INIT(0);
+
+/*
+ * Get a unique rbd identifier for the given new rbd_dev, and add
+ * the rbd_dev to the global list. The minimum rbd id is 1.
+ */
+static void rbd_id_get(struct rbd_device *rbd_dev)
+{
+ rbd_dev->id = atomic64_inc_return(&rbd_id_max);
+
+ spin_lock(&rbd_dev_list_lock);
+ list_add_tail(&rbd_dev->node, &rbd_dev_list);
+ spin_unlock(&rbd_dev_list_lock);
+}
+
+/*
+ * Remove an rbd_dev from the global list, and record that its
+ * identifier is no longer in use.
+ */
+static void rbd_id_put(struct rbd_device *rbd_dev)
+{
+ struct list_head *tmp;
+ int rbd_id = rbd_dev->id;
+ int max_id;
+
+ BUG_ON(rbd_id < 1);
+
+ spin_lock(&rbd_dev_list_lock);
+ list_del_init(&rbd_dev->node);
+
+ /*
+ * If the id being "put" is not the current maximum, there
+ * is nothing special we need to do.
+ */
+ if (rbd_id != atomic64_read(&rbd_id_max)) {
+ spin_unlock(&rbd_dev_list_lock);
+ return;
+ }
+
+ /*
+ * We need to update the current maximum id. Search the
+ * list to find out what it is. We're more likely to find
+ * the maximum at the end, so search the list backward.
+ */
+ max_id = 0;
+ list_for_each_prev(tmp, &rbd_dev_list) {
+ struct rbd_device *rbd_dev;
+
+ rbd_dev = list_entry(tmp, struct rbd_device, node);
+ if (rbd_id > max_id)
+ max_id = rbd_id;
+ }
+ spin_unlock(&rbd_dev_list_lock);
+
+ /*
+ * The max id could have been updated by rbd_id_get(), in
+ * which case it now accurately reflects the new maximum.
+ * Be careful not to overwrite the maximum value in that
+ * case.
+ */
+ atomic64_cmpxchg(&rbd_id_max, rbd_id, max_id);
+}
+
+/*
+ * Skips over white space at *buf, and updates *buf to point to the
+ * first found non-space character (if any). Returns the length of
+ * the token (string of non-white space characters) found. Note
+ * that *buf must be terminated with '\0'.
+ */
+static inline size_t next_token(const char **buf)
+{
+ /*
+ * These are the characters that produce nonzero for
+ * isspace() in the "C" and "POSIX" locales.
+ */
+ const char *spaces = " \f\n\r\t\v";
+
+ *buf += strspn(*buf, spaces); /* Find start of token */
+
+ return strcspn(*buf, spaces); /* Return token length */
+}
+
+/*
+ * Finds the next token in *buf, and if the provided token buffer is
+ * big enough, copies the found token into it. The result, if
+ * copied, is guaranteed to be terminated with '\0'. Note that *buf
+ * must be terminated with '\0' on entry.
+ *
+ * Returns the length of the token found (not including the '\0').
+ * Return value will be 0 if no token is found, and it will be >=
+ * token_size if the token would not fit.
+ *
+ * The *buf pointer will be updated to point beyond the end of the
+ * found token. Note that this occurs even if the token buffer is
+ * too small to hold it.
+ */
+static inline size_t copy_token(const char **buf,
+ char *token,
+ size_t token_size)
+{
+ size_t len;
+
+ len = next_token(buf);
+ if (len < token_size) {
+ memcpy(token, *buf, len);
+ *(token + len) = '\0';
+ }
+ *buf += len;
+
+ return len;
+}
+
+/*
+ * This fills in the pool_name, obj, obj_len, snap_name, obj_len,
+ * rbd_dev, rbd_md_name, and name fields of the given rbd_dev, based
+ * on the list of monitor addresses and other options provided via
+ * /sys/bus/rbd/add.
+ */
+static int rbd_add_parse_args(struct rbd_device *rbd_dev,
+ const char *buf,
+ const char **mon_addrs,
+ size_t *mon_addrs_size,
+ char *options,
+ size_t options_size)
+{
+ size_t len;
+
+ /* The first four tokens are required */
+
+ len = next_token(&buf);
+ if (!len)
+ return -EINVAL;
+ *mon_addrs_size = len + 1;
+ *mon_addrs = buf;
+
+ buf += len;
+
+ len = copy_token(&buf, options, options_size);
+ if (!len || len >= options_size)
+ return -EINVAL;
+
+ len = copy_token(&buf, rbd_dev->pool_name, sizeof (rbd_dev->pool_name));
+ if (!len || len >= sizeof (rbd_dev->pool_name))
+ return -EINVAL;
+
+ len = copy_token(&buf, rbd_dev->obj, sizeof (rbd_dev->obj));
+ if (!len || len >= sizeof (rbd_dev->obj))
+ return -EINVAL;
+
+ /* We have the object length in hand, save it. */
+
+ rbd_dev->obj_len = len;
+
+ BUILD_BUG_ON(RBD_MAX_MD_NAME_LEN
+ < RBD_MAX_OBJ_NAME_LEN + sizeof (RBD_SUFFIX));
+ sprintf(rbd_dev->obj_md_name, "%s%s", rbd_dev->obj, RBD_SUFFIX);
+
+ /*
+ * The snapshot name is optional, but it's an error if it's
+ * too long. If no snapshot is supplied, fill in the default.
+ */
+ len = copy_token(&buf, rbd_dev->snap_name, sizeof (rbd_dev->snap_name));
+ if (!len)
+ memcpy(rbd_dev->snap_name, RBD_SNAP_HEAD_NAME,
+ sizeof (RBD_SNAP_HEAD_NAME));
+ else if (len >= sizeof (rbd_dev->snap_name))
+ return -EINVAL;
+
+ return 0;
+}
+
static ssize_t rbd_add(struct bus_type *bus,
const char *buf,
size_t count)
{
- struct ceph_osd_client *osdc;
struct rbd_device *rbd_dev;
- ssize_t rc = -ENOMEM;
- int irc, new_id = 0;
- struct list_head *tmp;
- char *mon_dev_name;
- char *options;
+ const char *mon_addrs = NULL;
+ size_t mon_addrs_size = 0;
+ char *options = NULL;
+ struct ceph_osd_client *osdc;
+ int rc = -ENOMEM;
if (!try_module_get(THIS_MODULE))
return -ENODEV;
- mon_dev_name = kmalloc(RBD_MAX_OPT_LEN, GFP_KERNEL);
- if (!mon_dev_name)
- goto err_out_mod;
-
- options = kmalloc(RBD_MAX_OPT_LEN, GFP_KERNEL);
- if (!options)
- goto err_mon_dev;
-
- /* new rbd_device object */
rbd_dev = kzalloc(sizeof(*rbd_dev), GFP_KERNEL);
if (!rbd_dev)
- goto err_out_opt;
+ goto err_nomem;
+ options = kmalloc(count, GFP_KERNEL);
+ if (!options)
+ goto err_nomem;
/* static rbd_device initialization */
spin_lock_init(&rbd_dev->lock);
INIT_LIST_HEAD(&rbd_dev->node);
INIT_LIST_HEAD(&rbd_dev->snaps);
+ init_rwsem(&rbd_dev->header_rwsem);
- init_rwsem(&rbd_dev->header.snap_rwsem);
+ init_rwsem(&rbd_dev->header_rwsem);
/* generate unique id: find highest unique id, add one */
- mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
-
- list_for_each(tmp, &rbd_dev_list) {
- struct rbd_device *rbd_dev;
+ rbd_id_get(rbd_dev);
- rbd_dev = list_entry(tmp, struct rbd_device, node);
- if (rbd_dev->id >= new_id)
- new_id = rbd_dev->id + 1;
- }
-
- rbd_dev->id = new_id;
-
- /* add to global list */
- list_add_tail(&rbd_dev->node, &rbd_dev_list);
+ /* Fill in the device name, now that we have its id. */
+ BUILD_BUG_ON(DEV_NAME_LEN
+ < sizeof (RBD_DRV_NAME) + MAX_INT_FORMAT_WIDTH);
+ sprintf(rbd_dev->name, "%s%d", RBD_DRV_NAME, rbd_dev->id);
/* parse add command */
- if (sscanf(buf, "%" __stringify(RBD_MAX_OPT_LEN) "s "
- "%" __stringify(RBD_MAX_OPT_LEN) "s "
- "%" __stringify(RBD_MAX_POOL_NAME_LEN) "s "
- "%" __stringify(RBD_MAX_OBJ_NAME_LEN) "s"
- "%" __stringify(RBD_MAX_SNAP_NAME_LEN) "s",
- mon_dev_name, options, rbd_dev->pool_name,
- rbd_dev->obj, rbd_dev->snap_name) < 4) {
- rc = -EINVAL;
- goto err_out_slot;
- }
-
- if (rbd_dev->snap_name[0] == 0)
- rbd_dev->snap_name[0] = '-';
-
- rbd_dev->obj_len = strlen(rbd_dev->obj);
- snprintf(rbd_dev->obj_md_name, sizeof(rbd_dev->obj_md_name), "%s%s",
- rbd_dev->obj, RBD_SUFFIX);
-
- /* initialize rest of new object */
- snprintf(rbd_dev->name, DEV_NAME_LEN, DRV_NAME "%d", rbd_dev->id);
- rc = rbd_get_client(rbd_dev, mon_dev_name, options);
- if (rc < 0)
- goto err_out_slot;
+ rc = rbd_add_parse_args(rbd_dev, buf, &mon_addrs, &mon_addrs_size,
+ options, count);
+ if (rc)
+ goto err_put_id;
- mutex_unlock(&ctl_mutex);
+ rbd_dev->rbd_client = rbd_get_client(mon_addrs, mon_addrs_size - 1,
+ options);
+ if (IS_ERR(rbd_dev->rbd_client)) {
+ rc = PTR_ERR(rbd_dev->rbd_client);
+ goto err_put_id;
+ }
/* pick the pool */
- osdc = &rbd_dev->client->osdc;
+ osdc = &rbd_dev->rbd_client->client->osdc;
rc = ceph_pg_poolid_by_name(osdc->osdmap, rbd_dev->pool_name);
if (rc < 0)
goto err_out_client;
rbd_dev->poolid = rc;
/* register our block device */
- irc = register_blkdev(0, rbd_dev->name);
- if (irc < 0) {
- rc = irc;
+ rc = register_blkdev(0, rbd_dev->name);
+ if (rc < 0)
goto err_out_client;
- }
- rbd_dev->major = irc;
+ rbd_dev->major = rc;
rc = rbd_bus_add_dev(rbd_dev);
if (rc)
goto err_out_blkdev;
- /* set up and announce blkdev mapping */
+ /*
+ * At this point cleanup in the event of an error is the job
+ * of the sysfs code (initiated by rbd_bus_del_dev()).
+ *
+ * Set up and announce blkdev mapping.
+ */
rc = rbd_init_disk(rbd_dev);
if (rc)
goto err_out_bus;
@@ -2263,35 +2438,26 @@ static ssize_t rbd_add(struct bus_type *bus,
return count;
err_out_bus:
- mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
- list_del_init(&rbd_dev->node);
- mutex_unlock(&ctl_mutex);
-
/* this will also clean up rest of rbd_dev stuff */
rbd_bus_del_dev(rbd_dev);
kfree(options);
- kfree(mon_dev_name);
return rc;
err_out_blkdev:
unregister_blkdev(rbd_dev->major, rbd_dev->name);
err_out_client:
rbd_put_client(rbd_dev);
- mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
-err_out_slot:
- list_del_init(&rbd_dev->node);
- mutex_unlock(&ctl_mutex);
-
- kfree(rbd_dev);
-err_out_opt:
+err_put_id:
+ rbd_id_put(rbd_dev);
+err_nomem:
kfree(options);
-err_mon_dev:
- kfree(mon_dev_name);
-err_out_mod:
+ kfree(rbd_dev);
+
dout("Error adding device %s\n", buf);
module_put(THIS_MODULE);
- return rc;
+
+ return (ssize_t) rc;
}
static struct rbd_device *__rbd_get_dev(unsigned long id)
@@ -2299,22 +2465,28 @@ static struct rbd_device *__rbd_get_dev(unsigned long id)
struct list_head *tmp;
struct rbd_device *rbd_dev;
+ spin_lock(&rbd_dev_list_lock);
list_for_each(tmp, &rbd_dev_list) {
rbd_dev = list_entry(tmp, struct rbd_device, node);
- if (rbd_dev->id == id)
+ if (rbd_dev->id == id) {
+ spin_unlock(&rbd_dev_list_lock);
return rbd_dev;
+ }
}
+ spin_unlock(&rbd_dev_list_lock);
return NULL;
}
static void rbd_dev_release(struct device *dev)
{
- struct rbd_device *rbd_dev =
- container_of(dev, struct rbd_device, dev);
+ struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
- if (rbd_dev->watch_request)
- ceph_osdc_unregister_linger_request(&rbd_dev->client->osdc,
+ if (rbd_dev->watch_request) {
+ struct ceph_client *client = rbd_dev->rbd_client->client;
+
+ ceph_osdc_unregister_linger_request(&client->osdc,
rbd_dev->watch_request);
+ }
if (rbd_dev->watch_event)
rbd_req_sync_unwatch(rbd_dev, rbd_dev->obj_md_name);
@@ -2323,6 +2495,9 @@ static void rbd_dev_release(struct device *dev)
/* clean up and free blkdev */
rbd_free_disk(rbd_dev);
unregister_blkdev(rbd_dev->major, rbd_dev->name);
+
+ /* done with the id, and with the rbd_dev */
+ rbd_id_put(rbd_dev);
kfree(rbd_dev);
/* release module ref */
@@ -2355,8 +2530,6 @@ static ssize_t rbd_remove(struct bus_type *bus,
goto done;
}
- list_del_init(&rbd_dev->node);
-
__rbd_remove_all_snaps(rbd_dev);
rbd_bus_del_dev(rbd_dev);
@@ -2370,7 +2543,7 @@ static ssize_t rbd_snap_add(struct device *dev,
const char *buf,
size_t count)
{
- struct rbd_device *rbd_dev = dev_to_rbd(dev);
+ struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
int ret;
char *name = kmalloc(count + 1, GFP_KERNEL);
if (!name)
@@ -2406,12 +2579,6 @@ err_unlock:
return ret;
}
-static struct bus_attribute rbd_bus_attrs[] = {
- __ATTR(add, S_IWUSR, NULL, rbd_add),
- __ATTR(remove, S_IWUSR, NULL, rbd_remove),
- __ATTR_NULL
-};
-
/*
* create control files in sysfs
* /sys/bus/rbd/...
@@ -2420,21 +2587,21 @@ static int rbd_sysfs_init(void)
{
int ret;
- rbd_bus_type.bus_attrs = rbd_bus_attrs;
-
- ret = bus_register(&rbd_bus_type);
- if (ret < 0)
+ ret = device_register(&rbd_root_dev);
+ if (ret < 0)
return ret;
- ret = device_register(&rbd_root_dev);
+ ret = bus_register(&rbd_bus_type);
+ if (ret < 0)
+ device_unregister(&rbd_root_dev);
return ret;
}
static void rbd_sysfs_cleanup(void)
{
- device_unregister(&rbd_root_dev);
bus_unregister(&rbd_bus_type);
+ device_unregister(&rbd_root_dev);
}
int __init rbd_init(void)
@@ -2444,8 +2611,7 @@ int __init rbd_init(void)
rc = rbd_sysfs_init();
if (rc)
return rc;
- spin_lock_init(&node_lock);
- pr_info("loaded " DRV_NAME_LONG "\n");
+ pr_info("loaded " RBD_DRV_NAME_LONG "\n");
return 0;
}
diff --git a/drivers/block/rbd_types.h b/drivers/block/rbd_types.h
index fc6c678aa2cb..950708688f17 100644
--- a/drivers/block/rbd_types.h
+++ b/drivers/block/rbd_types.h
@@ -41,10 +41,6 @@
#define RBD_HEADER_SIGNATURE "RBD"
#define RBD_HEADER_VERSION "001.005"
-struct rbd_info {
- __le64 max_id;
-} __attribute__ ((packed));
-
struct rbd_image_snap_ondisk {
__le64 id;
__le64 image_size;
diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c
index 48e8fee9f2d4..9dcf76a10bb6 100644
--- a/drivers/block/sunvdc.c
+++ b/drivers/block/sunvdc.c
@@ -839,10 +839,7 @@ static struct vio_driver vdc_port_driver = {
.id_table = vdc_port_match,
.probe = vdc_port_probe,
.remove = vdc_port_remove,
- .driver = {
- .name = "vdc_port",
- .owner = THIS_MODULE,
- }
+ .name = "vdc_port",
};
static int __init vdc_init(void)
diff --git a/drivers/block/viodasd.c b/drivers/block/viodasd.c
deleted file mode 100644
index 9a5b2a2d616d..000000000000
--- a/drivers/block/viodasd.c
+++ /dev/null
@@ -1,809 +0,0 @@
-/* -*- linux-c -*-
- * viodasd.c
- * Authors: Dave Boutcher <boutcher@us.ibm.com>
- * Ryan Arnold <ryanarn@us.ibm.com>
- * Colin Devilbiss <devilbis@us.ibm.com>
- * Stephen Rothwell
- *
- * (C) Copyright 2000-2004 IBM Corporation
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of the
- * License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * This routine provides access to disk space (termed "DASD" in historical
- * IBM terms) owned and managed by an OS/400 partition running on the
- * same box as this Linux partition.
- *
- * All disk operations are performed by sending messages back and forth to
- * the OS/400 partition.
- */
-
-#define pr_fmt(fmt) "viod: " fmt
-
-#include <linux/major.h>
-#include <linux/fs.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/blkdev.h>
-#include <linux/genhd.h>
-#include <linux/hdreg.h>
-#include <linux/errno.h>
-#include <linux/init.h>
-#include <linux/string.h>
-#include <linux/mutex.h>
-#include <linux/dma-mapping.h>
-#include <linux/completion.h>
-#include <linux/device.h>
-#include <linux/scatterlist.h>
-
-#include <asm/uaccess.h>
-#include <asm/vio.h>
-#include <asm/iseries/hv_types.h>
-#include <asm/iseries/hv_lp_event.h>
-#include <asm/iseries/hv_lp_config.h>
-#include <asm/iseries/vio.h>
-#include <asm/firmware.h>
-
-MODULE_DESCRIPTION("iSeries Virtual DASD");
-MODULE_AUTHOR("Dave Boutcher");
-MODULE_LICENSE("GPL");
-
-/*
- * We only support 7 partitions per physical disk....so with minor
- * numbers 0-255 we get a maximum of 32 disks.
- */
-#define VIOD_GENHD_NAME "iseries/vd"
-
-#define VIOD_VERS "1.64"
-
-enum {
- PARTITION_SHIFT = 3,
- MAX_DISKNO = HVMAXARCHITECTEDVIRTUALDISKS,
- MAX_DISK_NAME = FIELD_SIZEOF(struct gendisk, disk_name)
-};
-
-static DEFINE_MUTEX(viodasd_mutex);
-static DEFINE_SPINLOCK(viodasd_spinlock);
-
-#define VIOMAXREQ 16
-
-#define DEVICE_NO(cell) ((struct viodasd_device *)(cell) - &viodasd_devices[0])
-
-struct viodasd_waitevent {
- struct completion com;
- int rc;
- u16 sub_result;
- int max_disk; /* open */
-};
-
-static const struct vio_error_entry viodasd_err_table[] = {
- { 0x0201, EINVAL, "Invalid Range" },
- { 0x0202, EINVAL, "Invalid Token" },
- { 0x0203, EIO, "DMA Error" },
- { 0x0204, EIO, "Use Error" },
- { 0x0205, EIO, "Release Error" },
- { 0x0206, EINVAL, "Invalid Disk" },
- { 0x0207, EBUSY, "Can't Lock" },
- { 0x0208, EIO, "Already Locked" },
- { 0x0209, EIO, "Already Unlocked" },
- { 0x020A, EIO, "Invalid Arg" },
- { 0x020B, EIO, "Bad IFS File" },
- { 0x020C, EROFS, "Read Only Device" },
- { 0x02FF, EIO, "Internal Error" },
- { 0x0000, 0, NULL },
-};
-
-/*
- * Figure out the biggest I/O request (in sectors) we can accept
- */
-#define VIODASD_MAXSECTORS (4096 / 512 * VIOMAXBLOCKDMA)
-
-/*
- * Number of disk I/O requests we've sent to OS/400
- */
-static int num_req_outstanding;
-
-/*
- * This is our internal structure for keeping track of disk devices
- */
-struct viodasd_device {
- u16 cylinders;
- u16 tracks;
- u16 sectors;
- u16 bytes_per_sector;
- u64 size;
- int read_only;
- spinlock_t q_lock;
- struct gendisk *disk;
- struct device *dev;
-} viodasd_devices[MAX_DISKNO];
-
-/*
- * External open entry point.
- */
-static int viodasd_open(struct block_device *bdev, fmode_t mode)
-{
- struct viodasd_device *d = bdev->bd_disk->private_data;
- HvLpEvent_Rc hvrc;
- struct viodasd_waitevent we;
- u16 flags = 0;
-
- if (d->read_only) {
- if (mode & FMODE_WRITE)
- return -EROFS;
- flags = vioblockflags_ro;
- }
-
- init_completion(&we.com);
-
- /* Send the open event to OS/400 */
- hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp,
- HvLpEvent_Type_VirtualIo,
- viomajorsubtype_blockio | vioblockopen,
- HvLpEvent_AckInd_DoAck, HvLpEvent_AckType_ImmediateAck,
- viopath_sourceinst(viopath_hostLp),
- viopath_targetinst(viopath_hostLp),
- (u64)(unsigned long)&we, VIOVERSION << 16,
- ((u64)DEVICE_NO(d) << 48) | ((u64)flags << 32),
- 0, 0, 0);
- if (hvrc != 0) {
- pr_warning("HV open failed %d\n", (int)hvrc);
- return -EIO;
- }
-
- wait_for_completion(&we.com);
-
- /* Check the return code */
- if (we.rc != 0) {
- const struct vio_error_entry *err =
- vio_lookup_rc(viodasd_err_table, we.sub_result);
-
- pr_warning("bad rc opening disk: %d:0x%04x (%s)\n",
- (int)we.rc, we.sub_result, err->msg);
- return -EIO;
- }
-
- return 0;
-}
-
-static int viodasd_unlocked_open(struct block_device *bdev, fmode_t mode)
-{
- int ret;
-
- mutex_lock(&viodasd_mutex);
- ret = viodasd_open(bdev, mode);
- mutex_unlock(&viodasd_mutex);
-
- return ret;
-}
-
-
-/*
- * External release entry point.
- */
-static int viodasd_release(struct gendisk *disk, fmode_t mode)
-{
- struct viodasd_device *d = disk->private_data;
- HvLpEvent_Rc hvrc;
-
- mutex_lock(&viodasd_mutex);
- /* Send the event to OS/400. We DON'T expect a response */
- hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp,
- HvLpEvent_Type_VirtualIo,
- viomajorsubtype_blockio | vioblockclose,
- HvLpEvent_AckInd_NoAck, HvLpEvent_AckType_ImmediateAck,
- viopath_sourceinst(viopath_hostLp),
- viopath_targetinst(viopath_hostLp),
- 0, VIOVERSION << 16,
- ((u64)DEVICE_NO(d) << 48) /* | ((u64)flags << 32) */,
- 0, 0, 0);
- if (hvrc != 0)
- pr_warning("HV close call failed %d\n", (int)hvrc);
-
- mutex_unlock(&viodasd_mutex);
-
- return 0;
-}
-
-
-/* External ioctl entry point.
- */
-static int viodasd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
-{
- struct gendisk *disk = bdev->bd_disk;
- struct viodasd_device *d = disk->private_data;
-
- geo->sectors = d->sectors ? d->sectors : 32;
- geo->heads = d->tracks ? d->tracks : 64;
- geo->cylinders = d->cylinders ? d->cylinders :
- get_capacity(disk) / (geo->sectors * geo->heads);
-
- return 0;
-}
-
-/*
- * Our file operations table
- */
-static const struct block_device_operations viodasd_fops = {
- .owner = THIS_MODULE,
- .open = viodasd_unlocked_open,
- .release = viodasd_release,
- .getgeo = viodasd_getgeo,
-};
-
-/*
- * End a request
- */
-static void viodasd_end_request(struct request *req, int error,
- int num_sectors)
-{
- __blk_end_request(req, error, num_sectors << 9);
-}
-
-/*
- * Send an actual I/O request to OS/400
- */
-static int send_request(struct request *req)
-{
- u64 start;
- int direction;
- int nsg;
- u16 viocmd;
- HvLpEvent_Rc hvrc;
- struct vioblocklpevent *bevent;
- struct HvLpEvent *hev;
- struct scatterlist sg[VIOMAXBLOCKDMA];
- int sgindex;
- struct viodasd_device *d;
- unsigned long flags;
-
- start = (u64)blk_rq_pos(req) << 9;
-
- if (rq_data_dir(req) == READ) {
- direction = DMA_FROM_DEVICE;
- viocmd = viomajorsubtype_blockio | vioblockread;
- } else {
- direction = DMA_TO_DEVICE;
- viocmd = viomajorsubtype_blockio | vioblockwrite;
- }
-
- d = req->rq_disk->private_data;
-
- /* Now build the scatter-gather list */
- sg_init_table(sg, VIOMAXBLOCKDMA);
- nsg = blk_rq_map_sg(req->q, req, sg);
- nsg = dma_map_sg(d->dev, sg, nsg, direction);
-
- spin_lock_irqsave(&viodasd_spinlock, flags);
- num_req_outstanding++;
-
- /* This optimization handles a single DMA block */
- if (nsg == 1)
- hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp,
- HvLpEvent_Type_VirtualIo, viocmd,
- HvLpEvent_AckInd_DoAck,
- HvLpEvent_AckType_ImmediateAck,
- viopath_sourceinst(viopath_hostLp),
- viopath_targetinst(viopath_hostLp),
- (u64)(unsigned long)req, VIOVERSION << 16,
- ((u64)DEVICE_NO(d) << 48), start,
- ((u64)sg_dma_address(&sg[0])) << 32,
- sg_dma_len(&sg[0]));
- else {
- bevent = (struct vioblocklpevent *)
- vio_get_event_buffer(viomajorsubtype_blockio);
- if (bevent == NULL) {
- pr_warning("error allocating disk event buffer\n");
- goto error_ret;
- }
-
- /*
- * Now build up the actual request. Note that we store
- * the pointer to the request in the correlation
- * token so we can match the response up later
- */
- memset(bevent, 0, sizeof(struct vioblocklpevent));
- hev = &bevent->event;
- hev->flags = HV_LP_EVENT_VALID | HV_LP_EVENT_DO_ACK |
- HV_LP_EVENT_INT;
- hev->xType = HvLpEvent_Type_VirtualIo;
- hev->xSubtype = viocmd;
- hev->xSourceLp = HvLpConfig_getLpIndex();
- hev->xTargetLp = viopath_hostLp;
- hev->xSizeMinus1 =
- offsetof(struct vioblocklpevent, u.rw_data.dma_info) +
- (sizeof(bevent->u.rw_data.dma_info[0]) * nsg) - 1;
- hev->xSourceInstanceId = viopath_sourceinst(viopath_hostLp);
- hev->xTargetInstanceId = viopath_targetinst(viopath_hostLp);
- hev->xCorrelationToken = (u64)req;
- bevent->version = VIOVERSION;
- bevent->disk = DEVICE_NO(d);
- bevent->u.rw_data.offset = start;
-
- /*
- * Copy just the dma information from the sg list
- * into the request
- */
- for (sgindex = 0; sgindex < nsg; sgindex++) {
- bevent->u.rw_data.dma_info[sgindex].token =
- sg_dma_address(&sg[sgindex]);
- bevent->u.rw_data.dma_info[sgindex].len =
- sg_dma_len(&sg[sgindex]);
- }
-
- /* Send the request */
- hvrc = HvCallEvent_signalLpEvent(&bevent->event);
- vio_free_event_buffer(viomajorsubtype_blockio, bevent);
- }
-
- if (hvrc != HvLpEvent_Rc_Good) {
- pr_warning("error sending disk event to OS/400 (rc %d)\n",
- (int)hvrc);
- goto error_ret;
- }
- spin_unlock_irqrestore(&viodasd_spinlock, flags);
- return 0;
-
-error_ret:
- num_req_outstanding--;
- spin_unlock_irqrestore(&viodasd_spinlock, flags);
- dma_unmap_sg(d->dev, sg, nsg, direction);
- return -1;
-}
-
-/*
- * This is the external request processing routine
- */
-static void do_viodasd_request(struct request_queue *q)
-{
- struct request *req;
-
- /*
- * If we already have the maximum number of requests
- * outstanding to OS/400 just bail out. We'll come
- * back later.
- */
- while (num_req_outstanding < VIOMAXREQ) {
- req = blk_fetch_request(q);
- if (req == NULL)
- return;
- /* check that request contains a valid command */
- if (req->cmd_type != REQ_TYPE_FS) {
- viodasd_end_request(req, -EIO, blk_rq_sectors(req));
- continue;
- }
- /* Try sending the request */
- if (send_request(req) != 0)
- viodasd_end_request(req, -EIO, blk_rq_sectors(req));
- }
-}
-
-/*
- * Probe a single disk and fill in the viodasd_device structure
- * for it.
- */
-static int probe_disk(struct viodasd_device *d)
-{
- HvLpEvent_Rc hvrc;
- struct viodasd_waitevent we;
- int dev_no = DEVICE_NO(d);
- struct gendisk *g;
- struct request_queue *q;
- u16 flags = 0;
-
-retry:
- init_completion(&we.com);
-
- /* Send the open event to OS/400 */
- hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp,
- HvLpEvent_Type_VirtualIo,
- viomajorsubtype_blockio | vioblockopen,
- HvLpEvent_AckInd_DoAck, HvLpEvent_AckType_ImmediateAck,
- viopath_sourceinst(viopath_hostLp),
- viopath_targetinst(viopath_hostLp),
- (u64)(unsigned long)&we, VIOVERSION << 16,
- ((u64)dev_no << 48) | ((u64)flags<< 32),
- 0, 0, 0);
- if (hvrc != 0) {
- pr_warning("bad rc on HV open %d\n", (int)hvrc);
- return 0;
- }
-
- wait_for_completion(&we.com);
-
- if (we.rc != 0) {
- if (flags != 0)
- return 0;
- /* try again with read only flag set */
- flags = vioblockflags_ro;
- goto retry;
- }
- if (we.max_disk > (MAX_DISKNO - 1)) {
- printk_once(KERN_INFO pr_fmt("Only examining the first %d of %d disks connected\n"),
- MAX_DISKNO, we.max_disk + 1);
- }
-
- /* Send the close event to OS/400. We DON'T expect a response */
- hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp,
- HvLpEvent_Type_VirtualIo,
- viomajorsubtype_blockio | vioblockclose,
- HvLpEvent_AckInd_NoAck, HvLpEvent_AckType_ImmediateAck,
- viopath_sourceinst(viopath_hostLp),
- viopath_targetinst(viopath_hostLp),
- 0, VIOVERSION << 16,
- ((u64)dev_no << 48) | ((u64)flags << 32),
- 0, 0, 0);
- if (hvrc != 0) {
- pr_warning("bad rc sending event to OS/400 %d\n", (int)hvrc);
- return 0;
- }
-
- if (d->dev == NULL) {
- /* this is when we reprobe for new disks */
- if (vio_create_viodasd(dev_no) == NULL) {
- pr_warning("cannot allocate virtual device for disk %d\n",
- dev_no);
- return 0;
- }
- /*
- * The vio_create_viodasd will have recursed into this
- * routine with d->dev set to the new vio device and
- * will finish the setup of the disk below.
- */
- return 1;
- }
-
- /* create the request queue for the disk */
- spin_lock_init(&d->q_lock);
- q = blk_init_queue(do_viodasd_request, &d->q_lock);
- if (q == NULL) {
- pr_warning("cannot allocate queue for disk %d\n", dev_no);
- return 0;
- }
- g = alloc_disk(1 << PARTITION_SHIFT);
- if (g == NULL) {
- pr_warning("cannot allocate disk structure for disk %d\n",
- dev_no);
- blk_cleanup_queue(q);
- return 0;
- }
-
- d->disk = g;
- blk_queue_max_segments(q, VIOMAXBLOCKDMA);
- blk_queue_max_hw_sectors(q, VIODASD_MAXSECTORS);
- g->major = VIODASD_MAJOR;
- g->first_minor = dev_no << PARTITION_SHIFT;
- if (dev_no >= 26)
- snprintf(g->disk_name, sizeof(g->disk_name),
- VIOD_GENHD_NAME "%c%c",
- 'a' + (dev_no / 26) - 1, 'a' + (dev_no % 26));
- else
- snprintf(g->disk_name, sizeof(g->disk_name),
- VIOD_GENHD_NAME "%c", 'a' + (dev_no % 26));
- g->fops = &viodasd_fops;
- g->queue = q;
- g->private_data = d;
- g->driverfs_dev = d->dev;
- set_capacity(g, d->size >> 9);
-
- pr_info("disk %d: %lu sectors (%lu MB) CHS=%d/%d/%d sector size %d%s\n",
- dev_no, (unsigned long)(d->size >> 9),
- (unsigned long)(d->size >> 20),
- (int)d->cylinders, (int)d->tracks,
- (int)d->sectors, (int)d->bytes_per_sector,
- d->read_only ? " (RO)" : "");
-
- /* register us in the global list */
- add_disk(g);
- return 1;
-}
-
-/* returns the total number of scatterlist elements converted */
-static int block_event_to_scatterlist(const struct vioblocklpevent *bevent,
- struct scatterlist *sg, int *total_len)
-{
- int i, numsg;
- const struct rw_data *rw_data = &bevent->u.rw_data;
- static const int offset =
- offsetof(struct vioblocklpevent, u.rw_data.dma_info);
- static const int element_size = sizeof(rw_data->dma_info[0]);
-
- numsg = ((bevent->event.xSizeMinus1 + 1) - offset) / element_size;
- if (numsg > VIOMAXBLOCKDMA)
- numsg = VIOMAXBLOCKDMA;
-
- *total_len = 0;
- sg_init_table(sg, VIOMAXBLOCKDMA);
- for (i = 0; (i < numsg) && (rw_data->dma_info[i].len > 0); ++i) {
- sg_dma_address(&sg[i]) = rw_data->dma_info[i].token;
- sg_dma_len(&sg[i]) = rw_data->dma_info[i].len;
- *total_len += rw_data->dma_info[i].len;
- }
- return i;
-}
-
-/*
- * Restart all queues, starting with the one _after_ the disk given,
- * thus reducing the chance of starvation of higher numbered disks.
- */
-static void viodasd_restart_all_queues_starting_from(int first_index)
-{
- int i;
-
- for (i = first_index + 1; i < MAX_DISKNO; ++i)
- if (viodasd_devices[i].disk)
- blk_run_queue(viodasd_devices[i].disk->queue);
- for (i = 0; i <= first_index; ++i)
- if (viodasd_devices[i].disk)
- blk_run_queue(viodasd_devices[i].disk->queue);
-}
-
-/*
- * For read and write requests, decrement the number of outstanding requests,
- * Free the DMA buffers we allocated.
- */
-static int viodasd_handle_read_write(struct vioblocklpevent *bevent)
-{
- int num_sg, num_sect, pci_direction, total_len;
- struct request *req;
- struct scatterlist sg[VIOMAXBLOCKDMA];
- struct HvLpEvent *event = &bevent->event;
- unsigned long irq_flags;
- struct viodasd_device *d;
- int error;
- spinlock_t *qlock;
-
- num_sg = block_event_to_scatterlist(bevent, sg, &total_len);
- num_sect = total_len >> 9;
- if (event->xSubtype == (viomajorsubtype_blockio | vioblockread))
- pci_direction = DMA_FROM_DEVICE;
- else
- pci_direction = DMA_TO_DEVICE;
- req = (struct request *)bevent->event.xCorrelationToken;
- d = req->rq_disk->private_data;
-
- dma_unmap_sg(d->dev, sg, num_sg, pci_direction);
-
- /*
- * Since this is running in interrupt mode, we need to make sure
- * we're not stepping on any global I/O operations
- */
- spin_lock_irqsave(&viodasd_spinlock, irq_flags);
- num_req_outstanding--;
- spin_unlock_irqrestore(&viodasd_spinlock, irq_flags);
-
- error = (event->xRc == HvLpEvent_Rc_Good) ? 0 : -EIO;
- if (error) {
- const struct vio_error_entry *err;
- err = vio_lookup_rc(viodasd_err_table, bevent->sub_result);
- pr_warning("read/write error %d:0x%04x (%s)\n",
- event->xRc, bevent->sub_result, err->msg);
- num_sect = blk_rq_sectors(req);
- }
- qlock = req->q->queue_lock;
- spin_lock_irqsave(qlock, irq_flags);
- viodasd_end_request(req, error, num_sect);
- spin_unlock_irqrestore(qlock, irq_flags);
-
- /* Finally, try to get more requests off of this device's queue */
- viodasd_restart_all_queues_starting_from(DEVICE_NO(d));
-
- return 0;
-}
-
-/* This routine handles incoming block LP events */
-static void handle_block_event(struct HvLpEvent *event)
-{
- struct vioblocklpevent *bevent = (struct vioblocklpevent *)event;
- struct viodasd_waitevent *pwe;
-
- if (event == NULL)
- /* Notification that a partition went away! */
- return;
- /* First, we should NEVER get an int here...only acks */
- if (hvlpevent_is_int(event)) {
- pr_warning("Yikes! got an int in viodasd event handler!\n");
- if (hvlpevent_need_ack(event)) {
- event->xRc = HvLpEvent_Rc_InvalidSubtype;
- HvCallEvent_ackLpEvent(event);
- }
- }
-
- switch (event->xSubtype & VIOMINOR_SUBTYPE_MASK) {
- case vioblockopen:
- /*
- * Handle a response to an open request. We get all the
- * disk information in the response, so update it. The
- * correlation token contains a pointer to a waitevent
- * structure that has a completion in it. update the
- * return code in the waitevent structure and post the
- * completion to wake up the guy who sent the request
- */
- pwe = (struct viodasd_waitevent *)event->xCorrelationToken;
- pwe->rc = event->xRc;
- pwe->sub_result = bevent->sub_result;
- if (event->xRc == HvLpEvent_Rc_Good) {
- const struct open_data *data = &bevent->u.open_data;
- struct viodasd_device *device =
- &viodasd_devices[bevent->disk];
- device->read_only =
- bevent->flags & vioblockflags_ro;
- device->size = data->disk_size;
- device->cylinders = data->cylinders;
- device->tracks = data->tracks;
- device->sectors = data->sectors;
- device->bytes_per_sector = data->bytes_per_sector;
- pwe->max_disk = data->max_disk;
- }
- complete(&pwe->com);
- break;
- case vioblockclose:
- break;
- case vioblockread:
- case vioblockwrite:
- viodasd_handle_read_write(bevent);
- break;
-
- default:
- pr_warning("invalid subtype!");
- if (hvlpevent_need_ack(event)) {
- event->xRc = HvLpEvent_Rc_InvalidSubtype;
- HvCallEvent_ackLpEvent(event);
- }
- }
-}
-
-/*
- * Get the driver to reprobe for more disks.
- */
-static ssize_t probe_disks(struct device_driver *drv, const char *buf,
- size_t count)
-{
- struct viodasd_device *d;
-
- for (d = viodasd_devices; d < &viodasd_devices[MAX_DISKNO]; d++) {
- if (d->disk == NULL)
- probe_disk(d);
- }
- return count;
-}
-static DRIVER_ATTR(probe, S_IWUSR, NULL, probe_disks);
-
-static int viodasd_probe(struct vio_dev *vdev, const struct vio_device_id *id)
-{
- struct viodasd_device *d = &viodasd_devices[vdev->unit_address];
-
- d->dev = &vdev->dev;
- if (!probe_disk(d))
- return -ENODEV;
- return 0;
-}
-
-static int viodasd_remove(struct vio_dev *vdev)
-{
- struct viodasd_device *d;
-
- d = &viodasd_devices[vdev->unit_address];
- if (d->disk) {
- del_gendisk(d->disk);
- blk_cleanup_queue(d->disk->queue);
- put_disk(d->disk);
- d->disk = NULL;
- }
- d->dev = NULL;
- return 0;
-}
-
-/**
- * viodasd_device_table: Used by vio.c to match devices that we
- * support.
- */
-static struct vio_device_id viodasd_device_table[] __devinitdata = {
- { "block", "IBM,iSeries-viodasd" },
- { "", "" }
-};
-MODULE_DEVICE_TABLE(vio, viodasd_device_table);
-
-static struct vio_driver viodasd_driver = {
- .id_table = viodasd_device_table,
- .probe = viodasd_probe,
- .remove = viodasd_remove,
- .driver = {
- .name = "viodasd",
- .owner = THIS_MODULE,
- }
-};
-
-static int need_delete_probe;
-
-/*
- * Initialize the whole device driver. Handle module and non-module
- * versions
- */
-static int __init viodasd_init(void)
-{
- int rc;
-
- if (!firmware_has_feature(FW_FEATURE_ISERIES)) {
- rc = -ENODEV;
- goto early_fail;
- }
-
- /* Try to open to our host lp */
- if (viopath_hostLp == HvLpIndexInvalid)
- vio_set_hostlp();
-
- if (viopath_hostLp == HvLpIndexInvalid) {
- pr_warning("invalid hosting partition\n");
- rc = -EIO;
- goto early_fail;
- }
-
- pr_info("vers " VIOD_VERS ", hosting partition %d\n", viopath_hostLp);
-
- /* register the block device */
- rc = register_blkdev(VIODASD_MAJOR, VIOD_GENHD_NAME);
- if (rc) {
- pr_warning("Unable to get major number %d for %s\n",
- VIODASD_MAJOR, VIOD_GENHD_NAME);
- goto early_fail;
- }
- /* Actually open the path to the hosting partition */
- rc = viopath_open(viopath_hostLp, viomajorsubtype_blockio,
- VIOMAXREQ + 2);
- if (rc) {
- pr_warning("error opening path to host partition %d\n",
- viopath_hostLp);
- goto unregister_blk;
- }
-
- /* Initialize our request handler */
- vio_setHandler(viomajorsubtype_blockio, handle_block_event);
-
- rc = vio_register_driver(&viodasd_driver);
- if (rc) {
- pr_warning("vio_register_driver failed\n");
- goto unset_handler;
- }
-
- /*
- * If this call fails, it just means that we cannot dynamically
- * add virtual disks, but the driver will still work fine for
- * all existing disk, so ignore the failure.
- */
- if (!driver_create_file(&viodasd_driver.driver, &driver_attr_probe))
- need_delete_probe = 1;
-
- return 0;
-
-unset_handler:
- vio_clearHandler(viomajorsubtype_blockio);
- viopath_close(viopath_hostLp, viomajorsubtype_blockio, VIOMAXREQ + 2);
-unregister_blk:
- unregister_blkdev(VIODASD_MAJOR, VIOD_GENHD_NAME);
-early_fail:
- return rc;
-}
-module_init(viodasd_init);
-
-void __exit viodasd_exit(void)
-{
- if (need_delete_probe)
- driver_remove_file(&viodasd_driver.driver, &driver_attr_probe);
- vio_unregister_driver(&viodasd_driver);
- vio_clearHandler(viomajorsubtype_blockio);
- viopath_close(viopath_hostLp, viomajorsubtype_blockio, VIOMAXREQ + 2);
- unregister_blkdev(VIODASD_MAJOR, VIOD_GENHD_NAME);
-}
-module_exit(viodasd_exit);
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index c4a60badf252..0d39f2f4294a 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -351,6 +351,7 @@ static void virtblk_config_changed_work(struct work_struct *work)
cap_str_10, cap_str_2);
set_capacity(vblk->disk, capacity);
+ revalidate_disk(vblk->disk);
done:
mutex_unlock(&vblk->config_lock);
}
@@ -374,6 +375,34 @@ static int init_vq(struct virtio_blk *vblk)
return err;
}
+/*
+ * Legacy naming scheme used for virtio devices. We are stuck with it for
+ * virtio blk but don't ever use it for any new driver.
+ */
+static int virtblk_name_format(char *prefix, int index, char *buf, int buflen)
+{
+ const int base = 'z' - 'a' + 1;
+ char *begin = buf + strlen(prefix);
+ char *end = buf + buflen;
+ char *p;
+ int unit;
+
+ p = end - 1;
+ *p = '\0';
+ unit = base;
+ do {
+ if (p == begin)
+ return -EINVAL;
+ *--p = 'a' + (index % unit);
+ index = (index / unit) - 1;
+ } while (index >= 0);
+
+ memmove(begin, p, end - p);
+ memcpy(buf, prefix, strlen(prefix));
+
+ return 0;
+}
+
static int __devinit virtblk_probe(struct virtio_device *vdev)
{
struct virtio_blk *vblk;
@@ -442,18 +471,7 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
q->queuedata = vblk;
- if (index < 26) {
- sprintf(vblk->disk->disk_name, "vd%c", 'a' + index % 26);
- } else if (index < (26 + 1) * 26) {
- sprintf(vblk->disk->disk_name, "vd%c%c",
- 'a' + index / 26 - 1, 'a' + index % 26);
- } else {
- const unsigned int m1 = (index / 26 - 1) / 26 - 1;
- const unsigned int m2 = (index / 26 - 1) % 26;
- const unsigned int m3 = index % 26;
- sprintf(vblk->disk->disk_name, "vd%c%c%c",
- 'a' + m1, 'a' + m2, 'a' + m3);
- }
+ virtblk_name_format("vd", index, vblk->disk->disk_name, DISK_NAME_LEN);
vblk->disk->major = major;
vblk->disk->first_minor = index_to_minor(index);
diff --git a/drivers/block/xd.c b/drivers/block/xd.c
index 51a972704db5..ff540520bada 100644
--- a/drivers/block/xd.c
+++ b/drivers/block/xd.c
@@ -52,7 +52,6 @@
#include <linux/io.h>
#include <linux/gfp.h>
-#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/dma.h>
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index 0088bf60f368..73f196ca713f 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -321,6 +321,7 @@ struct seg_buf {
static void xen_blkbk_unmap(struct pending_req *req)
{
struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+ struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
unsigned int i, invcount = 0;
grant_handle_t handle;
int ret;
@@ -332,25 +333,12 @@ static void xen_blkbk_unmap(struct pending_req *req)
gnttab_set_unmap_op(&unmap[invcount], vaddr(req, i),
GNTMAP_host_map, handle);
pending_handle(req, i) = BLKBACK_INVALID_HANDLE;
+ pages[invcount] = virt_to_page(vaddr(req, i));
invcount++;
}
- ret = HYPERVISOR_grant_table_op(
- GNTTABOP_unmap_grant_ref, unmap, invcount);
+ ret = gnttab_unmap_refs(unmap, pages, invcount, false);
BUG_ON(ret);
- /*
- * Note, we use invcount, so nr->pages, so we can't index
- * using vaddr(req, i).
- */
- for (i = 0; i < invcount; i++) {
- ret = m2p_remove_override(
- virt_to_page(unmap[i].host_addr), false);
- if (ret) {
- pr_alert(DRV_PFX "Failed to remove M2P override for %lx\n",
- (unsigned long)unmap[i].host_addr);
- continue;
- }
- }
}
static int xen_blkbk_map(struct blkif_request *req,
@@ -378,7 +366,7 @@ static int xen_blkbk_map(struct blkif_request *req,
pending_req->blkif->domid);
}
- ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, nseg);
+ ret = gnttab_map_refs(map, NULL, &blkbk->pending_page(pending_req, 0), nseg);
BUG_ON(ret);
/*
@@ -398,15 +386,6 @@ static int xen_blkbk_map(struct blkif_request *req,
if (ret)
continue;
- ret = m2p_add_override(PFN_DOWN(map[i].dev_bus_addr),
- blkbk->pending_page(pending_req, i), NULL);
- if (ret) {
- pr_alert(DRV_PFX "Failed to install M2P override for %lx (ret: %d)\n",
- (unsigned long)map[i].dev_bus_addr, ret);
- /* We could switch over to GNTTABOP_copy */
- continue;
- }
-
seg[i].buf = map[i].dev_bus_addr |
(req->u.rw.seg[i].first_sect << 9);
}
@@ -419,21 +398,18 @@ static int dispatch_discard_io(struct xen_blkif *blkif,
int err = 0;
int status = BLKIF_RSP_OKAY;
struct block_device *bdev = blkif->vbd.bdev;
+ unsigned long secure;
blkif->st_ds_req++;
xen_blkif_get(blkif);
- if (blkif->blk_backend_type == BLKIF_BACKEND_PHY ||
- blkif->blk_backend_type == BLKIF_BACKEND_FILE) {
- unsigned long secure = (blkif->vbd.discard_secure &&
- (req->u.discard.flag & BLKIF_DISCARD_SECURE)) ?
- BLKDEV_DISCARD_SECURE : 0;
- err = blkdev_issue_discard(bdev,
- req->u.discard.sector_number,
- req->u.discard.nr_sectors,
- GFP_KERNEL, secure);
- } else
- err = -EOPNOTSUPP;
+ secure = (blkif->vbd.discard_secure &&
+ (req->u.discard.flag & BLKIF_DISCARD_SECURE)) ?
+ BLKDEV_DISCARD_SECURE : 0;
+
+ err = blkdev_issue_discard(bdev, req->u.discard.sector_number,
+ req->u.discard.nr_sectors,
+ GFP_KERNEL, secure);
if (err == -EOPNOTSUPP) {
pr_debug(DRV_PFX "discard op failed, not supported\n");
@@ -830,7 +806,7 @@ static int __init xen_blkif_init(void)
int i, mmap_pages;
int rc = 0;
- if (!xen_pv_domain())
+ if (!xen_domain())
return -ENODEV;
blkbk = kzalloc(sizeof(struct xen_blkbk), GFP_KERNEL);
diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h
index d0ee7edc9be8..773cf27dc23f 100644
--- a/drivers/block/xen-blkback/common.h
+++ b/drivers/block/xen-blkback/common.h
@@ -146,11 +146,6 @@ enum blkif_protocol {
BLKIF_PROTOCOL_X86_64 = 3,
};
-enum blkif_backend_type {
- BLKIF_BACKEND_PHY = 1,
- BLKIF_BACKEND_FILE = 2,
-};
-
struct xen_vbd {
/* What the domain refers to this vbd as. */
blkif_vdev_t handle;
@@ -177,7 +172,6 @@ struct xen_blkif {
unsigned int irq;
/* Comms information. */
enum blkif_protocol blk_protocol;
- enum blkif_backend_type blk_backend_type;
union blkif_back_rings blk_rings;
void *blk_ring;
/* The VBD attached to this interface. */
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index 24a2fb57e5d0..89860f34a7ec 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -381,72 +381,49 @@ int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt,
err = xenbus_printf(xbt, dev->nodename, "feature-flush-cache",
"%d", state);
if (err)
- xenbus_dev_fatal(dev, err, "writing feature-flush-cache");
+ dev_warn(&dev->dev, "writing feature-flush-cache (%d)", err);
return err;
}
-int xen_blkbk_discard(struct xenbus_transaction xbt, struct backend_info *be)
+static void xen_blkbk_discard(struct xenbus_transaction xbt, struct backend_info *be)
{
struct xenbus_device *dev = be->dev;
struct xen_blkif *blkif = be->blkif;
- char *type;
int err;
int state = 0;
+ struct block_device *bdev = be->blkif->vbd.bdev;
+ struct request_queue *q = bdev_get_queue(bdev);
- type = xenbus_read(XBT_NIL, dev->nodename, "type", NULL);
- if (!IS_ERR(type)) {
- if (strncmp(type, "file", 4) == 0) {
- state = 1;
- blkif->blk_backend_type = BLKIF_BACKEND_FILE;
+ if (blk_queue_discard(q)) {
+ err = xenbus_printf(xbt, dev->nodename,
+ "discard-granularity", "%u",
+ q->limits.discard_granularity);
+ if (err) {
+ dev_warn(&dev->dev, "writing discard-granularity (%d)", err);
+ return;
}
- if (strncmp(type, "phy", 3) == 0) {
- struct block_device *bdev = be->blkif->vbd.bdev;
- struct request_queue *q = bdev_get_queue(bdev);
- if (blk_queue_discard(q)) {
- err = xenbus_printf(xbt, dev->nodename,
- "discard-granularity", "%u",
- q->limits.discard_granularity);
- if (err) {
- xenbus_dev_fatal(dev, err,
- "writing discard-granularity");
- goto kfree;
- }
- err = xenbus_printf(xbt, dev->nodename,
- "discard-alignment", "%u",
- q->limits.discard_alignment);
- if (err) {
- xenbus_dev_fatal(dev, err,
- "writing discard-alignment");
- goto kfree;
- }
- state = 1;
- blkif->blk_backend_type = BLKIF_BACKEND_PHY;
- }
- /* Optional. */
- err = xenbus_printf(xbt, dev->nodename,
- "discard-secure", "%d",
- blkif->vbd.discard_secure);
- if (err) {
- xenbus_dev_fatal(dev, err,
- "writting discard-secure");
- goto kfree;
- }
+ err = xenbus_printf(xbt, dev->nodename,
+ "discard-alignment", "%u",
+ q->limits.discard_alignment);
+ if (err) {
+ dev_warn(&dev->dev, "writing discard-alignment (%d)", err);
+ return;
+ }
+ state = 1;
+ /* Optional. */
+ err = xenbus_printf(xbt, dev->nodename,
+ "discard-secure", "%d",
+ blkif->vbd.discard_secure);
+ if (err) {
+ dev_warn(dev-dev, "writing discard-secure (%d)", err);
+ return;
}
- } else {
- err = PTR_ERR(type);
- xenbus_dev_fatal(dev, err, "reading type");
- goto out;
}
-
err = xenbus_printf(xbt, dev->nodename, "feature-discard",
"%d", state);
if (err)
- xenbus_dev_fatal(dev, err, "writing feature-discard");
-kfree:
- kfree(type);
-out:
- return err;
+ dev_warn(&dev->dev, "writing feature-discard (%d)", err);
}
int xen_blkbk_barrier(struct xenbus_transaction xbt,
struct backend_info *be, int state)
@@ -457,7 +434,7 @@ int xen_blkbk_barrier(struct xenbus_transaction xbt,
err = xenbus_printf(xbt, dev->nodename, "feature-barrier",
"%d", state);
if (err)
- xenbus_dev_fatal(dev, err, "writing feature-barrier");
+ dev_warn(&dev->dev, "writing feature-barrier (%d)", err);
return err;
}
@@ -689,14 +666,12 @@ again:
return;
}
- err = xen_blkbk_flush_diskcache(xbt, be, be->blkif->vbd.flush_support);
- if (err)
- goto abort;
+ /* If we can't advertise it is OK. */
+ xen_blkbk_flush_diskcache(xbt, be, be->blkif->vbd.flush_support);
- err = xen_blkbk_discard(xbt, be);
+ xen_blkbk_discard(xbt, be);
- /* If we can't advertise it is OK. */
- err = xen_blkbk_barrier(xbt, be, be->blkif->vbd.flush_support);
+ xen_blkbk_barrier(xbt, be, be->blkif->vbd.flush_support);
err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu",
(unsigned long long)vbd_sz(&be->blkif->vbd));
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 2f22874c0a37..4e86393a09cf 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -43,6 +43,7 @@
#include <linux/slab.h>
#include <linux/mutex.h>
#include <linux/scatterlist.h>
+#include <linux/bitmap.h>
#include <xen/xen.h>
#include <xen/xenbus.h>
@@ -81,6 +82,7 @@ static const struct block_device_operations xlvbd_block_fops;
*/
struct blkfront_info
{
+ spinlock_t io_lock;
struct mutex mutex;
struct xenbus_device *xbdev;
struct gendisk *gd;
@@ -105,8 +107,6 @@ struct blkfront_info
int is_ready;
};
-static DEFINE_SPINLOCK(blkif_io_lock);
-
static unsigned int nr_minors;
static unsigned long *minors;
static DEFINE_SPINLOCK(minor_lock);
@@ -177,8 +177,7 @@ static int xlbd_reserve_minors(unsigned int minor, unsigned int nr)
spin_lock(&minor_lock);
if (find_next_bit(minors, end, minor) >= end) {
- for (; minor < end; ++minor)
- __set_bit(minor, minors);
+ bitmap_set(minors, minor, nr);
rc = 0;
} else
rc = -EBUSY;
@@ -193,8 +192,7 @@ static void xlbd_release_minors(unsigned int minor, unsigned int nr)
BUG_ON(end > nr_minors);
spin_lock(&minor_lock);
- for (; minor < end; ++minor)
- __clear_bit(minor, minors);
+ bitmap_clear(minors, minor, nr);
spin_unlock(&minor_lock);
}
@@ -419,7 +417,7 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
struct request_queue *rq;
struct blkfront_info *info = gd->private_data;
- rq = blk_init_queue(do_blkif_request, &blkif_io_lock);
+ rq = blk_init_queue(do_blkif_request, &info->io_lock);
if (rq == NULL)
return -1;
@@ -636,14 +634,14 @@ static void xlvbd_release_gendisk(struct blkfront_info *info)
if (info->rq == NULL)
return;
- spin_lock_irqsave(&blkif_io_lock, flags);
+ spin_lock_irqsave(&info->io_lock, flags);
/* No more blkif_request(). */
blk_stop_queue(info->rq);
/* No more gnttab callback work. */
gnttab_cancel_free_callback(&info->callback);
- spin_unlock_irqrestore(&blkif_io_lock, flags);
+ spin_unlock_irqrestore(&info->io_lock, flags);
/* Flush gnttab callback work. Must be done with no locks held. */
flush_work_sync(&info->work);
@@ -675,16 +673,16 @@ static void blkif_restart_queue(struct work_struct *work)
{
struct blkfront_info *info = container_of(work, struct blkfront_info, work);
- spin_lock_irq(&blkif_io_lock);
+ spin_lock_irq(&info->io_lock);
if (info->connected == BLKIF_STATE_CONNECTED)
kick_pending_request_queues(info);
- spin_unlock_irq(&blkif_io_lock);
+ spin_unlock_irq(&info->io_lock);
}
static void blkif_free(struct blkfront_info *info, int suspend)
{
/* Prevent new requests being issued until we fix things up. */
- spin_lock_irq(&blkif_io_lock);
+ spin_lock_irq(&info->io_lock);
info->connected = suspend ?
BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED;
/* No more blkif_request(). */
@@ -692,7 +690,7 @@ static void blkif_free(struct blkfront_info *info, int suspend)
blk_stop_queue(info->rq);
/* No more gnttab callback work. */
gnttab_cancel_free_callback(&info->callback);
- spin_unlock_irq(&blkif_io_lock);
+ spin_unlock_irq(&info->io_lock);
/* Flush gnttab callback work. Must be done with no locks held. */
flush_work_sync(&info->work);
@@ -728,10 +726,10 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
struct blkfront_info *info = (struct blkfront_info *)dev_id;
int error;
- spin_lock_irqsave(&blkif_io_lock, flags);
+ spin_lock_irqsave(&info->io_lock, flags);
if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) {
- spin_unlock_irqrestore(&blkif_io_lock, flags);
+ spin_unlock_irqrestore(&info->io_lock, flags);
return IRQ_HANDLED;
}
@@ -816,7 +814,7 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
kick_pending_request_queues(info);
- spin_unlock_irqrestore(&blkif_io_lock, flags);
+ spin_unlock_irqrestore(&info->io_lock, flags);
return IRQ_HANDLED;
}
@@ -991,6 +989,7 @@ static int blkfront_probe(struct xenbus_device *dev,
}
mutex_init(&info->mutex);
+ spin_lock_init(&info->io_lock);
info->xbdev = dev;
info->vdevice = vdevice;
info->connected = BLKIF_STATE_DISCONNECTED;
@@ -1068,7 +1067,7 @@ static int blkif_recover(struct blkfront_info *info)
xenbus_switch_state(info->xbdev, XenbusStateConnected);
- spin_lock_irq(&blkif_io_lock);
+ spin_lock_irq(&info->io_lock);
/* Now safe for us to use the shared ring */
info->connected = BLKIF_STATE_CONNECTED;
@@ -1079,7 +1078,7 @@ static int blkif_recover(struct blkfront_info *info)
/* Kick any other new requests queued since we resumed */
kick_pending_request_queues(info);
- spin_unlock_irq(&blkif_io_lock);
+ spin_unlock_irq(&info->io_lock);
return 0;
}
@@ -1277,10 +1276,10 @@ static void blkfront_connect(struct blkfront_info *info)
xenbus_switch_state(info->xbdev, XenbusStateConnected);
/* Kick pending requests. */
- spin_lock_irq(&blkif_io_lock);
+ spin_lock_irq(&info->io_lock);
info->connected = BLKIF_STATE_CONNECTED;
kick_pending_request_queues(info);
- spin_unlock_irq(&blkif_io_lock);
+ spin_unlock_irq(&info->io_lock);
add_disk(info->gd);
@@ -1410,7 +1409,6 @@ static int blkif_release(struct gendisk *disk, fmode_t mode)
mutex_lock(&blkfront_mutex);
bdev = bdget_disk(disk, 0);
- bdput(bdev);
if (bdev->bd_openers)
goto out;
@@ -1441,6 +1439,7 @@ static int blkif_release(struct gendisk *disk, fmode_t mode)
}
out:
+ bdput(bdev);
mutex_unlock(&blkfront_mutex);
return 0;
}
@@ -1475,6 +1474,9 @@ static int __init xlblk_init(void)
if (!xen_domain())
return -ENODEV;
+ if (xen_hvm_domain() && !xen_platform_pci_unplug)
+ return -ENODEV;
+
if (register_blkdev(XENVBD_MAJOR, DEV_NAME)) {
printk(KERN_WARNING "xen_blk: can't get major %d with name %s\n",
XENVBD_MAJOR, DEV_NAME);