diff options
author | Stephen M. Cameron <scameron@beardog.cce.hp.com> | 2014-02-18 23:55:33 +0400 |
---|---|---|
committer | James Bottomley <JBottomley@Parallels.com> | 2014-03-15 21:19:03 +0400 |
commit | 283b4a9b98b192ebc0e15351fd6fb60e1be78c5d (patch) | |
tree | 82ec3ca33ea7352e347dcb3b152b6ab95a1c107f | |
parent | 17eb87d216a0d8a9fa9852f331a7c6afb9f45312 (diff) | |
download | linux-283b4a9b98b192ebc0e15351fd6fb60e1be78c5d.tar.xz |
[SCSI] hpsa: add ioaccell mode 1 RAID offload support.
This enables sending i/o's destined for RAID logical drives
which can be serviced by a single physical disk down a different,
faster i/o path directly to physical drives for certain logical
volumes on SSDs bypassing the Smart Array RAID stack for a
performance improvement.
Signed-off-by: Matt Gates <matthew.gates@hp.com>
Signed-off-by: Stephen M. Cameron <scameron@beardog.cce.hp.com>
Signed-off-by: Scott Teel <scott.teel@hp.com>
Signed-off-by: Mike Miller <michael.miller@canonical.com>
Signed-off-by: Don Brace <brace@beardog.cce.hp.com>
Signed-off-by: Joe Handzik <joseph.t.handzik@hp.com>
Signed-off-by: James Bottomley <JBottomley@Parallels.com>
-rw-r--r-- | drivers/scsi/hpsa.c | 565 | ||||
-rw-r--r-- | drivers/scsi/hpsa.h | 26 | ||||
-rw-r--r-- | drivers/scsi/hpsa_cmd.h | 65 |
3 files changed, 607 insertions, 49 deletions
diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c index 4b6db4c39b24..e8489a964ea4 100644 --- a/drivers/scsi/hpsa.c +++ b/drivers/scsi/hpsa.c @@ -49,6 +49,7 @@ #include <linux/atomic.h> #include <linux/kthread.h> #include <linux/jiffies.h> +#include <asm/div64.h> #include "hpsa_cmd.h" #include "hpsa.h" @@ -216,6 +217,7 @@ static int hpsa_lookup_board_id(struct pci_dev *pdev, u32 *board_id); static int hpsa_wait_for_board_state(struct pci_dev *pdev, void __iomem *vaddr, int wait_for_ready); static inline void finish_cmd(struct CommandList *c); +static void hpsa_wait_for_mode_change_ack(struct ctlr_info *h); #define BOARD_NOT_READY 0 #define BOARD_READY 1 @@ -1195,6 +1197,7 @@ static void complete_scsi_command(struct CommandList *cp) struct scsi_cmnd *cmd; struct ctlr_info *h; struct ErrorInfo *ei; + struct hpsa_scsi_dev_t *dev; unsigned char sense_key; unsigned char asc; /* additional sense code */ @@ -1204,6 +1207,7 @@ static void complete_scsi_command(struct CommandList *cp) ei = cp->err_info; cmd = (struct scsi_cmnd *) cp->scsi_cmd; h = cp->h; + dev = cmd->device->hostdata; scsi_dma_unmap(cmd); /* undo the DMA mappings */ if ((cp->cmd_type == CMD_SCSI) && @@ -1242,6 +1246,19 @@ static void complete_scsi_command(struct CommandList *cp) cp->Header.Tag.upper = c->Tag.upper; memcpy(cp->Header.LUN.LunAddrBytes, c->CISS_LUN, 8); memcpy(cp->Request.CDB, c->CDB, cp->Request.CDBLen); + + /* Any RAID offload error results in retry which will use + * the normal I/O path so the controller can handle whatever's + * wrong. + */ + if (is_logical_dev_addr_mode(dev->scsi3addr)) { + if (ei->CommandStatus == CMD_IOACCEL_DISABLED) + dev->offload_enabled = 0; + cmd->result = DID_SOFT_ERROR << 16; + cmd_free(h, cp); + cmd->scsi_done(cmd); + return; + } } /* an error has occurred */ @@ -1406,6 +1423,14 @@ static void complete_scsi_command(struct CommandList *cp) cmd->result = DID_ERROR << 16; dev_warn(&h->pdev->dev, "Command unabortable\n"); break; + case CMD_IOACCEL_DISABLED: + /* This only handles the direct pass-through case since RAID + * offload is handled above. Just attempt a retry. + */ + cmd->result = DID_SOFT_ERROR << 16; + dev_warn(&h->pdev->dev, + "cp %p had HP SSD Smart Path error\n", cp); + break; default: cmd->result = DID_ERROR << 16; dev_warn(&h->pdev->dev, "cp %p returned unknown status %x\n", @@ -1650,6 +1675,147 @@ static void hpsa_get_raid_level(struct ctlr_info *h, return; } +#define HPSA_MAP_DEBUG +#ifdef HPSA_MAP_DEBUG +static void hpsa_debug_map_buff(struct ctlr_info *h, int rc, + struct raid_map_data *map_buff) +{ + struct raid_map_disk_data *dd = &map_buff->data[0]; + int map, row, col; + u16 map_cnt, row_cnt, disks_per_row; + + if (rc != 0) + return; + + dev_info(&h->pdev->dev, "structure_size = %u\n", + le32_to_cpu(map_buff->structure_size)); + dev_info(&h->pdev->dev, "volume_blk_size = %u\n", + le32_to_cpu(map_buff->volume_blk_size)); + dev_info(&h->pdev->dev, "volume_blk_cnt = 0x%llx\n", + le64_to_cpu(map_buff->volume_blk_cnt)); + dev_info(&h->pdev->dev, "physicalBlockShift = %u\n", + map_buff->phys_blk_shift); + dev_info(&h->pdev->dev, "parity_rotation_shift = %u\n", + map_buff->parity_rotation_shift); + dev_info(&h->pdev->dev, "strip_size = %u\n", + le16_to_cpu(map_buff->strip_size)); + dev_info(&h->pdev->dev, "disk_starting_blk = 0x%llx\n", + le64_to_cpu(map_buff->disk_starting_blk)); + dev_info(&h->pdev->dev, "disk_blk_cnt = 0x%llx\n", + le64_to_cpu(map_buff->disk_blk_cnt)); + dev_info(&h->pdev->dev, "data_disks_per_row = %u\n", + le16_to_cpu(map_buff->data_disks_per_row)); + dev_info(&h->pdev->dev, "metadata_disks_per_row = %u\n", + le16_to_cpu(map_buff->metadata_disks_per_row)); + dev_info(&h->pdev->dev, "row_cnt = %u\n", + le16_to_cpu(map_buff->row_cnt)); + dev_info(&h->pdev->dev, "layout_map_count = %u\n", + le16_to_cpu(map_buff->layout_map_count)); + + map_cnt = le16_to_cpu(map_buff->layout_map_count); + for (map = 0; map < map_cnt; map++) { + dev_info(&h->pdev->dev, "Map%u:\n", map); + row_cnt = le16_to_cpu(map_buff->row_cnt); + for (row = 0; row < row_cnt; row++) { + dev_info(&h->pdev->dev, " Row%u:\n", row); + disks_per_row = + le16_to_cpu(map_buff->data_disks_per_row); + for (col = 0; col < disks_per_row; col++, dd++) + dev_info(&h->pdev->dev, + " D%02u: h=0x%04x xor=%u,%u\n", + col, dd->ioaccel_handle, + dd->xor_mult[0], dd->xor_mult[1]); + disks_per_row = + le16_to_cpu(map_buff->metadata_disks_per_row); + for (col = 0; col < disks_per_row; col++, dd++) + dev_info(&h->pdev->dev, + " M%02u: h=0x%04x xor=%u,%u\n", + col, dd->ioaccel_handle, + dd->xor_mult[0], dd->xor_mult[1]); + } + } +} +#else +static void hpsa_debug_map_buff(__attribute__((unused)) struct ctlr_info *h, + __attribute__((unused)) int rc, + __attribute__((unused)) struct raid_map_data *map_buff) +{ +} +#endif + +static int hpsa_get_raid_map(struct ctlr_info *h, + unsigned char *scsi3addr, struct hpsa_scsi_dev_t *this_device) +{ + int rc = 0; + struct CommandList *c; + struct ErrorInfo *ei; + + c = cmd_special_alloc(h); + if (c == NULL) { + dev_warn(&h->pdev->dev, "cmd_special_alloc returned NULL!\n"); + return -ENOMEM; + } + if (fill_cmd(c, HPSA_GET_RAID_MAP, h, &this_device->raid_map, + sizeof(this_device->raid_map), 0, + scsi3addr, TYPE_CMD)) { + dev_warn(&h->pdev->dev, "Out of memory in hpsa_get_raid_map()\n"); + cmd_special_free(h, c); + return -ENOMEM; + } + hpsa_scsi_do_simple_cmd_with_retry(h, c, PCI_DMA_FROMDEVICE); + ei = c->err_info; + if (ei->CommandStatus != 0 && ei->CommandStatus != CMD_DATA_UNDERRUN) { + hpsa_scsi_interpret_error(c); + cmd_special_free(h, c); + return -1; + } + cmd_special_free(h, c); + + /* @todo in the future, dynamically allocate RAID map memory */ + if (le32_to_cpu(this_device->raid_map.structure_size) > + sizeof(this_device->raid_map)) { + dev_warn(&h->pdev->dev, "RAID map size is too large!\n"); + rc = -1; + } + hpsa_debug_map_buff(h, rc, &this_device->raid_map); + return rc; +} + +static void hpsa_get_ioaccel_status(struct ctlr_info *h, + unsigned char *scsi3addr, struct hpsa_scsi_dev_t *this_device) +{ + int rc; + unsigned char *buf; + u8 ioaccel_status; + + this_device->offload_config = 0; + this_device->offload_enabled = 0; + + buf = kzalloc(64, GFP_KERNEL); + if (!buf) + return; + rc = hpsa_scsi_do_inquiry(h, scsi3addr, + HPSA_VPD_LV_IOACCEL_STATUS, buf, 64); + if (rc != 0) + goto out; + +#define IOACCEL_STATUS_BYTE 4 +#define OFFLOAD_CONFIGURED_BIT 0x01 +#define OFFLOAD_ENABLED_BIT 0x02 + ioaccel_status = buf[IOACCEL_STATUS_BYTE]; + this_device->offload_config = + !!(ioaccel_status & OFFLOAD_CONFIGURED_BIT); + if (this_device->offload_config) { + this_device->offload_enabled = + !!(ioaccel_status & OFFLOAD_ENABLED_BIT); + if (hpsa_get_raid_map(h, scsi3addr, this_device)) + this_device->offload_enabled = 0; + } +out: + kfree(buf); + return; +} + /* Get the device id from inquiry page 0x83 */ static int hpsa_get_device_id(struct ctlr_info *h, unsigned char *scsi3addr, unsigned char *device_id, int buflen) @@ -1698,6 +1864,14 @@ static int hpsa_scsi_do_report_luns(struct ctlr_info *h, int logical, ei->CommandStatus != CMD_DATA_UNDERRUN) { hpsa_scsi_interpret_error(c); rc = -1; + } else { + if (buf->extended_response_flag != extended_response) { + dev_err(&h->pdev->dev, + "report luns requested format %u, got %u\n", + extended_response, + buf->extended_response_flag); + rc = -1; + } } out: cmd_special_free(h, c); @@ -1763,10 +1937,15 @@ static int hpsa_update_device_info(struct ctlr_info *h, sizeof(this_device->device_id)); if (this_device->devtype == TYPE_DISK && - is_logical_dev_addr_mode(scsi3addr)) + is_logical_dev_addr_mode(scsi3addr)) { hpsa_get_raid_level(h, scsi3addr, &this_device->raid_level); - else + if (h->fw_support & MISC_FW_RAID_OFFLOAD_BASIC) + hpsa_get_ioaccel_status(h, scsi3addr, this_device); + } else { this_device->raid_level = RAID_UNKNOWN; + this_device->offload_config = 0; + this_device->offload_enabled = 0; + } if (is_OBDR_device) { /* See if this is a One-Button-Disaster-Recovery device @@ -1903,15 +2082,25 @@ static int add_ext_target_dev(struct ctlr_info *h, */ static int hpsa_gather_lun_info(struct ctlr_info *h, int reportlunsize, - struct ReportLUNdata *physdev, u32 *nphysicals, + struct ReportLUNdata *physdev, u32 *nphysicals, int *physical_mode, struct ReportLUNdata *logdev, u32 *nlogicals) { + int physical_entry_size = 8; + + *physical_mode = 0; + + /* For I/O accelerator mode we need to read physical device handles */ + if (h->transMethod & CFGTBL_Trans_io_accel1) { + *physical_mode = HPSA_REPORT_PHYS_EXTENDED; + physical_entry_size = 24; + } if (hpsa_scsi_do_report_phys_luns(h, physdev, reportlunsize, - HPSA_REPORT_PHYS_EXTENDED)) { + *physical_mode)) { dev_err(&h->pdev->dev, "report physical LUNs failed.\n"); return -1; } - *nphysicals = be32_to_cpu(*((__be32 *)physdev->LUNListLength)) / 24; + *nphysicals = be32_to_cpu(*((__be32 *)physdev->LUNListLength)) / + physical_entry_size; if (*nphysicals > HPSA_MAX_PHYS_LUN) { dev_warn(&h->pdev->dev, "maximum physical LUNs (%d) exceeded." " %d LUNs ignored.\n", HPSA_MAX_PHYS_LUN, @@ -1983,10 +2172,11 @@ static void hpsa_update_scsi_devices(struct ctlr_info *h, int hostno) struct ReportLUNdata *logdev_list = NULL; u32 nphysicals = 0; u32 nlogicals = 0; + int physical_mode = 0; u32 ndev_allocated = 0; struct hpsa_scsi_dev_t **currentsd, *this_device, *tmpdevice; int ncurrent = 0; - int reportlunsize = sizeof(*physdev_list) + HPSA_MAX_PHYS_LUN * 8; + int reportlunsize = sizeof(*physdev_list) + HPSA_MAX_PHYS_LUN * 24; int i, n_ext_target_devs, ndevs_to_allocate; int raid_ctlr_position; DECLARE_BITMAP(lunzerobits, MAX_EXT_TARGETS); @@ -2004,7 +2194,7 @@ static void hpsa_update_scsi_devices(struct ctlr_info *h, int hostno) if (hpsa_gather_lun_info(h, reportlunsize, (struct ReportLUNdata *) physdev_list, &nphysicals, - logdev_list, &nlogicals)) + &physical_mode, logdev_list, &nlogicals)) goto out; /* We might see up to the maximum number of logical and physical disks @@ -2085,12 +2275,16 @@ static void hpsa_update_scsi_devices(struct ctlr_info *h, int hostno) ncurrent++; break; case TYPE_DISK: - if (i < nphysicals) + if (i >= nphysicals) { + ncurrent++; break; - memcpy(&this_device->ioaccel_handle, - &lunaddrbytes[20], - sizeof(this_device->ioaccel_handle)); - ncurrent++; + } + if (physical_mode == HPSA_REPORT_PHYS_EXTENDED) { + memcpy(&this_device->ioaccel_handle, + &lunaddrbytes[20], + sizeof(this_device->ioaccel_handle)); + ncurrent++; + } break; case TYPE_TAPE: case TYPE_MEDIUM_CHANGER: @@ -2184,15 +2378,62 @@ sglist_finished: return 0; } +#define IO_ACCEL_INELIGIBLE (1) +static int fixup_ioaccel_cdb(u8 *cdb, int *cdb_len) +{ + int is_write = 0; + u32 block; + u32 block_cnt; + + /* Perform some CDB fixups if needed using 10 byte reads/writes only */ + switch (cdb[0]) { + case WRITE_6: + case WRITE_12: + is_write = 1; + case READ_6: + case READ_12: + if (*cdb_len == 6) { + block = (((u32) cdb[2]) << 8) | cdb[3]; + block_cnt = cdb[4]; + } else { + BUG_ON(*cdb_len != 12); + block = (((u32) cdb[2]) << 24) | + (((u32) cdb[3]) << 16) | + (((u32) cdb[4]) << 8) | + cdb[5]; + block_cnt = + (((u32) cdb[6]) << 24) | + (((u32) cdb[7]) << 16) | + (((u32) cdb[8]) << 8) | + cdb[9]; + } + if (block_cnt > 0xffff) + return IO_ACCEL_INELIGIBLE; + + cdb[0] = is_write ? WRITE_10 : READ_10; + cdb[1] = 0; + cdb[2] = (u8) (block >> 24); + cdb[3] = (u8) (block >> 16); + cdb[4] = (u8) (block >> 8); + cdb[5] = (u8) (block); + cdb[6] = 0; + cdb[7] = (u8) (block_cnt >> 8); + cdb[8] = (u8) (block_cnt); + cdb[9] = 0; + *cdb_len = 10; + break; + } + return 0; +} + /* * Queue a command to the I/O accelerator path. - * This method does not currently support S/G chaining. */ static int hpsa_scsi_ioaccel_queue_command(struct ctlr_info *h, - struct CommandList *c) + struct CommandList *c, u32 ioaccel_handle, u8 *cdb, int cdb_len, + u8 *scsi3addr) { struct scsi_cmnd *cmd = c->scsi_cmd; - struct hpsa_scsi_dev_t *dev = cmd->device->hostdata; struct io_accel1_cmd *cp = &h->ioaccel_cmd_pool[c->cmdindex]; unsigned int len; unsigned int total_len = 0; @@ -2202,8 +2443,15 @@ static int hpsa_scsi_ioaccel_queue_command(struct ctlr_info *h, struct SGDescriptor *curr_sg; u32 control = IOACCEL1_CONTROL_SIMPLEQUEUE; + /* TODO: implement chaining support */ + if (scsi_sg_count(cmd) > h->ioaccel_maxsg) + return IO_ACCEL_INELIGIBLE; + BUG_ON(cmd->cmd_len > IOACCEL1_IOFLAGS_CDBLEN_MAX); + if (fixup_ioaccel_cdb(cdb, &cdb_len)) + return IO_ACCEL_INELIGIBLE; + c->cmd_type = CMD_IOACCEL1; /* Adjust the DMA address to point to the accelerated command buffer */ @@ -2254,13 +2502,13 @@ static int hpsa_scsi_ioaccel_queue_command(struct ctlr_info *h, } /* Fill out the command structure to submit */ - cp->dev_handle = dev->ioaccel_handle; + cp->dev_handle = ioaccel_handle & 0xFFFF; cp->transfer_len = total_len; cp->io_flags = IOACCEL1_IOFLAGS_IO_REQ | - (cmd->cmd_len & IOACCEL1_IOFLAGS_CDBLEN_MASK); + (cdb_len & IOACCEL1_IOFLAGS_CDBLEN_MASK); cp->control = control; - memcpy(cp->CDB, cmd->cmnd, cmd->cmd_len); - memcpy(cp->CISS_LUN, dev->scsi3addr, 8); + memcpy(cp->CDB, cdb, cdb_len); + memcpy(cp->CISS_LUN, scsi3addr, 8); /* Tell the controller to post the reply to the queue for this * processor. This seems to give the best I/O throughput. @@ -2274,15 +2522,214 @@ static int hpsa_scsi_ioaccel_queue_command(struct ctlr_info *h, */ c->busaddr |= 1 | (h->ioaccel1_blockFetchTable[use_sg] << 1) | IOACCEL1_BUSADDR_CMDTYPE; - - /* execute command (bypassing cmd queue if possible) */ - if (unlikely(h->access.fifo_full(h))) - enqueue_cmd_and_start_io(h, c); - else - h->access.submit_command(h, c); + enqueue_cmd_and_start_io(h, c); return 0; } +/* + * Queue a command directly to a device behind the controller using the + * I/O accelerator path. + */ +static int hpsa_scsi_ioaccel_direct_map(struct ctlr_info *h, + struct CommandList *c) +{ + struct scsi_cmnd *cmd = c->scsi_cmd; + struct hpsa_scsi_dev_t *dev = cmd->device->hostdata; + + return hpsa_scsi_ioaccel_queue_command(h, c, dev->ioaccel_handle, + cmd->cmnd, cmd->cmd_len, dev->scsi3addr); +} + +/* + * Attempt to perform offload RAID mapping for a logical volume I/O. + */ +static int hpsa_scsi_ioaccel_raid_map(struct ctlr_info *h, + struct CommandList *c) +{ + struct scsi_cmnd *cmd = c->scsi_cmd; + struct hpsa_scsi_dev_t *dev = cmd->device->hostdata; + struct raid_map_data *map = &dev->raid_map; + struct raid_map_disk_data *dd = &map->data[0]; + int is_write = 0; + u32 map_index; + u64 first_block, last_block; + u32 block_cnt; + u32 blocks_per_row; + u64 first_row, last_row; + u32 first_row_offset, last_row_offset; + u32 first_column, last_column; + u32 map_row; + u32 disk_handle; + u64 disk_block; + u32 disk_block_cnt; + u8 cdb[16]; + u8 cdb_len; +#if BITS_PER_LONG == 32 + u64 tmpdiv; +#endif + + BUG_ON(!(dev->offload_config && dev->offload_enabled)); + + /* check for valid opcode, get LBA and block count */ + switch (cmd->cmnd[0]) { + case WRITE_6: + is_write = 1; + case READ_6: + first_block = + (((u64) cmd->cmnd[2]) << 8) | + cmd->cmnd[3]; + block_cnt = cmd->cmnd[4]; + break; + case WRITE_10: + is_write = 1; + case READ_10: + first_block = + (((u64) cmd->cmnd[2]) << 24) | + (((u64) cmd->cmnd[3]) << 16) | + (((u64) cmd->cmnd[4]) << 8) | + cmd->cmnd[5]; + block_cnt = + (((u32) cmd->cmnd[7]) << 8) | + cmd->cmnd[8]; + break; + case WRITE_12: + is_write = 1; + case READ_12: + first_block = + (((u64) cmd->cmnd[2]) << 24) | + (((u64) cmd->cmnd[3]) << 16) | + (((u64) cmd->cmnd[4]) << 8) | + cmd->cmnd[5]; + block_cnt = + (((u32) cmd->cmnd[6]) << 24) | + (((u32) cmd->cmnd[7]) << 16) | + (((u32) cmd->cmnd[8]) << 8) | + cmd->cmnd[9]; + break; + case WRITE_16: + is_write = 1; + case READ_16: + first_block = + (((u64) cmd->cmnd[2]) << 56) | + (((u64) cmd->cmnd[3]) << 48) | + (((u64) cmd->cmnd[4]) << 40) | + (((u64) cmd->cmnd[5]) << 32) | + (((u64) cmd->cmnd[6]) << 24) | + (((u64) cmd->cmnd[7]) << 16) | + (((u64) cmd->cmnd[8]) << 8) | + cmd->cmnd[9]; + block_cnt = + (((u32) cmd->cmnd[10]) << 24) | + (((u32) cmd->cmnd[11]) << 16) | + (((u32) cmd->cmnd[12]) << 8) | + cmd->cmnd[13]; + break; + default: + return IO_ACCEL_INELIGIBLE; /* process via normal I/O path */ + } + BUG_ON(block_cnt == 0); + last_block = first_block + block_cnt - 1; + + /* check for write to non-RAID-0 */ + if (is_write && dev->raid_level != 0) + return IO_ACCEL_INELIGIBLE; + + /* check for invalid block or wraparound */ + if (last_block >= map->volume_blk_cnt || last_block < first_block) + return IO_ACCEL_INELIGIBLE; + + /* calculate stripe information for the request */ + blocks_per_row = map->data_disks_per_row * map->strip_size; +#if BITS_PER_LONG == 32 + tmpdiv = first_block; + (void) do_div(tmpdiv, blocks_per_row); + first_row = tmpdiv; + tmpdiv = last_block; + (void) do_div(tmpdiv, blocks_per_row); + last_row = tmpdiv; + first_row_offset = (u32) (first_block - (first_row * blocks_per_row)); + last_row_offset = (u32) (last_block - (last_row * blocks_per_row)); + tmpdiv = first_row_offset; + (void) do_div(tmpdiv, map->strip_size); + first_column = tmpdiv; + tmpdiv = last_row_offset; + (void) do_div(tmpdiv, map->strip_size); + last_column = tmpdiv; +#else + first_row = first_block / blocks_per_row; + last_row = last_block / blocks_per_row; + first_row_offset = (u32) (first_block - (first_row * blocks_per_row)); + last_row_offset = (u32) (last_block - (last_row * blocks_per_row)); + first_column = first_row_offset / map->strip_size; + last_column = last_row_offset / map->strip_size; +#endif + + /* if this isn't a single row/column then give to the controller */ + if ((first_row != last_row) || (first_column != last_column)) + return IO_ACCEL_INELIGIBLE; + + /* proceeding with driver mapping */ + map_row = ((u32)(first_row >> map->parity_rotation_shift)) % + map->row_cnt; + map_index = (map_row * (map->data_disks_per_row + + map->metadata_disks_per_row)) + first_column; + if (dev->raid_level == 2) { + /* simple round-robin balancing of RAID 1+0 reads across + * primary and mirror members. this is appropriate for SSD + * but not optimal for HDD. + */ + if (dev->offload_to_mirror) + map_index += map->data_disks_per_row; + dev->offload_to_mirror = !dev->offload_to_mirror; + } + disk_handle = dd[map_index].ioaccel_handle; + disk_block = map->disk_starting_blk + (first_row * map->strip_size) + + (first_row_offset - (first_column * map->strip_size)); + disk_block_cnt = block_cnt; + + /* handle differing logical/physical block sizes */ + if (map->phys_blk_shift) { + disk_block <<= map->phys_blk_shift; + disk_block_cnt <<= map->phys_blk_shift; + } + BUG_ON(disk_block_cnt > 0xffff); + + /* build the new CDB for the physical disk I/O */ + if (disk_block > 0xffffffff) { + cdb[0] = is_write ? WRITE_16 : READ_16; + cdb[1] = 0; + cdb[2] = (u8) (disk_block >> 56); + cdb[3] = (u8) (disk_block >> 48); + cdb[4] = (u8) (disk_block >> 40); + cdb[5] = (u8) (disk_block >> 32); + cdb[6] = (u8) (disk_block >> 24); + cdb[7] = (u8) (disk_block >> 16); + cdb[8] = (u8) (disk_block >> 8); + cdb[9] = (u8) (disk_block); + cdb[10] = (u8) (disk_block_cnt >> 24); + cdb[11] = (u8) (disk_block_cnt >> 16); + cdb[12] = (u8) (disk_block_cnt >> 8); + cdb[13] = (u8) (disk_block_cnt); + cdb[14] = 0; + cdb[15] = 0; + cdb_len = 16; + } else { + cdb[0] = is_write ? WRITE_10 : READ_10; + cdb[1] = 0; + cdb[2] = (u8) (disk_block >> 24); + cdb[3] = (u8) (disk_block >> 16); + cdb[4] = (u8) (disk_block >> 8); + cdb[5] = (u8) (disk_block); + cdb[6] = 0; + cdb[7] = (u8) (disk_block_cnt >> 8); + cdb[8] = (u8) (disk_block_cnt); + cdb[9] = 0; + cdb_len = 10; + } + return hpsa_scsi_ioaccel_queue_command(h, c, disk_handle, cdb, cdb_len, + dev->scsi3addr); +} + static int hpsa_scsi_queue_command_lck(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *)) { @@ -2291,6 +2738,7 @@ static int hpsa_scsi_queue_command_lck(struct scsi_cmnd *cmd, unsigned char scsi3addr[8]; struct CommandList *c; unsigned long flags; + int rc = 0; /* Get the ptr to our adapter structure out of cmd->host. */ h = sdev_to_hba(cmd->device); @@ -2326,13 +2774,29 @@ static int hpsa_scsi_queue_command_lck(struct scsi_cmnd *cmd, c->cmd_type = CMD_SCSI; c->scsi_cmd = cmd; - /* Call alternate submit routine for I/O accelerated commands */ - if ((likely(h->transMethod & CFGTBL_Trans_io_accel1)) && - (dev->ioaccel_handle) && - ((cmd->cmnd[0] == READ_10) || (cmd->cmnd[0] == WRITE_10)) && - (scsi_sg_count(cmd) <= IOACCEL1_MAXSGENTRIES) && - likely(cmd->request->cmd_type == REQ_TYPE_FS)) - return hpsa_scsi_ioaccel_queue_command(h, c); + /* Call alternate submit routine for I/O accelerated commands. + * Retries always go down the normal I/O path. + */ + if (likely(cmd->retries == 0 && + cmd->request->cmd_type == REQ_TYPE_FS)) { + if (dev->offload_enabled) { + rc = hpsa_scsi_ioaccel_raid_map(h, c); + if (rc == 0) + return 0; /* Sent on ioaccel path */ + if (rc < 0) { /* scsi_dma_map failed. */ + cmd_free(h, c); + return SCSI_MLQUEUE_HOST_BUSY; + } + } else if (dev->ioaccel_handle) { + rc = hpsa_scsi_ioaccel_direct_map(h, c); + if (rc == 0) + return 0; /* Sent on direct map path */ + if (rc < 0) { /* scsi_dma_map failed. */ + cmd_free(h, c); + return SCSI_MLQUEUE_HOST_BUSY; + } + } + } c->Header.ReplyQueue = 0; /* unused in simple mode */ memcpy(&c->Header.LUN.LunAddrBytes[0], &scsi3addr[0], 8); @@ -3515,6 +3979,18 @@ static int fill_cmd(struct CommandList *c, u8 cmd, struct ctlr_info *h, c->Request.Type.Direction = XFER_NONE; c->Request.Timeout = 0; break; + case HPSA_GET_RAID_MAP: + c->Request.CDBLen = 12; + c->Request.Type.Attribute = ATTR_SIMPLE; + c->Request.Type.Direction = XFER_READ; + c->Request.Timeout = 0; + c->Request.CDB[0] = HPSA_CISS_READ; + c->Request.CDB[1] = cmd; + c->Request.CDB[6] = (size >> 24) & 0xFF; /* MSB */ + c->Request.CDB[7] = (size >> 16) & 0xFF; + c->Request.CDB[8] = (size >> 8) & 0xFF; + c->Request.CDB[9] = size & 0xFF; + break; default: dev_warn(&h->pdev->dev, "unknown command 0x%c\n", cmd); BUG(); @@ -4485,6 +4961,7 @@ static void hpsa_find_board_params(struct ctlr_info *h) hpsa_get_max_perf_mode_cmds(h); h->nr_cmds = h->max_commands - 4; /* Allow room for some ioctls */ h->maxsgentries = readl(&(h->cfgtable->MaxScatterGatherElements)); + h->fw_support = readl(&(h->cfgtable->misc_fw_support)); /* * Limit in-command s/g elements to 32 save dma'able memory. * Howvever spec says if 0, use 31 @@ -4569,18 +5046,19 @@ static int hpsa_enter_simple_mode(struct ctlr_info *h) return -ENOTSUPP; h->max_commands = readl(&(h->cfgtable->CmdsOutMax)); + /* Update the field, and then ring the doorbell */ writel(CFGTBL_Trans_Simple, &(h->cfgtable->HostWrite.TransportRequest)); writel(CFGTBL_ChangeReq, h->vaddr + SA5_DOORBELL); hpsa_wait_for_mode_change_ack(h); print_cfg_table(&h->pdev->dev, h->cfgtable); - if (!(readl(&(h->cfgtable->TransportActive)) & CFGTBL_Trans_Simple)) { - dev_warn(&h->pdev->dev, - "unable to get board into simple mode\n"); - return -ENODEV; - } + if (!(readl(&(h->cfgtable->TransportActive)) & CFGTBL_Trans_Simple)) + goto error; h->transMethod = CFGTBL_Trans_Simple; return 0; +error: + dev_warn(&h->pdev->dev, "unable to get board into simple mode\n"); + return -ENODEV; } static int hpsa_pci_init(struct ctlr_info *h) @@ -4961,7 +5439,7 @@ reinit_after_soft_reset: * the 5 lower bits of the address are used by the hardware. and by * the driver. See comments in hpsa.h for more info. */ -#define COMMANDLIST_ALIGNMENT 32 +#define COMMANDLIST_ALIGNMENT 128 BUILD_BUG_ON(sizeof(struct CommandList) % COMMANDLIST_ALIGNMENT); h = kzalloc(sizeof(*h), GFP_KERNEL); if (!h) @@ -5338,8 +5816,8 @@ static void hpsa_enter_performant_mode(struct ctlr_info *h, u32 trans_support) h->reply_queue[i].current_entry = readl(h->vaddr + IOACCEL_MODE1_PRODUCER_INDEX); } - bft[7] = IOACCEL1_MAXSGENTRIES + 8; - calc_bucket_map(bft, ARRAY_SIZE(bft), IOACCEL1_MAXSGENTRIES, 8, + bft[7] = h->ioaccel_maxsg + 8; + calc_bucket_map(bft, ARRAY_SIZE(bft), h->ioaccel_maxsg, 8, h->ioaccel1_blockFetchTable); /* initialize all reply queue entries to unused */ @@ -5370,6 +5848,11 @@ static void hpsa_enter_performant_mode(struct ctlr_info *h, u32 trans_support) static int hpsa_alloc_ioaccel_cmd_and_bft(struct ctlr_info *h) { + h->ioaccel_maxsg = + readl(&(h->cfgtable->io_accel_max_embedded_sg_count)); + if (h->ioaccel_maxsg > IOACCEL1_MAXSGENTRIES) + h->ioaccel_maxsg = IOACCEL1_MAXSGENTRIES; + /* Command structures must be aligned on a 128-byte boundary * because the 7 lower bits of the address are used by the * hardware. @@ -5383,7 +5866,7 @@ static int hpsa_alloc_ioaccel_cmd_and_bft(struct ctlr_info *h) &(h->ioaccel_cmd_pool_dhandle)); h->ioaccel1_blockFetchTable = - kmalloc(((IOACCEL1_MAXSGENTRIES + 1) * + kmalloc(((h->ioaccel_maxsg + 1) * sizeof(u32)), GFP_KERNEL); if ((h->ioaccel_cmd_pool == NULL) || diff --git a/drivers/scsi/hpsa.h b/drivers/scsi/hpsa.h index c7865f30ffd1..ae08f1c46272 100644 --- a/drivers/scsi/hpsa.h +++ b/drivers/scsi/hpsa.h @@ -47,6 +47,13 @@ struct hpsa_scsi_dev_t { unsigned char model[16]; /* bytes 16-31 of inquiry data */ unsigned char raid_level; /* from inquiry page 0xC1 */ u32 ioaccel_handle; + int offload_config; /* I/O accel RAID offload configured */ + int offload_enabled; /* I/O accel RAID offload enabled */ + int offload_to_mirror; /* Send next I/O accelerator RAID + * offload request to mirror drive + */ + struct raid_map_data raid_map; /* I/O accelerator RAID map */ + }; struct reply_pool { @@ -133,6 +140,10 @@ struct ctlr_info { u32 *blockFetchTable; u32 *ioaccel1_blockFetchTable; unsigned char *hba_inquiry_data; + u32 driver_support; + u32 fw_support; + int ioaccel_support; + int ioaccel_maxsg; u64 last_intr_timestamp; u32 last_heartbeat; u64 last_heartbeat_timestamp; @@ -406,8 +417,7 @@ static bool SA5_ioaccel_mode1_intr_pending(struct ctlr_info *h) #define IOACCEL_MODE1_CONSUMER_INDEX 0x1BC #define IOACCEL_MODE1_REPLY_UNUSED 0xFFFFFFFFFFFFFFFFULL -static unsigned long SA5_ioaccel_mode1_completed(struct ctlr_info *h, - u8 q) +static unsigned long SA5_ioaccel_mode1_completed(struct ctlr_info *h, u8 q) { u64 register_value; struct reply_pool *rq = &h->reply_queue[q]; @@ -420,12 +430,18 @@ static unsigned long SA5_ioaccel_mode1_completed(struct ctlr_info *h, rq->head[rq->current_entry] = IOACCEL_MODE1_REPLY_UNUSED; if (++rq->current_entry == rq->size) rq->current_entry = 0; + /* + * @todo + * + * Don't really need to write the new index after each command, + * but with current driver design this is easiest. + */ + wmb(); + writel((q << 24) | rq->current_entry, h->vaddr + + IOACCEL_MODE1_CONSUMER_INDEX); spin_lock_irqsave(&h->lock, flags); h->commands_outstanding--; spin_unlock_irqrestore(&h->lock, flags); - } else { - writel((q << 24) | rq->current_entry, - h->vaddr + IOACCEL_MODE1_CONSUMER_INDEX); } return (unsigned long) register_value; } diff --git a/drivers/scsi/hpsa_cmd.h b/drivers/scsi/hpsa_cmd.h index e682d2e6b387..c1ae8d2a6bf2 100644 --- a/drivers/scsi/hpsa_cmd.h +++ b/drivers/scsi/hpsa_cmd.h @@ -42,6 +42,8 @@ #define CMD_UNSOLICITED_ABORT 0x000A #define CMD_TIMEOUT 0x000B #define CMD_UNABORTABLE 0x000C +#define CMD_IOACCEL_DISABLED 0x000E + /* Unit Attentions ASC's as defined for the MSA2012sa */ #define POWER_OR_RESET 0x29 @@ -137,6 +139,11 @@ #define CFGTBL_BusType_Ultra3 0x00000002l #define CFGTBL_BusType_Fibre1G 0x00000100l #define CFGTBL_BusType_Fibre2G 0x00000200l + +/* VPD Inquiry types */ +#define HPSA_VPD_LV_DEVICE_GEOMETRY 0xC1 +#define HPSA_VPD_LV_IOACCEL_STATUS 0xC2 + struct vals32 { u32 lower; u32 upper; @@ -165,9 +172,46 @@ struct InquiryData { #define HPSA_REPORT_LOG 0xc2 /* Report Logical LUNs */ #define HPSA_REPORT_PHYS 0xc3 /* Report Physical LUNs */ #define HPSA_REPORT_PHYS_EXTENDED 0x02 +#define HPSA_CISS_READ 0xc0 /* CISS Read */ +#define HPSA_GET_RAID_MAP 0xc8 /* CISS Get RAID Layout Map */ + +#define RAID_MAP_MAX_ENTRIES 256 + +struct raid_map_disk_data { + u32 ioaccel_handle; /**< Handle to access this disk via the + * I/O accelerator */ + u8 xor_mult[2]; /**< XOR multipliers for this position, + * valid for data disks only */ + u8 reserved[2]; +}; + +struct raid_map_data { + u32 structure_size; /* Size of entire structure in bytes */ + u32 volume_blk_size; /* bytes / block in the volume */ + u64 volume_blk_cnt; /* logical blocks on the volume */ + u8 phys_blk_shift; /* Shift factor to convert between + * units of logical blocks and physical + * disk blocks */ + u8 parity_rotation_shift; /* Shift factor to convert between units + * of logical stripes and physical + * stripes */ + u16 strip_size; /* blocks used on each disk / stripe */ + u64 disk_starting_blk; /* First disk block used in volume */ + u64 disk_blk_cnt; /* disk blocks used by volume / disk */ + u16 data_disks_per_row; /* data disk entries / row in the map */ + u16 metadata_disks_per_row; /* mirror/parity disk entries / row + * in the map */ + u16 row_cnt; /* rows in each layout map */ + u16 layout_map_count; /* layout maps (1 map per mirror/parity + * group) */ + u8 reserved[20]; + struct raid_map_disk_data data[RAID_MAP_MAX_ENTRIES]; +}; + struct ReportLUNdata { u8 LUNListLength[4]; - u32 reserved; + u8 extended_response_flag; + u8 reserved[3]; u8 LUN[HPSA_MAX_LUN][8]; }; @@ -331,7 +375,7 @@ struct CommandList { */ #define IS_32_BIT ((8 - sizeof(long))/4) #define IS_64_BIT (!IS_32_BIT) -#define PAD_32 (4) +#define PAD_32 (36) #define PAD_64 (4) #define COMMANDLIST_PAD (IS_32_BIT * PAD_32 + IS_64_BIT * PAD_64) u8 pad[COMMANDLIST_PAD]; @@ -371,6 +415,11 @@ struct io_accel1_cmd { struct vals32 host_addr; /* 0x70 - 0x77 */ u8 CISS_LUN[8]; /* 0x78 - 0x7F */ struct SGDescriptor SG[IOACCEL1_MAXSGENTRIES]; +#define IOACCEL1_PAD_64 0 +#define IOACCEL1_PAD_32 0 +#define IOACCEL1_PAD (IS_32_BIT * IOACCEL1_PAD_32 + \ + IS_64_BIT * IOACCEL1_PAD_64) + u8 pad[IOACCEL1_PAD]; }; #define IOACCEL1_FUNCTION_SCSIIO 0x00 @@ -407,6 +456,8 @@ struct HostWrite { #define MEMQ_MODE 0x08 #define IOACCEL_MODE_1 0x80 +#define DRIVER_SUPPORT_UA_ENABLE 0x00000001 + struct CfgTable { u8 Signature[4]; u32 SpecValence; @@ -435,8 +486,16 @@ struct CfgTable { u32 misc_fw_support; /* offset 0x78 */ #define MISC_FW_DOORBELL_RESET (0x02) #define MISC_FW_DOORBELL_RESET2 (0x010) +#define MISC_FW_RAID_OFFLOAD_BASIC (0x020) +#define MISC_FW_EVENT_NOTIFY (0x080) u8 driver_version[32]; - + u32 max_cached_write_size; + u8 driver_scratchpad[16]; + u32 max_error_info_length; + u32 io_accel_max_embedded_sg_count; + u32 io_accel_request_size_offset; + u32 event_notify; + u32 clear_event_notify; }; #define NUM_BLOCKFETCH_ENTRIES 8 |