summaryrefslogtreecommitdiff
path: root/drivers/scsi/aacraid/linit.c
diff options
context:
space:
mode:
authorMark Haverkamp <markh@linux-foundation.org>2007-03-15 20:27:45 +0300
committerJames Bottomley <jejb@mulgrave.il.steeleye.com>2007-03-20 18:56:03 +0300
commit03d4433721880bf1972c924b168e4e1dd3c59d53 (patch)
tree820ad2f8aea551d03e7ff8751aba7dfe1f674bcd /drivers/scsi/aacraid/linit.c
parentf2b1a06ad46209c6e631e3099138d1fa3f14d3a8 (diff)
downloadlinux-03d4433721880bf1972c924b168e4e1dd3c59d53.tar.xz
[SCSI] aacraid: Improved error handling
Received from Mark Salyzyn, This set of fixes improve error handling stability of the driver. A popular manifestation of the problems is an NULL pointer reference in the interrupt handler when referencing portions of the scsi command context, or in the scsi_done handling when an offlined device is referenced. The aacraid driver currently does not get notification of orphaned command completions due to devices going offline. The driver also fails to handle the commands that are finished by the error handler, and thus can complete again later at the hands of the adapter causing situations of completion of an invalid scsi command context. Test Unit Ready calls abort assuming that the abort was successful, but are not, and thus when the interrupt from the adapter occurs, they reference invalid command contexts. We add in a TIMED_OUT flag to inform the aacraid FIB context that the interrupt service should merely release the driver resources and not complete the command up. We take advantage of this with the abort handler as well for select abortable commands. And we detect and react if a command that can not be aborted is currently still outstanding to the controller when reissued by the retry mechanism. Signed-off-by: Mark Haverkamp <markh@linux-foundation.org> Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
Diffstat (limited to 'drivers/scsi/aacraid/linit.c')
-rw-r--r--drivers/scsi/aacraid/linit.c64
1 files changed, 60 insertions, 4 deletions
diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c
index 0f948c2fb609..3cf3f6472e94 100644
--- a/drivers/scsi/aacraid/linit.c
+++ b/drivers/scsi/aacraid/linit.c
@@ -5,7 +5,7 @@
* based on the old aacraid driver that is..
* Adaptec aacraid device driver for Linux.
*
- * Copyright (c) 2000 Adaptec, Inc. (aacraid@adaptec.com)
+ * Copyright (c) 2000-2007 Adaptec, Inc. (aacraid@adaptec.com)
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -247,6 +247,19 @@ static struct aac_driver_ident aac_drivers[] = {
static int aac_queuecommand(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *))
{
+ struct Scsi_Host *host = cmd->device->host;
+ struct aac_dev *dev = (struct aac_dev *)host->hostdata;
+ u32 count = 0;
+ cmd->scsi_done = done;
+ for (; count < (host->can_queue + AAC_NUM_MGT_FIB); ++count) {
+ struct fib * fib = &dev->fibs[count];
+ struct scsi_cmnd * command;
+ if (fib->hw_fib_va->header.XferState &&
+ ((command = fib->callback_data)) &&
+ (command == cmd) &&
+ (cmd->SCp.phase == AAC_OWNER_FIRMWARE))
+ return 0; /* Already owned by Adapter */
+ }
cmd->scsi_done = done;
cmd->SCp.phase = AAC_OWNER_LOWLEVEL;
return (aac_scsi_cmd(cmd) ? FAILED : 0);
@@ -446,6 +459,40 @@ static int aac_ioctl(struct scsi_device *sdev, int cmd, void __user * arg)
return aac_do_ioctl(dev, cmd, arg);
}
+static int aac_eh_abort(struct scsi_cmnd* cmd)
+{
+ struct Scsi_Host * host = cmd->device->host;
+ struct aac_dev * aac = (struct aac_dev *)host->hostdata;
+ int count;
+ int ret = FAILED;
+
+ printk(KERN_ERR "%s: Host adapter abort request (%d,%d,%d,%d)\n",
+ AAC_DRIVERNAME,
+ cmd->device->host->host_no, sdev_channel(cmd->device),
+ sdev_id(cmd->device), cmd->device->lun);
+ switch (cmd->cmnd[0]) {
+ case SERVICE_ACTION_IN:
+ if (!(aac->raw_io_interface) ||
+ !(aac->raw_io_64) ||
+ ((cmd->cmnd[1] & 0x1f) != SAI_READ_CAPACITY_16))
+ break;
+ case INQUIRY:
+ case READ_CAPACITY:
+ case TEST_UNIT_READY:
+ /* Mark associated FIB to not complete, eh handler does this */
+ for (count = 0; count < (host->can_queue + AAC_NUM_MGT_FIB); ++count) {
+ struct fib * fib = &aac->fibs[count];
+ if (fib->hw_fib_va->header.XferState &&
+ (fib->callback_data == cmd)) {
+ fib->flags |= FIB_CONTEXT_FLAG_TIMED_OUT;
+ cmd->SCp.phase = AAC_OWNER_ERROR_HANDLER;
+ ret = SUCCESS;
+ }
+ }
+ }
+ return ret;
+}
+
/*
* aac_eh_reset - Reset command handling
* @scsi_cmd: SCSI command block causing the reset
@@ -457,12 +504,20 @@ static int aac_eh_reset(struct scsi_cmnd* cmd)
struct Scsi_Host * host = dev->host;
struct scsi_cmnd * command;
int count;
- struct aac_dev * aac;
+ struct aac_dev * aac = (struct aac_dev *)host->hostdata;
unsigned long flags;
+ /* Mark the associated FIB to not complete, eh handler does this */
+ for (count = 0; count < (host->can_queue + AAC_NUM_MGT_FIB); ++count) {
+ struct fib * fib = &aac->fibs[count];
+ if (fib->hw_fib_va->header.XferState &&
+ (fib->callback_data == cmd)) {
+ fib->flags |= FIB_CONTEXT_FLAG_TIMED_OUT;
+ cmd->SCp.phase = AAC_OWNER_ERROR_HANDLER;
+ }
+ }
printk(KERN_ERR "%s: Host adapter reset request. SCSI hang ?\n",
AAC_DRIVERNAME);
- aac = (struct aac_dev *)host->hostdata;
if ((count = aac_check_health(aac)))
return count;
@@ -496,7 +551,7 @@ static int aac_eh_reset(struct scsi_cmnd* cmd)
ssleep(1);
}
printk(KERN_ERR "%s: SCSI bus appears hung\n", AAC_DRIVERNAME);
- return -ETIMEDOUT;
+ return SUCCESS; /* Cause an immediate retry of the command with a ten second delay after successful tur */
}
/**
@@ -796,6 +851,7 @@ static struct scsi_host_template aac_driver_template = {
.bios_param = aac_biosparm,
.shost_attrs = aac_attrs,
.slave_configure = aac_slave_configure,
+ .eh_abort_handler = aac_eh_abort,
.eh_host_reset_handler = aac_eh_reset,
.can_queue = AAC_NUM_IO_FIB,
.this_id = MAXIMUM_NUM_CONTAINERS,