diff options
Diffstat (limited to 'drivers/scsi/scsi_error.c')
-rw-r--r-- | drivers/scsi/scsi_error.c | 158 |
1 files changed, 49 insertions, 109 deletions
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index 0c5b02d4c7f8..c0ae9e965f6f 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -417,43 +417,20 @@ static int scsi_eh_completed_normally(struct scsi_cmnd *scmd) } /** - * scsi_eh_times_out - timeout function for error handling. - * @scmd: Cmd that is timing out. - * - * Notes: - * During error handling, the kernel thread will be sleeping waiting - * for some action to complete on the device. our only job is to - * record that it timed out, and to wake up the thread. - **/ -static void scsi_eh_times_out(struct scsi_cmnd *scmd) -{ - scmd->eh_eflags |= SCSI_EH_REC_TIMEOUT; - SCSI_LOG_ERROR_RECOVERY(3, printk("%s: scmd:%p\n", __FUNCTION__, - scmd)); - - up(scmd->device->host->eh_action); -} - -/** * scsi_eh_done - Completion function for error handling. * @scmd: Cmd that is done. **/ static void scsi_eh_done(struct scsi_cmnd *scmd) { - /* - * if the timeout handler is already running, then just set the - * flag which says we finished late, and return. we have no - * way of stopping the timeout handler from running, so we must - * always defer to it. - */ - if (del_timer(&scmd->eh_timeout)) { - scmd->request->rq_status = RQ_SCSI_DONE; + struct completion *eh_action; - SCSI_LOG_ERROR_RECOVERY(3, printk("%s scmd: %p result: %x\n", - __FUNCTION__, scmd, scmd->result)); + SCSI_LOG_ERROR_RECOVERY(3, + printk("%s scmd: %p result: %x\n", + __FUNCTION__, scmd, scmd->result)); - up(scmd->device->host->eh_action); - } + eh_action = scmd->device->host->eh_action; + if (eh_action) + complete(eh_action); } /** @@ -461,10 +438,6 @@ static void scsi_eh_done(struct scsi_cmnd *scmd) * @scmd: SCSI Cmd to send. * @timeout: Timeout for cmd. * - * Notes: - * The initialization of the structures is quite a bit different in - * this case, and furthermore, there is a different completion handler - * vs scsi_dispatch_cmd. * Return value: * SUCCESS or FAILED or NEEDS_RETRY **/ @@ -472,24 +445,16 @@ static int scsi_send_eh_cmnd(struct scsi_cmnd *scmd, int timeout) { struct scsi_device *sdev = scmd->device; struct Scsi_Host *shost = sdev->host; - DECLARE_MUTEX_LOCKED(sem); + DECLARE_COMPLETION(done); + unsigned long timeleft; unsigned long flags; - int rtn = SUCCESS; + int rtn; - /* - * we will use a queued command if possible, otherwise we will - * emulate the queuing and calling of completion function ourselves. - */ if (sdev->scsi_level <= SCSI_2) scmd->cmnd[1] = (scmd->cmnd[1] & 0x1f) | (sdev->lun << 5 & 0xe0); - scsi_add_timer(scmd, timeout, scsi_eh_times_out); - - /* - * set up the semaphore so we wait for the command to complete. - */ - shost->eh_action = &sem; + shost->eh_action = &done; scmd->request->rq_status = RQ_SCSI_BUSY; spin_lock_irqsave(shost->host_lock, flags); @@ -497,47 +462,29 @@ static int scsi_send_eh_cmnd(struct scsi_cmnd *scmd, int timeout) shost->hostt->queuecommand(scmd, scsi_eh_done); spin_unlock_irqrestore(shost->host_lock, flags); - down(&sem); - scsi_log_completion(scmd, SUCCESS); + timeleft = wait_for_completion_timeout(&done, timeout); + scmd->request->rq_status = RQ_SCSI_DONE; shost->eh_action = NULL; - /* - * see if timeout. if so, tell the host to forget about it. - * in other words, we don't want a callback any more. - */ - if (scmd->eh_eflags & SCSI_EH_REC_TIMEOUT) { - scmd->eh_eflags &= ~SCSI_EH_REC_TIMEOUT; - - /* - * as far as the low level driver is - * concerned, this command is still active, so - * we must give the low level driver a chance - * to abort it. (db) - * - * FIXME(eric) - we are not tracking whether we could - * abort a timed out command or not. not sure how - * we should treat them differently anyways. - */ - if (shost->hostt->eh_abort_handler) - shost->hostt->eh_abort_handler(scmd); - - scmd->request->rq_status = RQ_SCSI_DONE; - rtn = FAILED; - } + scsi_log_completion(scmd, SUCCESS); - SCSI_LOG_ERROR_RECOVERY(3, printk("%s: scmd: %p, rtn:%x\n", - __FUNCTION__, scmd, rtn)); + SCSI_LOG_ERROR_RECOVERY(3, + printk("%s: scmd: %p, timeleft: %ld\n", + __FUNCTION__, scmd, timeleft)); /* - * now examine the actual status codes to see whether the command - * actually did complete normally. + * If there is time left scsi_eh_done got called, and we will + * examine the actual status codes to see whether the command + * actually did complete normally, else tell the host to forget + * about this command. */ - if (rtn == SUCCESS) { + if (timeleft) { rtn = scsi_eh_completed_normally(scmd); SCSI_LOG_ERROR_RECOVERY(3, printk("%s: scsi_eh_completed_normally %x\n", __FUNCTION__, rtn)); + switch (rtn) { case SUCCESS: case NEEDS_RETRY: @@ -547,6 +494,15 @@ static int scsi_send_eh_cmnd(struct scsi_cmnd *scmd, int timeout) rtn = FAILED; break; } + } else { + /* + * FIXME(eric) - we are not tracking whether we could + * abort a timed out command or not. not sure how + * we should treat them differently anyways. + */ + if (shost->hostt->eh_abort_handler) + shost->hostt->eh_abort_handler(scmd); + rtn = FAILED; } return rtn; @@ -1571,50 +1527,41 @@ static void scsi_unjam_host(struct Scsi_Host *shost) } /** - * scsi_error_handler - Handle errors/timeouts of SCSI cmds. + * scsi_error_handler - SCSI error handler thread * @data: Host for which we are running. * * Notes: - * This is always run in the context of a kernel thread. The idea is - * that we start this thing up when the kernel starts up (one per host - * that we detect), and it immediately goes to sleep and waits for some - * event (i.e. failure). When this takes place, we have the job of - * trying to unjam the bus and restarting things. + * This is the main error handling loop. This is run as a kernel thread + * for every SCSI host and handles all error handling activity. **/ int scsi_error_handler(void *data) { - struct Scsi_Host *shost = (struct Scsi_Host *) data; - int rtn; + struct Scsi_Host *shost = data; current->flags |= PF_NOFREEZE; - /* - * Note - we always use TASK_INTERRUPTIBLE even if the module - * was loaded as part of the kernel. The reason is that - * UNINTERRUPTIBLE would cause this thread to be counted in - * the load average as a running process, and an interruptible - * wait doesn't. + * We use TASK_INTERRUPTIBLE so that the thread is not + * counted against the load average as a running process. + * We never actually get interrupted because kthread_run + * disables singal delivery for the created thread. */ set_current_state(TASK_INTERRUPTIBLE); while (!kthread_should_stop()) { if (shost->host_failed == 0 || shost->host_failed != shost->host_busy) { - SCSI_LOG_ERROR_RECOVERY(1, printk("Error handler" - " scsi_eh_%d" - " sleeping\n", - shost->host_no)); + SCSI_LOG_ERROR_RECOVERY(1, + printk("Error handler scsi_eh_%d sleeping\n", + shost->host_no)); schedule(); set_current_state(TASK_INTERRUPTIBLE); continue; } __set_current_state(TASK_RUNNING); - SCSI_LOG_ERROR_RECOVERY(1, printk("Error handler" - " scsi_eh_%d waking" - " up\n",shost->host_no)); - - shost->eh_active = 1; + SCSI_LOG_ERROR_RECOVERY(1, + printk("Error handler scsi_eh_%d waking up\n", + shost->host_no)); /* * We have a host that is failing for some reason. Figure out @@ -1622,12 +1569,10 @@ int scsi_error_handler(void *data) * If we fail, we end up taking the thing offline. */ if (shost->hostt->eh_strategy_handler) - rtn = shost->hostt->eh_strategy_handler(shost); + shost->hostt->eh_strategy_handler(shost); else scsi_unjam_host(shost); - shost->eh_active = 0; - /* * Note - if the above fails completely, the action is to take * individual devices offline and flush the queue of any @@ -1638,15 +1583,10 @@ int scsi_error_handler(void *data) scsi_restart_operations(shost); set_current_state(TASK_INTERRUPTIBLE); } - __set_current_state(TASK_RUNNING); - SCSI_LOG_ERROR_RECOVERY(1, printk("Error handler scsi_eh_%d" - " exiting\n",shost->host_no)); - - /* - * Make sure that nobody tries to wake us up again. - */ + SCSI_LOG_ERROR_RECOVERY(1, + printk("Error handler scsi_eh_%d exiting\n", shost->host_no)); shost->ehandler = NULL; return 0; } |