diff options
Diffstat (limited to 'drivers/ata/libata-eh.c')
-rw-r--r-- | drivers/ata/libata-eh.c | 360 |
1 files changed, 245 insertions, 115 deletions
diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index 52c85af7fe99..2bff9adcacf1 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -44,10 +44,41 @@ #include "libata.h" +enum { + ATA_EH_SPDN_NCQ_OFF = (1 << 0), + ATA_EH_SPDN_SPEED_DOWN = (1 << 1), + ATA_EH_SPDN_FALLBACK_TO_PIO = (1 << 2), +}; + static void __ata_port_freeze(struct ata_port *ap); static void ata_eh_finish(struct ata_port *ap); +#ifdef CONFIG_PM static void ata_eh_handle_port_suspend(struct ata_port *ap); static void ata_eh_handle_port_resume(struct ata_port *ap); +static int ata_eh_suspend(struct ata_port *ap, + struct ata_device **r_failed_dev); +static void ata_eh_prep_resume(struct ata_port *ap); +static int ata_eh_resume(struct ata_port *ap, struct ata_device **r_failed_dev); +#else /* CONFIG_PM */ +static void ata_eh_handle_port_suspend(struct ata_port *ap) +{ } + +static void ata_eh_handle_port_resume(struct ata_port *ap) +{ } + +static int ata_eh_suspend(struct ata_port *ap, struct ata_device **r_failed_dev) +{ + return 0; +} + +static void ata_eh_prep_resume(struct ata_port *ap) +{ } + +static int ata_eh_resume(struct ata_port *ap, struct ata_device **r_failed_dev) +{ + return 0; +} +#endif /* CONFIG_PM */ static void ata_ering_record(struct ata_ering *ering, int is_io, unsigned int err_mask) @@ -65,12 +96,9 @@ static void ata_ering_record(struct ata_ering *ering, int is_io, ent->timestamp = get_jiffies_64(); } -static struct ata_ering_entry * ata_ering_top(struct ata_ering *ering) +static void ata_ering_clear(struct ata_ering *ering) { - struct ata_ering_entry *ent = &ering->ring[ering->cursor]; - if (!ent->err_mask) - return NULL; - return ent; + memset(ering, 0, sizeof(*ering)); } static int ata_ering_map(struct ata_ering *ering, @@ -585,7 +613,7 @@ static void __ata_port_freeze(struct ata_port *ap) ap->pflags |= ATA_PFLAG_FROZEN; - DPRINTK("ata%u port frozen\n", ap->id); + DPRINTK("ata%u port frozen\n", ap->print_id); } /** @@ -658,7 +686,7 @@ void 
ata_eh_thaw_port(struct ata_port *ap) spin_unlock_irqrestore(ap->lock, flags); - DPRINTK("ata%u port thawed\n", ap->id); + DPRINTK("ata%u port thawed\n", ap->print_id); } static void ata_eh_scsidone(struct scsi_cmnd *scmd) @@ -954,26 +982,27 @@ static int ata_eh_read_log_10h(struct ata_device *dev, * RETURNS: * 0 on success, AC_ERR_* mask on failure */ -static unsigned int atapi_eh_request_sense(struct ata_device *dev, - unsigned char *sense_buf) +static unsigned int atapi_eh_request_sense(struct ata_queued_cmd *qc) { + struct ata_device *dev = qc->dev; + unsigned char *sense_buf = qc->scsicmd->sense_buffer; struct ata_port *ap = dev->ap; struct ata_taskfile tf; u8 cdb[ATAPI_CDB_LEN]; DPRINTK("ATAPI request sense\n"); - ata_tf_init(dev, &tf); - /* FIXME: is this needed? */ memset(sense_buf, 0, SCSI_SENSE_BUFFERSIZE); - /* XXX: why tf_read here? */ - ap->ops->tf_read(ap, &tf); - - /* fill these in, for the case where they are -not- overwritten */ + /* initialize sense_buf with the error register, + * for the case where they are -not- overwritten + */ sense_buf[0] = 0x70; - sense_buf[2] = tf.feature >> 4; + sense_buf[2] = qc->result_tf.feature >> 4; + + /* some devices time out if garbage left in tf */ + ata_tf_init(dev, &tf); memset(cdb, 0, ATAPI_CDB_LEN); cdb[0] = REQUEST_SENSE; @@ -1027,7 +1056,7 @@ static void ata_eh_analyze_serror(struct ata_port *ap) } if (serror & SERR_INTERNAL) { err_mask |= AC_ERR_SYSTEM; - action |= ATA_EH_SOFTRESET; + action |= ATA_EH_HARDRESET; } if (serror & (SERR_PHYRDY_CHG | SERR_DEV_XCHG)) ata_ehi_hotplugged(&ehc->i); @@ -1122,7 +1151,9 @@ static unsigned int ata_eh_analyze_tf(struct ata_queued_cmd *qc, return ATA_EH_SOFTRESET; } - if (!(qc->err_mask & AC_ERR_DEV)) + if (stat & (ATA_ERR | ATA_DF)) + qc->err_mask |= AC_ERR_DEV; + else return 0; switch (qc->dev->class) { @@ -1137,8 +1168,7 @@ static unsigned int ata_eh_analyze_tf(struct ata_queued_cmd *qc, case ATA_DEV_ATAPI: if (!(qc->ap->pflags & ATA_PFLAG_FROZEN)) { - tmp = 
atapi_eh_request_sense(qc->dev, - qc->scsicmd->sense_buffer); + tmp = atapi_eh_request_sense(qc); if (!tmp) { /* ATA_QCFLAG_SENSE_VALID is used to * tell atapi_qc_complete() that sense @@ -1159,87 +1189,99 @@ static unsigned int ata_eh_analyze_tf(struct ata_queued_cmd *qc, return action; } -static int ata_eh_categorize_ering_entry(struct ata_ering_entry *ent) +static int ata_eh_categorize_error(int is_io, unsigned int err_mask) { - if (ent->err_mask & (AC_ERR_ATA_BUS | AC_ERR_TIMEOUT)) + if (err_mask & AC_ERR_ATA_BUS) return 1; - if (ent->is_io) { - if (ent->err_mask & AC_ERR_HSM) - return 1; - if ((ent->err_mask & - (AC_ERR_DEV|AC_ERR_MEDIA|AC_ERR_INVALID)) == AC_ERR_DEV) + if (err_mask & AC_ERR_TIMEOUT) + return 2; + + if (is_io) { + if (err_mask & AC_ERR_HSM) return 2; + if ((err_mask & + (AC_ERR_DEV|AC_ERR_MEDIA|AC_ERR_INVALID)) == AC_ERR_DEV) + return 3; } return 0; } -struct speed_down_needed_arg { +struct speed_down_verdict_arg { u64 since; - int nr_errors[3]; + int nr_errors[4]; }; -static int speed_down_needed_cb(struct ata_ering_entry *ent, void *void_arg) +static int speed_down_verdict_cb(struct ata_ering_entry *ent, void *void_arg) { - struct speed_down_needed_arg *arg = void_arg; + struct speed_down_verdict_arg *arg = void_arg; + int cat = ata_eh_categorize_error(ent->is_io, ent->err_mask); if (ent->timestamp < arg->since) return -1; - arg->nr_errors[ata_eh_categorize_ering_entry(ent)]++; + arg->nr_errors[cat]++; return 0; } /** - * ata_eh_speed_down_needed - Determine wheter speed down is necessary + * ata_eh_speed_down_verdict - Determine speed down verdict * @dev: Device of interest * * This function examines error ring of @dev and determines - * whether speed down is necessary. Speed down is necessary if - * there have been more than 3 of Cat-1 errors or 10 of Cat-2 - * errors during last 15 minutes. + * whether NCQ needs to be turned off, transfer speed should be + * stepped down, or falling back to PIO is necessary. 
+ * + * Cat-1 is ATA_BUS error for any command. * - * Cat-1 errors are ATA_BUS, TIMEOUT for any command and HSM - * violation for known supported commands. + * Cat-2 is TIMEOUT for any command or HSM violation for known + * supported commands. * - * Cat-2 errors are unclassified DEV error for known supported + * Cat-3 is unclassified DEV error for known supported * command. * + * NCQ needs to be turned off if there have been more than 3 + * Cat-2 + Cat-3 errors during last 10 minutes. + * + * Speed down is necessary if there have been more than 3 Cat-1 + + * Cat-2 errors or 10 Cat-3 errors during last 10 minutes. + * + * Falling back to PIO mode is necessary if there have been more + * than 10 Cat-1 + Cat-2 + Cat-3 errors during last 5 minutes. + * * LOCKING: * Inherited from caller. * * RETURNS: - * 1 if speed down is necessary, 0 otherwise + * OR of ATA_EH_SPDN_* flags. */ -static int ata_eh_speed_down_needed(struct ata_device *dev) +static unsigned int ata_eh_speed_down_verdict(struct ata_device *dev) { - const u64 interval = 15LLU * 60 * HZ; - static const int err_limits[3] = { -1, 3, 10 }; - struct speed_down_needed_arg arg; - struct ata_ering_entry *ent; - int err_cat; - u64 j64; + const u64 j5mins = 5LLU * 60 * HZ, j10mins = 10LLU * 60 * HZ; + u64 j64 = get_jiffies_64(); + struct speed_down_verdict_arg arg; + unsigned int verdict = 0; - ent = ata_ering_top(&dev->ering); - if (!ent) - return 0; + /* scan past 10 mins of error history */ + memset(&arg, 0, sizeof(arg)); + arg.since = j64 - min(j64, j10mins); + ata_ering_map(&dev->ering, speed_down_verdict_cb, &arg); - err_cat = ata_eh_categorize_ering_entry(ent); - if (err_cat == 0) - return 0; + if (arg.nr_errors[2] + arg.nr_errors[3] > 3) + verdict |= ATA_EH_SPDN_NCQ_OFF; + if (arg.nr_errors[1] + arg.nr_errors[2] > 3 || arg.nr_errors[3] > 10) + verdict |= ATA_EH_SPDN_SPEED_DOWN; + /* scan past 5 mins of error history */ memset(&arg, 0, sizeof(arg)); + arg.since = j64 - min(j64, j5mins); +
ata_ering_map(&dev->ering, speed_down_verdict_cb, &arg); - j64 = get_jiffies_64(); - if (j64 >= interval) - arg.since = j64 - interval; - else - arg.since = 0; - - ata_ering_map(&dev->ering, speed_down_needed_cb, &arg); + if (arg.nr_errors[1] + arg.nr_errors[2] + arg.nr_errors[3] > 10) + verdict |= ATA_EH_SPDN_FALLBACK_TO_PIO; - return arg.nr_errors[err_cat] > err_limits[err_cat]; + return verdict; } /** @@ -1257,31 +1299,80 @@ static int ata_eh_speed_down_needed(struct ata_device *dev) * Kernel thread context (may sleep). * * RETURNS: - * 0 on success, -errno otherwise + * Determined recovery action. */ -static int ata_eh_speed_down(struct ata_device *dev, int is_io, - unsigned int err_mask) +static unsigned int ata_eh_speed_down(struct ata_device *dev, int is_io, + unsigned int err_mask) { - if (!err_mask) + unsigned int verdict; + unsigned int action = 0; + + /* don't bother if Cat-0 error */ + if (ata_eh_categorize_error(is_io, err_mask) == 0) return 0; /* record error and determine whether speed down is necessary */ ata_ering_record(&dev->ering, is_io, err_mask); + verdict = ata_eh_speed_down_verdict(dev); - if (!ata_eh_speed_down_needed(dev)) - return 0; + /* turn off NCQ? */ + if ((verdict & ATA_EH_SPDN_NCQ_OFF) && + (dev->flags & (ATA_DFLAG_PIO | ATA_DFLAG_NCQ | + ATA_DFLAG_NCQ_OFF)) == ATA_DFLAG_NCQ) { + dev->flags |= ATA_DFLAG_NCQ_OFF; + ata_dev_printk(dev, KERN_WARNING, + "NCQ disabled due to excessive errors\n"); + goto done; + } + + /* speed down? 
*/ + if (verdict & ATA_EH_SPDN_SPEED_DOWN) { + /* speed down SATA link speed if possible */ + if (sata_down_spd_limit(dev->ap) == 0) { + action |= ATA_EH_HARDRESET; + goto done; + } - /* speed down SATA link speed if possible */ - if (sata_down_spd_limit(dev->ap) == 0) - return ATA_EH_HARDRESET; + /* lower transfer mode */ + if (dev->spdn_cnt < 2) { + static const int dma_dnxfer_sel[] = + { ATA_DNXFER_DMA, ATA_DNXFER_40C }; + static const int pio_dnxfer_sel[] = + { ATA_DNXFER_PIO, ATA_DNXFER_FORCE_PIO0 }; + int sel; - /* lower transfer mode */ - if (ata_down_xfermask_limit(dev, 0) == 0) - return ATA_EH_SOFTRESET; + if (dev->xfer_shift != ATA_SHIFT_PIO) + sel = dma_dnxfer_sel[dev->spdn_cnt]; + else + sel = pio_dnxfer_sel[dev->spdn_cnt]; + + dev->spdn_cnt++; + + if (ata_down_xfermask_limit(dev, sel) == 0) { + action |= ATA_EH_SOFTRESET; + goto done; + } + } + } + + /* Fall back to PIO? Slowing down to PIO is meaningless for + * SATA. Consider it only for PATA. + */ + if ((verdict & ATA_EH_SPDN_FALLBACK_TO_PIO) && (dev->spdn_cnt >= 2) && + (dev->ap->cbl != ATA_CBL_SATA) && + (dev->xfer_shift != ATA_SHIFT_PIO)) { + if (ata_down_xfermask_limit(dev, ATA_DNXFER_FORCE_PIO) == 0) { + dev->spdn_cnt = 0; + action |= ATA_EH_SOFTRESET; + goto done; + } + } - ata_dev_printk(dev, KERN_ERR, - "speed down requested but no transfer mode left\n"); return 0; + done: + /* device has been slowed down, blow error history */ + ata_ering_clear(&dev->ering); + return action; } /** @@ -1536,8 +1627,14 @@ static int ata_eh_reset(struct ata_port *ap, int classify, rc = prereset(ap); if (rc) { if (rc == -ENOENT) { - ata_port_printk(ap, KERN_DEBUG, "port disabled. ignoring.\n"); + ata_port_printk(ap, KERN_DEBUG, + "port disabled. 
ignoring.\n"); ap->eh_context.i.action &= ~ATA_EH_RESET_MASK; + + for (i = 0; i < ATA_MAX_DEVICES; i++) + classes[i] = ATA_DEV_NONE; + + rc = 0; } else ata_port_printk(ap, KERN_ERR, "prereset failed (errno=%d)\n", rc); @@ -1574,7 +1671,10 @@ static int ata_eh_reset(struct ata_port *ap, int classify, reset == softreset ? "soft" : "hard"); /* mark that this EH session started with reset */ - ehc->i.flags |= ATA_EHI_DID_RESET; + if (reset == hardreset) + ehc->i.flags |= ATA_EHI_DID_HARDRESET; + else + ehc->i.flags |= ATA_EHI_DID_SOFTRESET; rc = ata_do_reset(ap, reset, classes); @@ -1648,12 +1748,17 @@ static int ata_eh_revalidate_and_attach(struct ata_port *ap, { struct ata_eh_context *ehc = &ap->eh_context; struct ata_device *dev; + unsigned int new_mask = 0; unsigned long flags; int i, rc = 0; DPRINTK("ENTER\n"); - for (i = 0; i < ATA_MAX_DEVICES; i++) { + /* For PATA drive side cable detection to work, IDENTIFY must + * be done backwards such that PDIAG- is released by the slave + * device before the master device is identified. + */ + for (i = ATA_MAX_DEVICES - 1; i >= 0; i--) { unsigned int action, readid_flags = 0; dev = &ap->device[i]; @@ -1665,13 +1770,13 @@ static int ata_eh_revalidate_and_attach(struct ata_port *ap, if (action & ATA_EH_REVALIDATE && ata_dev_ready(dev)) { if (ata_port_offline(ap)) { rc = -EIO; - break; + goto err; } ata_eh_about_to_do(ap, dev, ATA_EH_REVALIDATE); rc = ata_dev_revalidate(dev, readid_flags); if (rc) - break; + goto err; ata_eh_done(ap, dev, ATA_EH_REVALIDATE); @@ -1689,43 +1794,61 @@ static int ata_eh_revalidate_and_attach(struct ata_port *ap, rc = ata_dev_read_id(dev, &dev->class, readid_flags, dev->id); - if (rc == 0) { - ehc->i.flags |= ATA_EHI_PRINTINFO; - rc = ata_dev_configure(dev); - ehc->i.flags &= ~ATA_EHI_PRINTINFO; - } else if (rc == -ENOENT) { + switch (rc) { + case 0: + new_mask |= 1 << i; + break; + case -ENOENT: /* IDENTIFY was issued to non-existent * device. No need to reset. Just * thaw and kill the device. 
*/ ata_eh_thaw_port(ap); dev->class = ATA_DEV_UNKNOWN; - rc = 0; - } - - if (rc) { - dev->class = ATA_DEV_UNKNOWN; break; + default: + dev->class = ATA_DEV_UNKNOWN; + goto err; } + } + } - if (ata_dev_enabled(dev)) { - spin_lock_irqsave(ap->lock, flags); - ap->pflags |= ATA_PFLAG_SCSI_HOTPLUG; - spin_unlock_irqrestore(ap->lock, flags); + /* PDIAG- should have been released, ask cable type if post-reset */ + if ((ehc->i.flags & ATA_EHI_DID_RESET) && ap->ops->cable_detect) + ap->cbl = ap->ops->cable_detect(ap); - /* new device discovered, configure xfermode */ - ehc->i.flags |= ATA_EHI_SETMODE; - } - } + /* Configure new devices forward such that user doesn't see + * device detection messages backwards. + */ + for (i = 0; i < ATA_MAX_DEVICES; i++) { + dev = &ap->device[i]; + + if (!(new_mask & (1 << i))) + continue; + + ehc->i.flags |= ATA_EHI_PRINTINFO; + rc = ata_dev_configure(dev); + ehc->i.flags &= ~ATA_EHI_PRINTINFO; + if (rc) + goto err; + + spin_lock_irqsave(ap->lock, flags); + ap->pflags |= ATA_PFLAG_SCSI_HOTPLUG; + spin_unlock_irqrestore(ap->lock, flags); + + /* new device discovered, configure xfermode */ + ehc->i.flags |= ATA_EHI_SETMODE; } - if (rc) - *r_failed_dev = dev; + return 0; - DPRINTK("EXIT\n"); + err: + *r_failed_dev = dev; + DPRINTK("EXIT rc=%d\n", rc); return rc; } +#ifdef CONFIG_PM /** * ata_eh_suspend - handle suspend EH action * @ap: target host port @@ -1883,6 +2006,7 @@ static int ata_eh_resume(struct ata_port *ap, struct ata_device **r_failed_dev) DPRINTK("EXIT\n"); return 0; } +#endif /* CONFIG_PM */ static int ata_port_nr_enabled(struct ata_port *ap) { @@ -1964,7 +2088,7 @@ static int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset, { struct ata_eh_context *ehc = &ap->eh_context; struct ata_device *dev; - int down_xfermask, i, rc; + int i, rc; DPRINTK("ENTER\n"); @@ -1993,7 +2117,6 @@ static int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset, } retry: - down_xfermask = 0; rc = 0; /* if UNLOADING, finish 
immediately */ @@ -2038,10 +2161,8 @@ static int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset, /* configure transfer mode if necessary */ if (ehc->i.flags & ATA_EHI_SETMODE) { rc = ata_set_mode(ap, &dev); - if (rc) { - down_xfermask = 1; + if (rc) goto dev_fail; - } ehc->i.flags &= ~ATA_EHI_SETMODE; } @@ -2053,20 +2174,27 @@ static int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset, goto out; dev_fail: + ehc->tries[dev->devno]--; + switch (rc) { - case -ENODEV: - /* device missing, schedule probing */ - ehc->i.probe_mask |= (1 << dev->devno); case -EINVAL: + /* eeek, something went very wrong, give up */ ehc->tries[dev->devno] = 0; break; + + case -ENODEV: + /* device missing or wrong IDENTIFY data, schedule probing */ + ehc->i.probe_mask |= (1 << dev->devno); + /* give it just one more chance */ + ehc->tries[dev->devno] = min(ehc->tries[dev->devno], 1); case -EIO: - sata_down_spd_limit(ap); - default: - ehc->tries[dev->devno]--; - if (down_xfermask && - ata_down_xfermask_limit(dev, ehc->tries[dev->devno] == 1)) - ehc->tries[dev->devno] = 0; + if (ehc->tries[dev->devno] == 1) { + /* This is the last chance, better to slow + * down than lose it. + */ + sata_down_spd_limit(ap); + ata_down_xfermask_limit(dev, ATA_DNXFER_PIO); + } } if (ata_dev_enabled(dev) && !ehc->tries[dev->devno]) { @@ -2181,6 +2309,7 @@ void ata_do_eh(struct ata_port *ap, ata_prereset_fn_t prereset, ata_eh_finish(ap); } +#ifdef CONFIG_PM /** * ata_eh_handle_port_suspend - perform port suspend operation * @ap: port to suspend @@ -2296,3 +2425,4 @@ static void ata_eh_handle_port_resume(struct ata_port *ap) } spin_unlock_irqrestore(ap->lock, flags); } +#endif /* CONFIG_PM */ |