From 187cac04dd44a27076ce0d513accdaf686119a6a Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Fri, 26 Jun 2009 15:31:55 -0700 Subject: [PATCH] SILI - Fix assertion panic during error handling. * The error handling code could not distinguish between the use of the error ccb as part of a softreset sequence and the use of the error ccb as part of a error recovery sequence and asserted in the former case. * Ensure that no commands are active on the port prior to initiating a hard reset through the port multiplier. --- sys/dev/disk/sili/sili.c | 55 +++++++++++++++++---------------------------- sys/dev/disk/sili/sili.h | 8 +++++++ sys/dev/disk/sili/sili_pm.c | 50 +++++++++++++++++++++++++---------------- 3 files changed, 60 insertions(+), 53 deletions(-) diff --git a/sys/dev/disk/sili/sili.c b/sys/dev/disk/sili/sili.c index 3a20d553ae..efe011437e 100644 --- a/sys/dev/disk/sili/sili.c +++ b/sys/dev/disk/sili/sili.c @@ -62,6 +62,7 @@ void sili_unload_prb(struct sili_ccb *); static void sili_load_prb_callback(void *info, bus_dma_segment_t *segs, int nsegs, int error); void sili_start(struct sili_ccb *); +static void sili_port_reinit(struct sili_port *ap); int sili_port_softreset(struct sili_port *ap); int sili_port_hardreset(struct sili_port *ap); void sili_port_hardstop(struct sili_port *ap); @@ -71,10 +72,6 @@ static void sili_ata_cmd_timeout_unserialized(void *); static int sili_core_timeout(struct sili_ccb *ccb, int really_error); void sili_check_active_timeouts(struct sili_port *ap); -#if 0 -void sili_beg_exclusive_access(struct sili_port *ap, struct ata_port *at); -void sili_end_exclusive_access(struct sili_port *ap, struct ata_port *at); -#endif void sili_issue_pending_commands(struct sili_port *ap, struct sili_ccb *ccb); void sili_port_read_ncq_error(struct sili_port *, int); @@ -337,14 +334,13 @@ sili_port_reinit(struct sili_port *ap) int slot; int target; u_int32_t data; - int reentrant; - - reentrant = (ap->ap_flags & AP_F_ERR_CCB_RESERVED) ? 1 : 0; if (bootverbose || 1) { kprintf("%s: reiniting port after error reent=%d " "expired=%08x\n", - PORTNAME(ap), reentrant, ap->ap_expired); + PORTNAME(ap), + (ap->ap_flags & AP_F_REINIT_ACTIVE), + ap->ap_expired); } /* @@ -383,18 +379,23 @@ sili_port_reinit(struct sili_port *ap) * If reentrant, stop here. Otherwise the state for the original * ahci_port_reinit() will get ripped out from under it. */ - if (reentrant) + if (ap->ap_flags & AP_F_REINIT_ACTIVE) return; + ap->ap_flags |= AP_F_REINIT_ACTIVE; /* * Read the LOG ERROR page for targets that returned a specific * D2H FIS with ERR set. + * + * Don't bother if we are already using the error CCB. */ - for (target = 0; target < SILI_MAX_PMPORTS; ++target) { - at = &ap->ap_ata[target]; - if (at->at_features & ATA_PORT_F_READLOG) { - at->at_features &= ~ATA_PORT_F_READLOG; - sili_port_read_ncq_error(ap, target); + if ((ap->ap_flags & AP_F_ERR_CCB_RESERVED) == 0) { + for (target = 0; target < SILI_MAX_PMPORTS; ++target) { + at = &ap->ap_ata[target]; + if (at->at_features & ATA_PORT_F_READLOG) { + at->at_features &= ~ATA_PORT_F_READLOG; + sili_port_read_ncq_error(ap, target); + } } } @@ -414,6 +415,7 @@ sili_port_reinit(struct sili_port *ap) ccb->ccb_done(ccb); ccb->ccb_xa.complete(&ccb->ccb_xa); } + ap->ap_flags &= ~AP_F_REINIT_ACTIVE; /* * Wow. All done. We can get the port moving again. @@ -1312,6 +1314,7 @@ sili_poll(struct sili_ccb *ccb, int timeout, return(ccb->ccb_xa.state); } + KKASSERT((ap->ap_expired & (1 << ccb->ccb_slot)) == 0); sili_start(ccb); do { @@ -1422,35 +1425,19 @@ sili_start(struct sili_ccb *ccb) sili_issue_pending_commands(ap, ccb); } -#if 0 /* - * While holding the port lock acquire exclusive access to the port. - * - * This is used when running the state machine to initialize and identify - * targets over a port multiplier. Setting exclusive access prevents - * sili_port_intr() from activating any requests sitting on the pending - * queue. + * Wait for all commands to complete processing. We hold the lock so no + * new commands will be queued. */ void -sili_beg_exclusive_access(struct sili_port *ap, struct ata_port *at) +sili_exclusive_access(struct sili_port *ap) { - KKASSERT((ap->ap_flags & AP_F_EXCLUSIVE_ACCESS) == 0); - ap->ap_flags |= AP_F_EXCLUSIVE_ACCESS; while (ap->ap_active) { sili_port_intr(ap, 1); sili_os_softsleep(); } } -void -sili_end_exclusive_access(struct sili_port *ap, struct ata_port *at) -{ - KKASSERT((ap->ap_flags & AP_F_EXCLUSIVE_ACCESS) != 0); - ap->ap_flags &= ~AP_F_EXCLUSIVE_ACCESS; - sili_issue_pending_commands(ap, NULL); -} -#endif - /* * If ccb is not NULL enqueue and/or issue it. * @@ -1731,7 +1718,7 @@ sili_port_intr(struct sili_port *ap, int blockable) if ((ccb_at = ccb->ccb_xa.at) == NULL) ccb_at = &ap->ap_ata[0]; if (target == ccb_at->at_target) { - if (ccb->ccb_xa.flags & ATA_F_NCQ && + if ((ccb->ccb_xa.flags & ATA_F_NCQ) && (error == SILI_PREG_CERROR_DEVICE || error == SILI_PREG_CERROR_SDBERROR)) { ccb_at->at_features |= ATA_PORT_F_READLOG; diff --git a/sys/dev/disk/sili/sili.h b/sys/dev/disk/sili/sili.h index bf3fa46a17..a6d4fccc62 100644 --- a/sys/dev/disk/sili/sili.h +++ b/sys/dev/disk/sili/sili.h @@ -753,6 +753,7 @@ struct sili_port { #define AP_F_IGNORE_IFS 0x0040 #define AP_F_UNUSED0200 0x0200 #define AP_F_ERR_CCB_RESERVED 0x0400 +#define AP_F_REINIT_ACTIVE 0x0800 int ap_signal; /* os per-port thread sig */ thread_t ap_thread; /* os per-port thread */ struct lock ap_lock; /* os per-port lock */ @@ -852,8 +853,14 @@ struct sili_device { #define sili_pwait_set_to(_ap, _to, _r, _b) \ sili_pwait_eq((_ap), _to, (_r), (_b), (_b)) +/* + * Misc defines + */ #define SILI_PWAIT_TIMEOUT 1000 +/* + * Prototypes + */ const struct sili_device *sili_lookup_device(device_t dev); int sili_init(struct sili_softc *); int sili_port_init(struct sili_port *ap); @@ -861,6 +868,7 @@ int sili_port_alloc(struct sili_softc *, u_int); void sili_port_state_machine(struct sili_port *ap, int initial); void sili_port_free(struct sili_softc *, u_int); int sili_port_reset(struct sili_port *, struct ata_port *at, int); +void sili_exclusive_access(struct sili_port *ap); u_int32_t sili_read(struct sili_softc *, bus_size_t); void sili_write(struct sili_softc *, bus_size_t, u_int32_t); diff --git a/sys/dev/disk/sili/sili_pm.c b/sys/dev/disk/sili/sili_pm.c index fb308f6e64..5c85b42415 100644 --- a/sys/dev/disk/sili/sili_pm.c +++ b/sys/dev/disk/sili/sili_pm.c @@ -245,12 +245,20 @@ sili_pm_hardreset(struct sili_port *ap, int target, int hard) at = &ap->ap_ata[target]; /* + * Ensure that no other commands are pending. Our HW reset of + * the PM target can skewer the port overall! + */ + sili_exclusive_access(ap); + + /* * Turn off power management and kill the phy on the target * if requested. Hold state for 10ms. */ data = SATA_PM_SCTL_IPM_DISABLED; +#if 0 if (hard == 2) data |= SATA_PM_SCTL_DET_DISABLE; +#endif if (sili_pm_write(ap, target, SATA_PMREG_SERR, -1)) goto err; if (sili_pm_write(ap, target, SATA_PMREG_SCTL, data)) @@ -260,6 +268,14 @@ sili_pm_hardreset(struct sili_port *ap, int target, int hard) /* * Start transmitting COMRESET. COMRESET must be sent for at * least 1ms. + * + * It takes about 100ms for the DET logic to settle down, + * from trial and error testing. If this is too short + * the softreset code will fail. + * + * It is very important to allow the logic to settle before + * we issue any additional commands or the target will interfere + * with our PM commands. */ at->at_probe = ATA_PROBE_FAILED; at->at_type = ATA_PORT_T_NONE; @@ -272,12 +288,6 @@ sili_pm_hardreset(struct sili_port *ap, int target, int hard) } if (sili_pm_write(ap, target, SATA_PMREG_SCTL, data)) goto err; - - /* - * It takes about 100ms for the DET logic to settle down, - * from trial and error testing. If this is too short - * the softreset code will fail. - */ sili_os_sleep(100); if (sili_pm_phy_status(ap, target, &data)) { @@ -288,15 +298,15 @@ sili_pm_hardreset(struct sili_port *ap, int target, int hard) /* * Flush any status, then clear DET to initiate negotiation. * - * We need to give the phy layer a bit of time to settle down - * or we may catch a detection glitch instead of the actual - * device detect. + * It is very important to allow the negotiation to settle before + * we issue any additional commands or the target will interfere + * with our PM commands. */ sili_pm_write(ap, target, SATA_PMREG_SERR, -1); data = SATA_PM_SCTL_IPM_DISABLED | SATA_PM_SCTL_DET_NONE; if (sili_pm_write(ap, target, SATA_PMREG_SCTL, data)) goto err; - sili_os_sleep(10); + sili_os_sleep(100); /* * Try to determine if there is a device on the port. @@ -346,12 +356,14 @@ sili_pm_hardreset(struct sili_port *ap, int target, int hard) * * Wait 200ms to give the device time to send its first D2H FIS. * If we do not wait long enough our softreset sequence can collide - * with the end of the device's reset sequence. + * with the end of the device's reset sequence and brick the port. + * Some devices may need longer and we handle those cases in the + * pm softreset code. * * XXX how do we poll that particular target's BSY status via the * PM? */ - kprintf("%s.%d: Device detected data=%08x\n", + kprintf("%s.%d: PM Device detected ssts=%08x\n", PORTNAME(ap), target, data); sili_os_sleep(200); @@ -364,13 +376,12 @@ err: /* * SILI soft reset through port multiplier. * - * This function keeps port communications intact and attempts to generate - * a reset to the connected device using device commands. Unlike - * hard-port operations we can't do fancy stop/starts or stuff like - * that without messing up other commands that might be running or - * queued. + * This function generates a soft reset through the port multiplier, + * keeping port communications intact. * - * The SII chip will do the whole mess for us. + * The SII chip will do the whole mess for us. However, the command + * can brick the port if the target is still busy from the previous + * COMRESET. */ int sili_pm_softreset(struct sili_port *ap, int target) @@ -386,7 +397,7 @@ sili_pm_softreset(struct sili_port *ap, int target) error = EIO; at = &ap->ap_ata[target]; - DPRINTF(SILI_D_VERBOSE, "%s: soft reset\n", PORTNAME(ap)); + kprintf("%s: PM softreset\n", ATANAME(ap, at)); /* * Prep the special soft-reset SII command. @@ -467,6 +478,7 @@ err: * Target 15 is the PM itself and these registers have * different meanings. */ + kprintf("%s: PM softreset done error %d\n", ATANAME(ap, at), error); if (error == 0 && target != 15) { if (sili_pm_write(ap, target, SATA_PMREG_SERR, -1)) { kprintf("%s: sili_pm_softreset unable to clear SERR\n", -- 2.11.4.GIT