Patch for CAM SIM removal panics

Ian Dowse iedowse at iedowse.com
Thu Jan 5 18:12:14 PST 2006


The following patch, which is also at

	http://people.freebsd.org/~iedowse/cam_remove.diff

attempts to improve CAM's handling of SIMs going away while operations
are still in progress. This appears to be particularly helpful at probe
time for umass devices, as it avoids most of the usual camisr() panics.

The change is not particularly clean, so any suggestions welcome -
basically when the SIM goes away it tries to guarantee that all new
CCBs will completed immediately with a CAM_DEV_NOT_THERE status,
and it also ensures that xpt_schedule() immediately calls the
peripheral periph_start() routine when the SIM is gone.

xpt_bus_deregister() now tries quite hard to flush all out all
outstanding operations. Additionally it fixes a problem where
mid-probe devices could be added after their AC_LOST_DEVICE message
was sent, resulting in the devices never going away (this seemed
to happen a lot with "pass" devices).

The change does add some conditional code to the normal code path,
so it will have a small negative impact on performance.

Ian

Index: cam_periph.c
===================================================================
RCS file: /dump/FreeBSD-CVS/src/sys/cam/cam_periph.c,v
retrieving revision 1.60
diff -u -r1.60 cam_periph.c
--- cam_periph.c	1 Jul 2005 15:21:29 -0000	1.60
+++ cam_periph.c	6 Jan 2006 01:17:19 -0000
@@ -1656,6 +1656,8 @@
 	case CAM_NO_HBA:
 	case CAM_PROVIDE_FAIL:
 	case CAM_REQ_TOO_BIG:
+	case CAM_LUN_INVALID:
+	case CAM_TID_INVALID:
 		error = EINVAL;
 		break;
 	case CAM_SCSI_BUS_RESET:
Index: cam_xpt.c
===================================================================
RCS file: /dump/FreeBSD-CVS/src/sys/cam/cam_xpt.c,v
retrieving revision 1.156
diff -u -r1.156 cam_xpt.c
--- cam_xpt.c	16 Sep 2005 01:26:17 -0000	1.156
+++ cam_xpt.c	6 Jan 2006 01:33:52 -0000
@@ -682,6 +682,18 @@
 
 static struct intr_config_hook *xpt_config_hook;
 
+static void dead_sim_action(struct cam_sim *sim, union ccb *ccb);
+static void dead_sim_poll(struct cam_sim *sim);
+
+/* Dummy SIM that is used when the real one has gone. */
+static struct cam_sim cam_dead_sim = {
+	.sim_action =	dead_sim_action,
+	.sim_poll =	dead_sim_poll,
+	.sim_name =	"dead_sim",
+};
+
+#define SIM_DEAD(sim)	((sim) == &cam_dead_sim)
+
 /* Registered busses */
 static TAILQ_HEAD(,cam_eb) xpt_busses;
 static u_int bus_generation;
@@ -3055,12 +3067,22 @@
 	case XPT_ENG_EXEC:
 	{
 		struct cam_path *path;
+		struct cam_sim *ccbsim;
 		int s;
 		int runq;
 
 		path = start_ccb->ccb_h.path;
 		s = splsoftcam();
 
+		if (SIM_DEAD(path->bus->sim)) {
+			/* The SIM has gone; just execute the CCB directly. */
+			cam_ccbq_send_ccb(&path->device->ccbq, start_ccb);
+			ccbsim = start_ccb->ccb_h.path->bus->sim;
+			(*(ccbsim->sim_action))(ccbsim, start_ccb);
+			splx(s);
+			break;
+		}
+
 		cam_ccbq_insert_ccb(&path->device->ccbq, start_ccb);
 		if (path->device->qfrozen_cnt == 0)
 			runq = xpt_schedule_dev_sendq(path->bus, path->device);
@@ -3641,8 +3663,8 @@
 	dev->ccbq.devq_openings--;
 	dev->ccbq.dev_openings--;	
 	
-	while((devq->send_openings <= 0 || dev->ccbq.dev_openings < 0)
-	   && (--timeout > 0)) {
+	while(((devq != NULL && devq->send_openings <= 0) ||
+	   dev->ccbq.dev_openings < 0) && (--timeout > 0)) {
 		DELAY(1000);
 		(*(sim->sim_poll))(sim);
 		camisr(&cam_bioq);
@@ -3684,6 +3706,7 @@
 xpt_schedule(struct cam_periph *perph, u_int32_t new_priority)
 {
 	struct cam_ed *device;
+	union ccb *work_ccb;
 	int s;
 	int runq;
 
@@ -3702,6 +3725,16 @@
 					     new_priority);
 		}
 		runq = 0;
+	} else if (SIM_DEAD(perph->path->bus->sim)) {
+		/* The SIM is gone so just call periph_start directly. */
+		work_ccb = xpt_get_ccb(perph->path->device);
+		splx(s);
+		if (work_ccb == NULL)
+			return; /* XXX */
+		xpt_setup_ccb(&work_ccb->ccb_h, perph->path, new_priority);
+		perph->pinfo.priority = new_priority;
+		perph->periph_start(perph, work_ccb);
+		return;
 	} else {
 		/* New entry on the queue */
 		CAM_DEBUG(perph->path, CAM_DEBUG_SUBTRACE,
@@ -4337,6 +4370,10 @@
 	} else {
 		SLIST_INSERT_HEAD(&ccb_freeq, &free_ccb->ccb_h, xpt_links.sle);
 	}
+	if (bus->sim->devq == NULL) {
+		splx(s);
+		return;
+	}
 	bus->sim->devq->alloc_openings++;
 	bus->sim->devq->alloc_active--;
 	/* XXX Turn this into an inline function - xpt_run_device?? */
@@ -4422,6 +4459,12 @@
 xpt_bus_deregister(path_id_t pathid)
 {
 	struct cam_path bus_path;
+	struct cam_ed *device;
+	struct cam_ed_qinfo *qinfo;
+	struct cam_devq *devq;
+	struct cam_periph *periph;
+	struct cam_sim *ccbsim;
+	union ccb *work_ccb;
 	cam_status status;
 
 	GIANT_REQUIRED;
@@ -4433,11 +4476,51 @@
 
 	xpt_async(AC_LOST_DEVICE, &bus_path, NULL);
 	xpt_async(AC_PATH_DEREGISTERED, &bus_path, NULL);
-	
+
+	/* The SIM may be gone, so use a dummy SIM for any stray operations. */
+	devq = bus_path.bus->sim->devq;
+	bus_path.bus->sim = &cam_dead_sim;
+
+	/* Execute any pending operations now. */
+	while ((qinfo = (struct cam_ed_qinfo *)camq_remove(&devq->send_queue,
+	    CAMQ_HEAD)) != NULL ||
+	    (qinfo = (struct cam_ed_qinfo *)camq_remove(&devq->alloc_queue,
+	    CAMQ_HEAD)) != NULL) {
+		do {
+			device = qinfo->device;
+			work_ccb = cam_ccbq_peek_ccb(&device->ccbq, CAMQ_HEAD);
+			if (work_ccb != NULL) {
+				devq->active_dev = device;
+				cam_ccbq_remove_ccb(&device->ccbq, work_ccb);
+				cam_ccbq_send_ccb(&device->ccbq, work_ccb);
+				ccbsim = work_ccb->ccb_h.path->bus->sim;
+				(*(ccbsim->sim_action))(ccbsim, work_ccb);
+			}
+
+			periph = (struct cam_periph *)camq_remove(&device->drvq,
+			    CAMQ_HEAD);
+			if (periph != NULL)
+				xpt_schedule(periph, periph->pinfo.priority);
+		} while (work_ccb != NULL || periph != NULL);
+	}
+
+	/* Make sure all completed CCBs are processed. */
+	while (!TAILQ_EMPTY(&cam_bioq)) {
+		camisr(&cam_bioq);
+
+		/* Repeat the async's for the benefit of any new devices. */
+		xpt_async(AC_LOST_DEVICE, &bus_path, NULL);
+		xpt_async(AC_PATH_DEREGISTERED, &bus_path, NULL);
+	}
+
 	/* Release the reference count held while registered. */
 	xpt_release_bus(bus_path.bus);
 	xpt_release_path(&bus_path);
 
+	/* Recheck for more completed CCBs. */
+	while (!TAILQ_EMPTY(&cam_bioq))
+		camisr(&cam_bioq);
+
 	return (CAM_REQ_CMP);
 }
 
@@ -5021,6 +5104,9 @@
 	struct	   cam_devq *devq;
 	cam_status status;
 
+	if (SIM_DEAD(bus->sim))
+		return (NULL);
+
 	/* Make space for us in the device queue on our bus */
 	devq = bus->sim->devq;
 	status = cam_devq_resize(devq, devq->alloc_queue.array_size + 1);
@@ -5131,9 +5217,11 @@
 		TAILQ_REMOVE(&target->ed_entries, device,links);
 		target->generation++;
 		xpt_max_ccbs -= device->ccbq.devq_openings;
-		/* Release our slot in the devq */
-		devq = bus->sim->devq;
-		cam_devq_resize(devq, devq->alloc_queue.array_size - 1);
+		if (!SIM_DEAD(bus->sim)) {
+			/* Release our slot in the devq */
+			devq = bus->sim->devq;
+			cam_devq_resize(devq, devq->alloc_queue.array_size - 1);
+		}
 		splx(s);
 		camq_fini(&device->drvq);
 		camq_fini(&device->ccbq.queue);
@@ -7096,8 +7184,10 @@
 			s = splcam();
 			cam_ccbq_ccb_done(&dev->ccbq, (union ccb *)ccb_h);
 
-			ccb_h->path->bus->sim->devq->send_active--;
-			ccb_h->path->bus->sim->devq->send_openings++;
+			if (!SIM_DEAD(ccb_h->path->bus->sim)) {
+				ccb_h->path->bus->sim->devq->send_active--;
+				ccb_h->path->bus->sim->devq->send_openings++;
+			}
 			splx(s);
 			
 			if (((dev->flags & CAM_DEV_REL_ON_COMPLETE) != 0
@@ -7145,3 +7235,16 @@
 	}
 	splx(s);
 }
+
+static void
+dead_sim_action(struct cam_sim *sim, union ccb *ccb)
+{
+
+	ccb->ccb_h.status = CAM_DEV_NOT_THERE;
+	xpt_done(ccb);
+}
+ 
+static void
+dead_sim_poll(struct cam_sim *sim)
+{
+}


More information about the freebsd-scsi mailing list