cvs commit: src/sys/i386/i386 vm_machdep.c

Bruce Evans bde at zeta.org.au
Wed Dec 15 20:30:44 PST 2004


On Wed, 15 Dec 2004, Nate Lawson wrote:

> Bruce Evans wrote:
> > On Tue, 14 Dec 2004, Nate Lawson wrote:
> >>John Baldwin wrote:
> >>>Erm, well, that's not always easy since sometimes when you panic you can't
> >>>talk to the other CPUs for whatever reason.  Putting back the proxy reset
> >
> > The most common reason is that at least one other CPU is looping with
> > interrupts disabled.  Then it won't see IPIs and stop_cpus() will loop
> > forever.
>
> Yes, the NMI change would fix this.  Please repost your ddb sync patch.

Here it is.

Annotated version:

% Index: db_interface.c
% ===================================================================
% RCS file: /home/ncvs/src/sys/i386/i386/db_interface.c,v
% retrieving revision 1.81
% diff -u -2 -r1.81 db_interface.c
% --- db_interface.c	3 Apr 2004 22:23:36 -0000	1.81
% +++ db_interface.c	4 Apr 2004 04:28:47 -0000
% ...
% @@ -61,4 +69,33 @@
%  static jmp_buf	db_global_jmpbuf;
%
% +#ifdef SMP
% +/* XXX this is cloned from stop_cpus() since that function can hang. */
% +static int
% +attempt_to_stop_cpus(u_int map)
% +{
% +	int i;
% +
% +	if (!smp_started)
% +		return 0;
% +
% +	CTR1(KTR_SMP, "attempt_to_stop_cpus(%x)", map);
% +
% +	/* send the stop IPI to all CPUs in map */
% +	ipi_selected(map, IPI_STOP);
% +
% +	i = 0;
% +	while ((atomic_load_acq_int(&stopped_cpus) & map) != map) {
% +		/* spin */
% +		i++;
% +		if (i == 100000000) {
% +			printf("timeout stopping cpus\n");
% +			break;
% +		}
% +	}
% +
% +	return 1;
% +}
% +#endif /* SMP */
% +
%  /*
%   *  kdb_trap - field a TRACE or BPT trap

Needs to be done better.  Stopping the other CPUs on entry to ddb should
be non-optional and must always work.

% @@ -69,4 +106,8 @@
%  	u_int ef;
%  	volatile int ddb_mode = !(boothowto & RB_GDB);
% +#ifdef SMP
% +	static u_int kdb_trap_lock = NOCPU;
% +	static u_int output_lock;
% +#endif
%
%  	/*

kdb_trap_lock is for preventing concurrent entry.  output_lock is for
debugging.  The VERBOSE_CPUSTOP_ON_DDBBREAK option shouldn't exist
since it gets in the way and stopping should always happen, but
printfs fro different CPUs entering ddb is needed to debug this and
there is the usual problem with interleaved output.  Locking using
output_lock reduces the problem in this context only.

% @@ -91,16 +132,48 @@
%
%  #ifdef SMP
% +	if (atomic_cmpset_int(&kdb_trap_lock, NOCPU, PCPU_GET(cpuid)) == 0 &&
% +	    kdb_trap_lock != PCPU_GET(cpuid)) {

Normal spinlocking without using mutexes; should work OK.

% +		while (atomic_cmpset_int(&output_lock, 0, 1) == 0)
% +			;
% +		db_printf(
% +		    "concurrent ddb entry: type %d trap, code=%x cpu=%d\n",
% +		    type, code, PCPU_GET(cpuid));
% +		atomic_store_rel_int(&output_lock, 0);
% +		if (type == T_BPTFLT)
% +			regs->tf_eip--;

This attempts to restart the other CPU at the breakpoint.  It doesn't
work so well.  Backing up 1 instruction is very MD and doesn't even
always work on i386's since the breakpoint might be a 2-byte "int $3"
(but the kernel should only have 1-byte "int $3"'s).  I forget the
exact reason for restarting instead of continuing after waiting.  It
may be that (without other changes) we would have to wait here with
interrupts disabled, so we wouldn't see IPIs and attempt_to_stop_cpus()
would always fail.  Stopping the other CPUs using NMIs would move this
problem to one of stopping the CPUs that have blocked on kdb_trap_lock
without blocking the one that aquired the lock.  We could afford to
spin-wait here if we maintained some global state to tell
attempt_to_stop_cpus() not to care about CPUs spinning here.  This
would be similar to the state required to tell the NMI handler not to
care about CPUs spinning here.

% +		else {
% +			while (atomic_cmpset_int(&output_lock, 0, 1) == 0)
% +				;
% +			db_printf(
% +"concurrent ddb entry on non-breakpoint: too hard to handle properly\n");
% +			atomic_store_rel_int(&output_lock, 0);

As the comment says, restarting general debug exceptions is too hard.

% +		}
% +		while (atomic_load_acq_int(&kdb_trap_lock) != NOCPU)
% +			;

This always waits, so restarting in the breakpoint case doesn't help.

% +		write_eflags(ef);
% +		return (1);
% +	}
% +#endif
% +
% +#ifdef SMP
%  #ifdef CPUSTOP_ON_DDBBREAK
% +#define	VERBOSE_CPUSTOP_ON_DDBBREAK_NOT

Debugging cruft.  Normally I have VERBOSE_CPUSTOP_ON_DDBBREAK_NOT turned off
as above.

%
%  #if defined(VERBOSE_CPUSTOP_ON_DDBBREAK)
% +	while (atomic_cmpset_int(&output_lock, 0, 1) == 0)
% +		;
%  	db_printf("\nCPU%d stopping CPUs: 0x%08x...", PCPU_GET(cpuid),
%  	    PCPU_GET(other_cpus));
% +	atomic_store_rel_int(&output_lock, 0);
%  #endif /* VERBOSE_CPUSTOP_ON_DDBBREAK */
%
%  	/* We stop all CPUs except ourselves (obviously) */
% -	stop_cpus(PCPU_GET(other_cpus));
% +	attempt_to_stop_cpus(PCPU_GET(other_cpus));

Nothing new.

%
%  #if defined(VERBOSE_CPUSTOP_ON_DDBBREAK)
% -	db_printf(" stopped.\n");
% +	while (atomic_cmpset_int(&output_lock, 0, 1) == 0)
% +		;
% +	db_printf(" stopped\n");
% +	atomic_store_rel_int(&output_lock, 0);
%  #endif /* VERBOSE_CPUSTOP_ON_DDBBREAK */
%

This also backs out part of the residue of the style bugs in rev.1.51
(this commit did nothing good and everything except some of its style
bugs has been backed out).

% @@ -192,18 +265,29 @@
%
%  #if defined(VERBOSE_CPUSTOP_ON_DDBBREAK)
% +	while (atomic_cmpset_int(&output_lock, 0, 1) == 0)
% +		;
%  	db_printf("\nCPU%d restarting CPUs: 0x%08x...", PCPU_GET(cpuid),
%  	    stopped_cpus);
% +	atomic_store_rel_int(&output_lock, 0);
%  #endif /* VERBOSE_CPUSTOP_ON_DDBBREAK */
%
%  	/* Restart all the CPUs we previously stopped */
%  	if (stopped_cpus != PCPU_GET(other_cpus) && smp_started != 0) {
% +		while (atomic_cmpset_int(&output_lock, 0, 1) == 0)
% +			;
%  		db_printf("whoa, other_cpus: 0x%08x, stopped_cpus: 0x%08x\n",
%  			  PCPU_GET(other_cpus), stopped_cpus);
% +		atomic_store_rel_int(&output_lock, 0);

Nothing new.

% +#if 0
%  		panic("stop_cpus() failed");
% +#endif

Failure to stop all the other CPUs is expected now, so don't panic for it.

%  	}
%  	restart_cpus(stopped_cpus);
%
%  #if defined(VERBOSE_CPUSTOP_ON_DDBBREAK)
% -	db_printf(" restarted.\n");
% +	while (atomic_cmpset_int(&output_lock, 0, 1) == 0)
% +		;
% +	db_printf(" restarted\n");
% +	atomic_store_rel_int(&output_lock, 0);
%  #endif /* VERBOSE_CPUSTOP_ON_DDBBREAK */
%

This also backs out the rest of the residue of the style bugs in rev.1.51.

% @@ -211,6 +295,10 @@
%  #endif /* SMP */
%
% +#ifdef SMP
% +	atomic_store_rel_int(&kdb_trap_lock, NOCPU);
% +#endif
% +
%  	write_eflags(ef);
%
%  	return (1);
%  }

Actual patch:

%%%
Index: db_interface.c
===================================================================
RCS file: /home/ncvs/src/sys/i386/i386/db_interface.c,v
retrieving revision 1.81
diff -u -2 -r1.81 db_interface.c
--- db_interface.c	3 Apr 2004 22:23:36 -0000	1.81
+++ db_interface.c	4 Apr 2004 04:28:47 -0000
@@ -35,10 +39,14 @@
 #include <sys/reboot.h>
 #include <sys/cons.h>
+#include <sys/ktr.h>
 #include <sys/pcpu.h>
 #include <sys/proc.h>
+#ifdef SMP
 #include <sys/smp.h>
+#endif

 #include <machine/cpu.h>
 #ifdef SMP
+#include <machine/smp.h>
 #include <machine/smptests.h>	/** CPUSTOP_ON_DDBBREAK */
 #endif
@@ -61,4 +69,33 @@
 static jmp_buf	db_global_jmpbuf;

+#ifdef SMP
+/* XXX this is cloned from stop_cpus() since that function can hang. */
+static int
+attempt_to_stop_cpus(u_int map)
+{
+	int i;
+
+	if (!smp_started)
+		return 0;
+
+	CTR1(KTR_SMP, "attempt_to_stop_cpus(%x)", map);
+
+	/* send the stop IPI to all CPUs in map */
+	ipi_selected(map, IPI_STOP);
+
+	i = 0;
+	while ((atomic_load_acq_int(&stopped_cpus) & map) != map) {
+		/* spin */
+		i++;
+		if (i == 100000000) {
+			printf("timeout stopping cpus\n");
+			break;
+		}
+	}
+
+	return 1;
+}
+#endif /* SMP */
+
 /*
  *  kdb_trap - field a TRACE or BPT trap
@@ -69,4 +106,8 @@
 	u_int ef;
 	volatile int ddb_mode = !(boothowto & RB_GDB);
+#ifdef SMP
+	static u_int kdb_trap_lock = NOCPU;
+	static u_int output_lock;
+#endif

 	/*
@@ -91,16 +132,48 @@

 #ifdef SMP
+	if (atomic_cmpset_int(&kdb_trap_lock, NOCPU, PCPU_GET(cpuid)) == 0 &&
+	    kdb_trap_lock != PCPU_GET(cpuid)) {
+		while (atomic_cmpset_int(&output_lock, 0, 1) == 0)
+			;
+		db_printf(
+		    "concurrent ddb entry: type %d trap, code=%x cpu=%d\n",
+		    type, code, PCPU_GET(cpuid));
+		atomic_store_rel_int(&output_lock, 0);
+		if (type == T_BPTFLT)
+			regs->tf_eip--;
+		else {
+			while (atomic_cmpset_int(&output_lock, 0, 1) == 0)
+				;
+			db_printf(
+"concurrent ddb entry on non-breakpoint: too hard to handle properly\n");
+			atomic_store_rel_int(&output_lock, 0);
+		}
+		while (atomic_load_acq_int(&kdb_trap_lock) != NOCPU)
+			;
+		write_eflags(ef);
+		return (1);
+	}
+#endif
+
+#ifdef SMP
 #ifdef CPUSTOP_ON_DDBBREAK
+#define	VERBOSE_CPUSTOP_ON_DDBBREAK_NOT

 #if defined(VERBOSE_CPUSTOP_ON_DDBBREAK)
+	while (atomic_cmpset_int(&output_lock, 0, 1) == 0)
+		;
 	db_printf("\nCPU%d stopping CPUs: 0x%08x...", PCPU_GET(cpuid),
 	    PCPU_GET(other_cpus));
+	atomic_store_rel_int(&output_lock, 0);
 #endif /* VERBOSE_CPUSTOP_ON_DDBBREAK */

 	/* We stop all CPUs except ourselves (obviously) */
-	stop_cpus(PCPU_GET(other_cpus));
+	attempt_to_stop_cpus(PCPU_GET(other_cpus));

 #if defined(VERBOSE_CPUSTOP_ON_DDBBREAK)
-	db_printf(" stopped.\n");
+	while (atomic_cmpset_int(&output_lock, 0, 1) == 0)
+		;
+	db_printf(" stopped\n");
+	atomic_store_rel_int(&output_lock, 0);
 #endif /* VERBOSE_CPUSTOP_ON_DDBBREAK */

@@ -192,18 +265,29 @@

 #if defined(VERBOSE_CPUSTOP_ON_DDBBREAK)
+	while (atomic_cmpset_int(&output_lock, 0, 1) == 0)
+		;
 	db_printf("\nCPU%d restarting CPUs: 0x%08x...", PCPU_GET(cpuid),
 	    stopped_cpus);
+	atomic_store_rel_int(&output_lock, 0);
 #endif /* VERBOSE_CPUSTOP_ON_DDBBREAK */

 	/* Restart all the CPUs we previously stopped */
 	if (stopped_cpus != PCPU_GET(other_cpus) && smp_started != 0) {
+		while (atomic_cmpset_int(&output_lock, 0, 1) == 0)
+			;
 		db_printf("whoa, other_cpus: 0x%08x, stopped_cpus: 0x%08x\n",
 			  PCPU_GET(other_cpus), stopped_cpus);
+		atomic_store_rel_int(&output_lock, 0);
+#if 0
 		panic("stop_cpus() failed");
+#endif
 	}
 	restart_cpus(stopped_cpus);

 #if defined(VERBOSE_CPUSTOP_ON_DDBBREAK)
-	db_printf(" restarted.\n");
+	while (atomic_cmpset_int(&output_lock, 0, 1) == 0)
+		;
+	db_printf(" restarted\n");
+	atomic_store_rel_int(&output_lock, 0);
 #endif /* VERBOSE_CPUSTOP_ON_DDBBREAK */

@@ -211,6 +295,10 @@
 #endif /* SMP */

+#ifdef SMP
+	atomic_store_rel_int(&kdb_trap_lock, NOCPU);
+#endif
+
 	write_eflags(ef);

 	return (1);
 }
%%%

Bruce


More information about the cvs-all mailing list