PERFORCE change 152286 for review
Peter Wemm
peter at FreeBSD.org
Fri Oct 31 10:53:16 PDT 2008
http://perforce.freebsd.org/chv.cgi?CH=152286
Change 152286 by peter at peter_daintree on 2008/10/31 17:52:42
Integrate @152285
Affected files ...
.. //depot/projects/valgrind/VEX/priv/guest-generic/bb_to_IR.c#3 integrate
.. //depot/projects/valgrind/cachegrind/docs/cg-manual.xml#3 integrate
.. //depot/projects/valgrind/callgrind/docs/cl-manual.xml#3 integrate
.. //depot/projects/valgrind/callgrind/dump.c#3 integrate
.. //depot/projects/valgrind/coregrind/Makefile.am#7 integrate
.. //depot/projects/valgrind/coregrind/m_aspacemgr/aspacemgr-linux.c#4 integrate
.. //depot/projects/valgrind/coregrind/m_debuginfo/debuginfo.c#6 integrate
.. //depot/projects/valgrind/coregrind/m_debuginfo/priv_storage.h#3 integrate
.. //depot/projects/valgrind/coregrind/m_debuginfo/priv_tytypes.h#2 integrate
.. //depot/projects/valgrind/coregrind/m_debuginfo/readdwarf3.c#2 integrate
.. //depot/projects/valgrind/coregrind/m_debuginfo/readelf.c#4 integrate
.. //depot/projects/valgrind/coregrind/m_debuginfo/storage.c#3 integrate
.. //depot/projects/valgrind/coregrind/m_debuginfo/tytypes.c#2 integrate
.. //depot/projects/valgrind/coregrind/m_demangle/ansidecl.h#2 integrate
.. //depot/projects/valgrind/coregrind/m_demangle/cp-demangle.c#3 integrate
.. //depot/projects/valgrind/coregrind/m_demangle/cp-demangle.h#1 branch
.. //depot/projects/valgrind/coregrind/m_demangle/cplus-dem.c#3 integrate
.. //depot/projects/valgrind/coregrind/m_demangle/demangle.c#3 integrate
.. //depot/projects/valgrind/coregrind/m_demangle/demangle.h#3 integrate
.. //depot/projects/valgrind/coregrind/m_demangle/dyn-string.c#3 integrate
.. //depot/projects/valgrind/coregrind/m_demangle/dyn-string.h#2 integrate
.. //depot/projects/valgrind/coregrind/m_demangle/safe-ctype.c#2 integrate
.. //depot/projects/valgrind/coregrind/m_demangle/safe-ctype.h#2 integrate
.. //depot/projects/valgrind/coregrind/m_demangle/vg_libciface.h#1 branch
.. //depot/projects/valgrind/coregrind/m_errormgr.c#3 integrate
.. //depot/projects/valgrind/coregrind/m_execontext.c#3 integrate
.. //depot/projects/valgrind/coregrind/m_libcbase.c#5 integrate
.. //depot/projects/valgrind/coregrind/m_main.c#9 integrate
.. //depot/projects/valgrind/coregrind/m_stacktrace.c#4 integrate
.. //depot/projects/valgrind/coregrind/m_syswrap/syswrap-amd64-linux.c#3 integrate
.. //depot/projects/valgrind/coregrind/m_syswrap/syswrap-generic.c#7 integrate
.. //depot/projects/valgrind/coregrind/m_syswrap/syswrap-main.c#10 integrate
.. //depot/projects/valgrind/coregrind/m_trampoline.S#5 integrate
.. //depot/projects/valgrind/coregrind/m_xarray.c#2 integrate
.. //depot/projects/valgrind/coregrind/pub_core_debuginfo.h#4 integrate
.. //depot/projects/valgrind/docs/internals/3_3_BUGSTATUS.txt#2 integrate
.. //depot/projects/valgrind/docs/internals/BIG_APP_NOTES.txt#1 branch
.. //depot/projects/valgrind/docs/internals/Makefile.am#3 integrate
.. //depot/projects/valgrind/docs/internals/howto_BUILD_KDE42.txt#1 branch
.. //depot/projects/valgrind/docs/internals/howto_oprofile.txt#1 branch
.. //depot/projects/valgrind/docs/xml/manual-core.xml#3 integrate
.. //depot/projects/valgrind/docs/xml/manual.xml#3 integrate
.. //depot/projects/valgrind/docs/xml/valgrind-manpage.xml#3 integrate
.. //depot/projects/valgrind/exp-ptrcheck/Makefile.am#2 integrate
.. //depot/projects/valgrind/exp-ptrcheck/README.ABOUT.PTRCHECK.txt#2 delete
.. //depot/projects/valgrind/exp-ptrcheck/docs/Makefile.am#2 integrate
.. //depot/projects/valgrind/exp-ptrcheck/docs/pc-manual.xml#1 branch
.. //depot/projects/valgrind/exp-ptrcheck/h_main.c#3 integrate
.. //depot/projects/valgrind/glibc-2.34567-NPTL-helgrind.supp#2 integrate
.. //depot/projects/valgrind/helgrind/Makefile.am#4 integrate
.. //depot/projects/valgrind/helgrind/README_MSMProp2.txt#1 branch
.. //depot/projects/valgrind/helgrind/README_YARD.txt#1 branch
.. //depot/projects/valgrind/helgrind/helgrind.h#3 integrate
.. //depot/projects/valgrind/helgrind/hg_basics.c#1 branch
.. //depot/projects/valgrind/helgrind/hg_basics.h#1 branch
.. //depot/projects/valgrind/helgrind/hg_errors.c#1 branch
.. //depot/projects/valgrind/helgrind/hg_errors.h#1 branch
.. //depot/projects/valgrind/helgrind/hg_intercepts.c#2 integrate
.. //depot/projects/valgrind/helgrind/hg_lock_n_thread.c#1 branch
.. //depot/projects/valgrind/helgrind/hg_lock_n_thread.h#1 branch
.. //depot/projects/valgrind/helgrind/hg_main.c#3 integrate
.. //depot/projects/valgrind/helgrind/hg_wordset.c#2 integrate
.. //depot/projects/valgrind/helgrind/hg_wordset.h#2 integrate
.. //depot/projects/valgrind/helgrind/libhb.h#1 branch
.. //depot/projects/valgrind/helgrind/libhb_core.c#1 branch
.. //depot/projects/valgrind/include/pub_tool_execontext.h#3 integrate
.. //depot/projects/valgrind/include/pub_tool_libcbase.h#3 integrate
.. //depot/projects/valgrind/massif/hp2ps/AreaBelow.c#2 delete
.. //depot/projects/valgrind/massif/hp2ps/AreaBelow.h#2 delete
.. //depot/projects/valgrind/massif/hp2ps/AuxFile.c#2 delete
.. //depot/projects/valgrind/massif/hp2ps/AuxFile.h#2 delete
.. //depot/projects/valgrind/massif/hp2ps/Axes.c#2 delete
.. //depot/projects/valgrind/massif/hp2ps/Axes.h#2 delete
.. //depot/projects/valgrind/massif/hp2ps/CHANGES#2 delete
.. //depot/projects/valgrind/massif/hp2ps/Curves.c#2 delete
.. //depot/projects/valgrind/massif/hp2ps/Curves.h#2 delete
.. //depot/projects/valgrind/massif/hp2ps/Defines.h#2 delete
.. //depot/projects/valgrind/massif/hp2ps/Deviation.c#2 delete
.. //depot/projects/valgrind/massif/hp2ps/Deviation.h#2 delete
.. //depot/projects/valgrind/massif/hp2ps/Dimensions.c#2 delete
.. //depot/projects/valgrind/massif/hp2ps/Dimensions.h#2 delete
.. //depot/projects/valgrind/massif/hp2ps/Error.c#2 delete
.. //depot/projects/valgrind/massif/hp2ps/Error.h#2 delete
.. //depot/projects/valgrind/massif/hp2ps/HpFile.c#2 delete
.. //depot/projects/valgrind/massif/hp2ps/HpFile.h#2 delete
.. //depot/projects/valgrind/massif/hp2ps/INSTALL#2 delete
.. //depot/projects/valgrind/massif/hp2ps/Key.c#2 delete
.. //depot/projects/valgrind/massif/hp2ps/Key.h#2 delete
.. //depot/projects/valgrind/massif/hp2ps/LICENSE#2 delete
.. //depot/projects/valgrind/massif/hp2ps/Main.c#2 delete
.. //depot/projects/valgrind/massif/hp2ps/Main.h#2 delete
.. //depot/projects/valgrind/massif/hp2ps/Makefile.am#2 delete
.. //depot/projects/valgrind/massif/hp2ps/Makefile.old#2 delete
.. //depot/projects/valgrind/massif/hp2ps/Marks.c#2 delete
.. //depot/projects/valgrind/massif/hp2ps/Marks.h#2 delete
.. //depot/projects/valgrind/massif/hp2ps/PsFile.c#2 delete
.. //depot/projects/valgrind/massif/hp2ps/PsFile.h#2 delete
.. //depot/projects/valgrind/massif/hp2ps/README#2 delete
.. //depot/projects/valgrind/massif/hp2ps/Reorder.c#2 delete
.. //depot/projects/valgrind/massif/hp2ps/Reorder.h#2 delete
.. //depot/projects/valgrind/massif/hp2ps/Scale.c#2 delete
.. //depot/projects/valgrind/massif/hp2ps/Scale.h#2 delete
.. //depot/projects/valgrind/massif/hp2ps/Shade.c#2 delete
.. //depot/projects/valgrind/massif/hp2ps/Shade.h#2 delete
.. //depot/projects/valgrind/massif/hp2ps/TopTwenty.c#2 delete
.. //depot/projects/valgrind/massif/hp2ps/TopTwenty.h#2 delete
.. //depot/projects/valgrind/massif/hp2ps/TraceElement.c#2 delete
.. //depot/projects/valgrind/massif/hp2ps/TraceElement.h#2 delete
.. //depot/projects/valgrind/massif/hp2ps/Utilities.c#2 delete
.. //depot/projects/valgrind/massif/hp2ps/Utilities.h#2 delete
.. //depot/projects/valgrind/massif/hp2ps/hp2ps.1#2 delete
.. //depot/projects/valgrind/memcheck/mc_translate.c#3 integrate
.. //depot/projects/valgrind/none/tests/amd64/Makefile.am#3 integrate
.. //depot/projects/valgrind/none/tests/amd64/bug156404-amd64.c#1 branch
.. //depot/projects/valgrind/none/tests/amd64/bug156404-amd64.stderr.exp#1 branch
.. //depot/projects/valgrind/none/tests/amd64/bug156404-amd64.stdout.exp#1 branch
.. //depot/projects/valgrind/none/tests/amd64/bug156404-amd64.vgtest#1 branch
.. //depot/projects/valgrind/xfree-4.supp#4 integrate
Differences ...
==== //depot/projects/valgrind/VEX/priv/guest-generic/bb_to_IR.c#3 (text+ko) ====
@@ -376,9 +376,12 @@
irsb->stmts[selfcheck_idx+3]
= IRStmt_Put( offB_TILEN, IRExpr_RdTmp(tilen_tmp) );
- p_adler_helper = abiinfo_both->host_ppc_calls_use_fndescrs
- ? ((HWord*)(&genericg_compute_adler32))[0]
- : (HWord)&genericg_compute_adler32;
+ if (abiinfo_both->host_ppc_calls_use_fndescrs) {
+ HWord* fndescr = (HWord*)&genericg_compute_adler32;
+ p_adler_helper = fndescr[0];
+ } else {
+ p_adler_helper = (HWord)&genericg_compute_adler32;
+ }
irsb->stmts[selfcheck_idx+4]
= IRStmt_Exit(
==== //depot/projects/valgrind/cachegrind/docs/cg-manual.xml#3 (text+ko) ====
@@ -807,16 +807,12 @@
instructions.</para>
<para>To do this, you just need to assemble your
-<computeroutput>.s</computeroutput> files with assembler-level
-debug information. gcc doesn't do this, but you can use the GNU
-assembler with the <computeroutput>--gstabs</computeroutput>
-option to generate object files with this information, eg:</para>
-
-<programlisting><![CDATA[
-as --gstabs foo.s]]></programlisting>
-
-<para>You can then profile and annotate source files in the same
-way as for C/C++ programs.</para>
+<computeroutput>.s</computeroutput> files with assembly-level debug
+information. You can use <computeroutput>gcc
+-S</computeroutput> to compile C/C++ programs to assembly code, and then
+<computeroutput>gcc -g</computeroutput> on the assembly code files to
+achieve this. You can then profile and annotate the assembly code source
+files in the same way as C/C++ source files.</para>
</sect2>
==== //depot/projects/valgrind/callgrind/docs/cl-manual.xml#3 (text+ko) ====
@@ -197,7 +197,7 @@
<computeroutput>callgrind_control -i on</computeroutput> just before the
interesting code section is executed. To exactly specify
the code position where profiling should start, use the client request
- <computeroutput>CALLGRIND_START_INSTRUMENTATION</computeroutput>.</para>
+ <computeroutput><xref linkend="cr.start-instr"/></computeroutput>.</para>
<para>If you want to be able to see assembly code level annotation, specify
<option><xref linkend="opt.dump-instr"/>=yes</option>. This will produce
@@ -292,18 +292,13 @@
<listitem>
<para><command>Program controlled dumping.</command>
- Put <screen><![CDATA[#include <valgrind/callgrind.h>]]></screen>
- into your source and add
- <computeroutput>CALLGRIND_DUMP_STATS;</computeroutput> when you
- want a dump to happen. Use
- <computeroutput>CALLGRIND_ZERO_STATS;</computeroutput> to only
- zero cost centers.</para>
- <para>In Valgrind terminology, this method is called "Client
- requests". The given macros generate a special instruction
- pattern with no effect at all (i.e. a NOP). When run under
- Valgrind, the CPU simulation engine detects the special
- instruction pattern and triggers special actions like the ones
- described above.</para>
+ Insert
+ <computeroutput><xref linkend="cr.dump-stats"/>;</computeroutput>
+ at the position in your code where you want a profile dump to happen. Use
+ <computeroutput><xref linkend="cr.zero-stats"/>;</computeroutput> to only
+ zero profile counters.
+ See <xref linkend="cl-manual.clientrequests"/> for more information on
+ Callgrind specific client requests.</para>
</listitem>
</itemizedlist>
@@ -338,8 +333,8 @@
with <screen>callgrind_control -i on</screen>
and off by specifying "off" instead of "on".
Furthermore, instrumentation state can be programatically changed with
- the macros <computeroutput>CALLGRIND_START_INSTRUMENTATION;</computeroutput>
- and <computeroutput>CALLGRIND_STOP_INSTRUMENTATION;</computeroutput>.
+ the macros <computeroutput><xref linkend="cr.start-instr"/>;</computeroutput>
+ and <computeroutput><xref linkend="cr.stop-instr"/>;</computeroutput>.
</para>
<para>In addition to enabling instrumentation, you must also enable
@@ -471,6 +466,27 @@
</sect2>
+ <sect2 id="cl-manual.forkingprograms" xreflabel="Forking Programs">
+ <title>Forking Programs</title>
+
+ <para>If your program forks, the child will inherit all the profiling
+ data that has been gathered for the parent. To start with empty profile
+ counter values in the child, the client request
+ <computeroutput><xref linkend="cr.zero-stats"/>;</computeroutput>
+ can be inserted into code to be executed by the child, directly after
+ <computeroutput>fork()</computeroutput>.</para>
+
+ <para>However, you will have to make sure that the output file format string
+ (controlled by <option>--callgrind-out-file</option>) does contain
+ <option>%p</option> (which is true by default). Otherwise, the
+ outputs from the parent and child will overwrite each other or will be
+ intermingled, which almost certainly is not what you want.</para>
+
+ <para>You will be able to control the new child independently from
+ the parent via <computeroutput>callgrind_control</computeroutput>.</para>
+
+ </sect2>
+
</sect1>
@@ -701,7 +717,7 @@
</listitem>
</varlistentry>
- <varlistentry id="opt.collect-atstart">
+ <varlistentry id="opt.collect-atstart" xreflabel="--collect-atstart">
<term>
<option><![CDATA[--collect-atstart=<yes|no> [default: yes] ]]></option>
</term>
@@ -733,13 +749,9 @@
specification of <computeroutput>--toggle-collect</computeroutput>
implicitly sets
<computeroutput>--collect-state=no</computeroutput>.</para>
- <para>Collection state can be toggled also by using a Valgrind
- Client Request in your application. For this, include
- <computeroutput>valgrind/callgrind.h</computeroutput> and specify
- the macro
- <computeroutput>CALLGRIND_TOGGLE_COLLECT</computeroutput> at the
- needed positions. This only will have any effect if run under
- supervision of the Callgrind tool.</para>
+ <para>Collection state can be toggled also by inserting the client request
+ <computeroutput><xref linkend="cr.toggle-collect"/>;</computeroutput>
+ at the needed code positions.</para>
</listitem>
</varlistentry>
@@ -912,4 +924,94 @@
</sect1>
+<sect1 id="cl-manual.clientrequests" xreflabel="Client request reference">
+<title>Callgrind specific client requests</title>
+
+<para>In Valgrind terminology, a client request is a C macro which
+can be inserted into your code to request specific functionality when
+run under Valgrind. For this, special instruction patterns are used
+which result in NOPs but can be detected by Valgrind.</para>
+
+<para>Callgrind provides the following specific client requests.
+To use them, add the line
+<screen><![CDATA[#include <valgrind/callgrind.h>]]></screen>
+into your code for the macro definitions.
+</para>
+
+<variablelist id="cl.clientrequests.list">
+
+ <varlistentry id="cr.dump-stats" xreflabel="CALLGRIND_DUMP_STATS">
+ <term>
+ <computeroutput>CALLGRIND_DUMP_STATS</computeroutput>
+ </term>
+ <listitem>
+ <para>Force generation of a profile dump at specified position
+ in code, for the current thread only. Written counters will be reset
+ to zero.</para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry id="cr.dump-stats-at" xreflabel="CALLGRIND_DUMP_STATS_AT">
+ <term>
+ <computeroutput>CALLGRIND_DUMP_STATS_AT(string)</computeroutput>
+ </term>
+ <listitem>
+ <para>Same as CALLGRIND_DUMP_STATS, but lets you specify a string
+ so that profile dumps can be told apart.</para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry id="cr.zero-stats" xreflabel="CALLGRIND_ZERO_STATS">
+ <term>
+ <computeroutput>CALLGRIND_ZERO_STATS</computeroutput>
+ </term>
+ <listitem>
+ <para>Reset the profile counters for the current thread to zero.</para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry id="cr.toggle-collect" xreflabel="CALLGRIND_TOGGLE_COLLECT">
+ <term>
+ <computeroutput>CALLGRIND_TOGGLE_COLLECT</computeroutput>
+ </term>
+ <listitem>
+ <para>Toggle the collection state. This allows events to be ignored
+ with regard to profile counters. See also options
+ <xref linkend="opt.collect-atstart"/> and
+ <xref linkend="opt.toggle-collect"/>.</para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry id="cr.start-instr" xreflabel="CALLGRIND_START_INSTRUMENTATION">
+ <term>
+ <computeroutput>CALLGRIND_START_INSTRUMENTATION</computeroutput>
+ </term>
+ <listitem>
+ <para>Start full Callgrind instrumentation if not already switched on.
+ When cache simulation is done, this will flush the simulated cache
+ and lead to an artificial cache warmup phase afterwards with
+ cache misses which would not have happened in reality.
+ See also option <xref linkend="opt.instr-atstart"/>.</para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry id="cr.stop-instr" xreflabel="CALLGRIND_STOP_INSTRUMENTATION">
+ <term>
+ <computeroutput>CALLGRIND_STOP_INSTRUMENTATION</computeroutput>
+ </term>
+ <listitem>
+ <para>Stop full Callgrind instrumentation if not already switched off.
+ This flushes Valgrind's translation cache, and does no additional
+ instrumentation afterwards: it effectively will run at the same
+ speed as the "none" tool, i.e. at minimal slowdown. Use this to
+ speed up the Callgrind run for uninteresting code parts. Use
+ <xref linkend="cr.start-instr"/> to switch on instrumentation again.
+ See also option <xref linkend="opt.instr-atstart"/>.</para>
+ </listitem>
+ </varlistentry>
+
+</variablelist>
+
+</sect1>
+
</chapter>
==== //depot/projects/valgrind/callgrind/dump.c#3 (text+ko) ====
@@ -64,13 +64,13 @@
Char* CLG_(get_out_file)()
{
- CLG_ASSERT(dumps_initialized);
+ CLG_(init_dumps)();
return out_file;
}
Char* CLG_(get_out_directory)()
{
- CLG_ASSERT(dumps_initialized);
+ CLG_(init_dumps)();
return out_directory;
}
@@ -1616,6 +1616,8 @@
CLG_DEBUG(2, "+ dump_profile(Trigger '%s')\n",
trigger ? trigger : (Char*)"Prg.Term.");
+ CLG_(init_dumps)();
+
if (VG_(clo_verbosity) > 1)
VG_(message)(Vg_DebugMsg, "Start dumping at BB %llu (%s)...",
CLG_(stat).bb_executions,
@@ -1673,15 +1675,35 @@
* <out_file> always starts with a full absolute path.
* If the output format string represents a relative path, the current
* working directory at program start is used.
+ *
+ * This function has to be called every time a profile dump is generated
+ * to be able to react to PID changes.
*/
void CLG_(init_dumps)()
{
Int lastSlash, i;
SysRes res;
+ static int thisPID = 0;
+ int currentPID = VG_(getpid)();
+ if (currentPID == thisPID) {
+ /* already initialized, and no PID change */
+ CLG_ASSERT(out_file != 0);
+ return;
+ }
+ thisPID = currentPID;
+
if (!CLG_(clo).out_format)
CLG_(clo).out_format = DEFAULT_OUTFORMAT;
+ /* If a file name was already set, clean up before */
+ if (out_file) {
+ VG_(free)(out_file);
+ VG_(free)(out_directory);
+ VG_(free)(filename);
+ out_counter = 0;
+ }
+
// Setup output filename.
out_file =
VG_(expand_file_name)("--callgrind-out-file", CLG_(clo).out_format);
@@ -1721,7 +1743,8 @@
}
if (!res.isError) VG_(close)( (Int)res.res );
- init_cmdbuf();
+ if (!dumps_initialized)
+ init_cmdbuf();
dumps_initialized = True;
}
==== //depot/projects/valgrind/coregrind/Makefile.am#7 (text+ko) ====
@@ -169,9 +169,11 @@
m_debuginfo/priv_readelf.h \
m_debuginfo/priv_readxcoff.h \
m_demangle/ansidecl.h \
+ m_demangle/cp-demangle.h \
m_demangle/dyn-string.h \
m_demangle/demangle.h \
m_demangle/safe-ctype.h \
+ m_demangle/vg_libciface.h \
m_scheduler/priv_sema.h \
m_syswrap/priv_types_n_macros.h \
m_syswrap/priv_syswrap-generic.h \
==== //depot/projects/valgrind/coregrind/m_aspacemgr/aspacemgr-linux.c#4 (text+ko) ====
@@ -325,6 +325,7 @@
/* ------ end of STATE for the address-space manager ------ */
/* ------ Forwards decls ------ */
+inline
static Int find_nsegment_idx ( Addr a );
static void parse_procselfmaps (
@@ -1101,8 +1102,19 @@
/*-----------------------------------------------------------------*/
/* Binary search the interval array for a given address. Since the
- array covers the entire address space the search cannot fail. */
-static Int find_nsegment_idx ( Addr a )
+ array covers the entire address space the search cannot fail. The
+ _WRK function does the real work. Its caller (just below) caches
+ the results thereof, to save time. With N_CACHE of 63 we get a hit
+ rate exceeding 90% when running OpenOffice.
+
+ Re ">> 12", it doesn't matter that the page size of some targets
+ might be different from 2^12. Really "(a >> 12) % N_CACHE" is merely
+ a hash function, and the selected cache entry is always validated
+ against the actual segment bounds before use.
+*/
+/* Don't call find_nsegment_idx_WRK; use find_nsegment_idx instead. */
+__attribute__((noinline))
+static Int find_nsegment_idx_WRK ( Addr a )
{
Addr a_mid_lo, a_mid_hi;
Int mid,
@@ -1126,6 +1138,52 @@
}
}
+inline static Int find_nsegment_idx ( Addr a )
+{
+# define N_CACHE 63
+ static Addr cache_pageno[N_CACHE];
+ static Int cache_segidx[N_CACHE];
+ static Bool cache_inited = False;
+
+ static UWord n_q = 0;
+ static UWord n_m = 0;
+
+ UWord ix;
+
+ if (LIKELY(cache_inited)) {
+ /* do nothing */
+ } else {
+ for (ix = 0; ix < N_CACHE; ix++) {
+ cache_pageno[ix] = 0;
+ cache_segidx[ix] = -1;
+ }
+ cache_inited = True;
+ }
+
+ ix = (a >> 12) % N_CACHE;
+
+ n_q++;
+ if (0 && 0 == (n_q & 0xFFFF))
+ VG_(debugLog)(0,"xxx","find_nsegment_idx: %lu %lu\n", n_q, n_m);
+
+ if ((a >> 12) == cache_pageno[ix]
+ && cache_segidx[ix] >= 0
+ && cache_segidx[ix] < nsegments_used
+ && nsegments[cache_segidx[ix]].start <= a
+ && a <= nsegments[cache_segidx[ix]].end) {
+ /* hit */
+ /* aspacem_assert( cache_segidx[ix] == find_nsegment_idx_WRK(a) ); */
+ return cache_segidx[ix];
+ }
+ /* miss */
+ n_m++;
+ cache_segidx[ix] = find_nsegment_idx_WRK(a);
+ cache_pageno[ix] = a >> 12;
+ return cache_segidx[ix];
+# undef N_CACHE
+}
+
+
/* Finds the segment containing 'a'. Only returns file/anon/resvn
segments. This returns a 'NSegment const *' - a pointer to
==== //depot/projects/valgrind/coregrind/m_debuginfo/debuginfo.c#6 (text+ko) ====
@@ -99,6 +99,13 @@
/*------------------------------------------------------------*/
+/*--- fwdses ---*/
+/*------------------------------------------------------------*/
+
+static void cfsi_cache__invalidate ( void );
+
+
+/*------------------------------------------------------------*/
/*--- Root structure ---*/
/*------------------------------------------------------------*/
@@ -320,10 +327,11 @@
/* Repeatedly scan debugInfo_list, looking for DebugInfos with text
AVMAs intersecting [start,start+length), and call discard_DebugInfo
to get rid of them. This modifies the list, hence the multiple
- iterations.
+ iterations. Returns True iff any such DebugInfos were found.
*/
-static void discard_syms_in_range ( Addr start, SizeT length )
+static Bool discard_syms_in_range ( Addr start, SizeT length )
{
+ Bool anyFound = False;
Bool found;
DebugInfo* curr;
@@ -347,8 +355,11 @@
}
if (!found) break;
+ anyFound = True;
discard_DebugInfo( curr );
}
+
+ return anyFound;
}
@@ -479,8 +490,86 @@
}
+/* Debuginfo reading for 'di' has just been successfully completed.
+ Check that the invariants stated in
+ "Comment_on_IMPORTANT_CFSI_REPRESENTATIONAL_INVARIANTS" in
+ priv_storage.h are observed. */
+static void check_CFSI_related_invariants ( DebugInfo* di )
+{
+ DebugInfo* di2 = NULL;
+ vg_assert(di);
+ /* This fn isn't called until after debuginfo for this object has
+ been successfully read. And that shouldn't happen until we have
+ both a r-x and rw- mapping for the object. Hence: */
+ vg_assert(di->have_rx_map);
+ vg_assert(di->have_rw_map);
+ /* degenerate case: r-x section is empty */
+ if (di->rx_map_size == 0) {
+ vg_assert(di->cfsi == NULL);
+ return;
+ }
+ /* normal case: r-x section is nonempty */
+ /* invariant (0) */
+ vg_assert(di->rx_map_size > 0);
+ /* invariant (1) */
+ for (di2 = debugInfo_list; di2; di2 = di2->next) {
+ if (di2 == di)
+ continue;
+ if (di2->rx_map_size == 0)
+ continue;
+ vg_assert(di->rx_map_avma + di->rx_map_size <= di2->rx_map_avma
+ || di2->rx_map_avma + di2->rx_map_size <= di->rx_map_avma);
+ }
+ di2 = NULL;
+ /* invariant (2) */
+ if (di->cfsi) {
+ vg_assert(di->cfsi_minavma <= di->cfsi_maxavma); /* duh! */
+ vg_assert(di->cfsi_minavma >= di->rx_map_avma);
+ vg_assert(di->cfsi_maxavma < di->rx_map_avma + di->rx_map_size);
+ }
+ /* invariants (3) and (4) */
+ if (di->cfsi) {
+ Word i;
+ vg_assert(di->cfsi_used > 0);
+ vg_assert(di->cfsi_size > 0);
+ for (i = 0; i < di->cfsi_used; i++) {
+ DiCfSI* cfsi = &di->cfsi[i];
+ vg_assert(cfsi->len > 0);
+ vg_assert(cfsi->base >= di->cfsi_minavma);
+ vg_assert(cfsi->base + cfsi->len - 1 <= di->cfsi_maxavma);
+ if (i > 0) {
+ DiCfSI* cfsip = &di->cfsi[i-1];
+ vg_assert(cfsip->base + cfsip->len <= cfsi->base);
+ }
+ }
+ } else {
+ vg_assert(di->cfsi_used == 0);
+ vg_assert(di->cfsi_size == 0);
+ }
+}
+
+
/*--------------------------------------------------------------*/
/*--- ---*/
+/*--- TOP LEVEL: INITIALISE THE DEBUGINFO SYSTEM ---*/
+/*--- ---*/
+/*--------------------------------------------------------------*/
+
+void VG_(di_initialise) ( void )
+{
+ /* There's actually very little to do here, since everything
+ centers around the DebugInfos in debugInfo_list, they are
+ created and destroyed on demand, and each one is treated more or
+ less independently. */
+ vg_assert(debugInfo_list == NULL);
+
+ /* flush the CFI fast query cache. */
+ cfsi_cache__invalidate();
+}
+
+
+/*--------------------------------------------------------------*/
+/*--- ---*/
/*--- TOP LEVEL: NOTIFICATION (ACQUIRE/DISCARD INFO) (LINUX) ---*/
/*--- ---*/
/*--------------------------------------------------------------*/
@@ -719,6 +808,8 @@
TRACE_SYMTAB("\n------ Canonicalising the "
"acquired info ------\n");
+ /* invalidate the CFI unwind cache. */
+ cfsi_cache__invalidate();
/* prepare read data for use */
ML_(canonicaliseTables)( di );
/* notify m_redir about it */
@@ -728,6 +819,10 @@
di->have_dinfo = True;
tl_assert(di->handle > 0);
di_handle = di->handle;
+ /* Check invariants listed in
+ Comment_on_IMPORTANT_CFSI_REPRESENTATIONAL_INVARIANTS in
+ priv_storage.h. */
+ check_CFSI_related_invariants(di);
} else {
TRACE_SYMTAB("\n------ ELF reading failed ------\n");
@@ -735,6 +830,7 @@
this DebugInfo? No - it contains info on the rw/rx
mappings, at least. */
di_handle = 0;
+ vg_assert(di->have_dinfo == False);
}
TRACE_SYMTAB("\n");
@@ -751,8 +847,11 @@
[a, a+len). */
void VG_(di_notify_munmap)( Addr a, SizeT len )
{
+ Bool anyFound;
if (0) VG_(printf)("DISCARD %#lx %#lx\n", a, a+len);
- discard_syms_in_range(a, len);
+ anyFound = discard_syms_in_range(a, len);
+ if (anyFound)
+ cfsi_cache__invalidate();
}
@@ -766,8 +865,11 @@
# if defined(VGP_x86_linux) || defined(VGP_x86_freebsd)
exe_ok = exe_ok || toBool(prot & VKI_PROT_READ);
# endif
- if (0 && !exe_ok)
- discard_syms_in_range(a, len);
+ if (0 && !exe_ok) {
+ Bool anyFound = discard_syms_in_range(a, len);
+ if (anyFound)
+ cfsi_cache__invalidate();
+ }
}
#endif /* defined(VGO_linux) */
@@ -798,6 +900,10 @@
{
ULong hdl = 0;
+ /* play safe; always invalidate the CFI cache. Not
+ that it should be used on AIX, but still .. */
+ cfsi_cache__invalidate();
+
if (acquire) {
Bool ok;
@@ -841,6 +947,10 @@
di->have_dinfo = True;
hdl = di->handle;
vg_assert(hdl > 0);
+ /* Check invariants listed in
+ Comment_on_IMPORTANT_CFSI_REPRESENTATIONAL_INVARIANTS in
+ priv_storage.h. */
+ check_CFSI_related_invariants(di);
} else {
/* Something went wrong (eg. bad XCOFF file). */
discard_DebugInfo( di );
@@ -851,8 +961,11 @@
/* Dump all the debugInfos whose text segments intersect
code_start/code_len. */
+ /* CFI cache is always invalidated at start of this routine.
+ Hence it's safe to ignore the return value of
+ discard_syms_in_range. */
if (code_len > 0)
- discard_syms_in_range( code_start, code_len );
+ (void)discard_syms_in_range( code_start, code_len );
}
@@ -894,11 +1007,11 @@
If findText==False, only data symbols are searched for.
*/
static void search_all_symtabs ( Addr ptr, /*OUT*/DebugInfo** pdi,
- /*OUT*/Int* symno,
+ /*OUT*/Word* symno,
Bool match_anywhere_in_sym,
Bool findText )
{
- Int sno;
+ Word sno;
DebugInfo* di;
Bool inRange;
@@ -945,9 +1058,9 @@
*pdi to the relevant DebugInfo, and *locno to the loctab entry
*number within that. If not found, *pdi is set to NULL. */
static void search_all_loctabs ( Addr ptr, /*OUT*/DebugInfo** pdi,
- /*OUT*/Int* locno )
+ /*OUT*/Word* locno )
{
- Int lno;
+ Word lno;
DebugInfo* di;
for (di = debugInfo_list; di != NULL; di = di->next) {
if (di->text_present
@@ -978,7 +1091,7 @@
Bool findText, /*OUT*/OffT* offsetP )
{
DebugInfo* di;
- Int sno;
+ Word sno;
Int offset;
search_all_symtabs ( a, &di, &sno, match_anywhere_in_sym, findText );
@@ -1020,7 +1133,7 @@
Addr VG_(get_tocptr) ( Addr guest_code_addr )
{
DebugInfo* si;
- Int sno;
+ Word sno;
search_all_symtabs ( guest_code_addr,
&si, &sno,
True/*match_anywhere_in_fun*/,
@@ -1187,7 +1300,7 @@
Bool VG_(get_filename)( Addr a, Char* filename, Int n_filename )
{
DebugInfo* si;
- Int locno;
+ Word locno;
search_all_loctabs ( a, &si, &locno );
if (si == NULL)
return False;
@@ -1199,7 +1312,7 @@
Bool VG_(get_linenum)( Addr a, UInt* lineno )
{
DebugInfo* si;
- Int locno;
+ Word locno;
search_all_loctabs ( a, &si, &locno );
if (si == NULL)
return False;
@@ -1218,7 +1331,7 @@
/*OUT*/UInt* lineno )
{
DebugInfo* si;
- Int locno;
+ Word locno;
vg_assert( (dirname == NULL && dirname_available == NULL)
||
@@ -1542,6 +1655,122 @@
}
+/* Search all the DebugInfos in the entire system, to find the DiCfSI
+ that pertains to 'ip'.
+
+ If found, set *diP to the DebugInfo in which it resides, and
+ *ixP to the index in that DebugInfo's cfsi array.
+
+ If not found, set *diP to (DebugInfo*)1 and *ixP to zero.
+*/
+__attribute__((noinline))
+static void find_DiCfSI ( /*OUT*/DebugInfo** diP,
+ /*OUT*/Word* ixP,
+ Addr ip )
+{
+ DebugInfo* di;
+ Word i = -1;
+
+ static UWord n_search = 0;
+ static UWord n_steps = 0;
+ n_search++;
+
+ if (0) VG_(printf)("search for %#lx\n", ip);
+
+ for (di = debugInfo_list; di != NULL; di = di->next) {
+ Word j;
+ n_steps++;
+
+ /* Use the per-DebugInfo summary address ranges to skip
+ inapplicable DebugInfos quickly. */
+ if (di->cfsi_used == 0)
+ continue;
+ if (ip < di->cfsi_minavma || ip > di->cfsi_maxavma)
+ continue;
+
+ /* It might be in this DebugInfo. Search it. */
+ j = ML_(search_one_cfitab)( di, ip );
+ vg_assert(j >= -1 && j < (Word)di->cfsi_used);
+
+ if (j != -1) {
+ i = j;
+ break; /* found it */
+ }
+ }
+
+ if (i == -1) {
+
+ /* we didn't find it. */
+ *diP = (DebugInfo*)1;
+ *ixP = 0;
+
+ } else {
+
+ /* found it. */
+ /* ensure that di is 4-aligned (at least), so it can't possibly
+ be equal to (DebugInfo*)1. */
+ vg_assert(di && VG_IS_4_ALIGNED(di));
+ vg_assert(i >= 0 && i < di->cfsi_used);
+ *diP = di;
+ *ixP = i;
+
+ /* Start of performance-enhancing hack: once every 16 (chosen
+ hackily after profiling) successful searches, move the found
+ DebugInfo one step closer to the start of the list. This
+ makes future searches cheaper. For starting konqueror on
+ amd64, this in fact reduces the total amount of searching
+ done by the above find-the-right-DebugInfo loop by more than
+ a factor of 20. */
+ if ((n_search & 0xF) == 0) {
+ /* Move di one step closer to the start of the list. */
+ move_DebugInfo_one_step_forward( di );
+ }
+ /* End of performance-enhancing hack. */
+
+ if (0 && ((n_search & 0x7FFFF) == 0))
+ VG_(printf)("find_DiCfSI: %lu searches, "
+ "%lu DebugInfos looked at\n",
+ n_search, n_steps);
+
+ }
+
+}
+
+
+/* Now follows a mechanism for caching queries to find_DiCfSI, since
+ they are extremely frequent on amd64-linux, during stack unwinding.
+
+ Each cache entry binds an ip value to a (di, ix) pair. Possible
+ values:
+
+ di is non-null, ix >= 0 ==> cache slot in use, "di->cfsi[ix]"
+ di is (DebugInfo*)1 ==> cache slot in use, no associated di
+ di is NULL ==> cache slot not in use
+
+ Hence simply zeroing out the entire cache invalidates all
+ entries.
+
+ Why not map ip values directly to DiCfSI*'s? Because this would
+ cause problems if/when the cfsi array is moved due to resizing.
+ Instead we cache .cfsi array index value, which should be invariant
+ across resizing. (That said, I don't think the current
+ implementation will resize during queries, since the DiCfSI
+ records are added all at once, when the debuginfo for an object is
+ read, and are not changed ever thereafter.) */
+
+#define N_CFSI_CACHE 511
+
+typedef
+ struct { Addr ip; DebugInfo* di; Word ix; }
+ CFSICacheEnt;
+
+static CFSICacheEnt cfsi_cache[N_CFSI_CACHE];
+
+static void cfsi_cache__invalidate ( void ) {
+ VG_(memset)(&cfsi_cache, 0, sizeof(cfsi_cache));
+}
+
+
/* The main function for DWARF2/3 CFI-based stack unwinding.
Given an IP/SP/FP triple, produce the IP/SP/FP values for the
previous frame, if possible. */
@@ -1554,61 +1783,47 @@
Addr min_accessible,
Addr max_accessible )
{
- Bool ok;
- Int i;
- DebugInfo* si;
- DiCfSI* cfsi = NULL;
- Addr cfa, ipHere, spHere, fpHere, ipPrev, spPrev, fpPrev;
+ Bool ok;
+ DebugInfo* di;
+ DiCfSI* cfsi = NULL;
+ Addr cfa, ipHere, spHere, fpHere, ipPrev, spPrev, fpPrev;
CfiExprEvalContext eec;
- static UInt n_search = 0;
- static UInt n_steps = 0;
- n_search++;
+ static UWord n_q = 0, n_m = 0;
+ n_q++;
+ if (0 && 0 == (n_q & 0x1FFFFF))
+ VG_(printf)("QQQ %lu %lu\n", n_q, n_m);
- if (0) VG_(printf)("search for %#lx\n", *ipP);
+ { UWord hash = (*ipP) % N_CFSI_CACHE;
+ CFSICacheEnt* ce = &cfsi_cache[hash];
- for (si = debugInfo_list; si != NULL; si = si->next) {
- n_steps++;
+ if (LIKELY(ce->ip == *ipP) && LIKELY(ce->di != NULL)) {
+ /* found an entry in the cache .. */
+ } else {
+ /* not found in cache. Search and update. */
+ n_m++;
+ ce->ip = *ipP;
+ find_DiCfSI( &ce->di, &ce->ix, *ipP );
+ }
- /* Use the per-DebugInfo summary address ranges to skip
- inapplicable DebugInfos quickly. */
- if (si->cfsi_used == 0)
>>> TRUNCATED FOR MAIL (1000 lines) <<<
More information about the p4-projects
mailing list