Bug 1479965 - Use mangled names everywhere, recording unmangled names for pattern matching and human-readable outputs, r=jonco
authorSteve Fink <sfink@mozilla.com>
Wed, 25 Jul 2018 16:38:22 -0700
changeset 497305 a188327354ab83702eb9c2fec439f3a7cc019883
parent 497304 d05f029a29ad2adc1d2d952d91f440a3f571e679
child 497306 c7b32ffa822e353b5479b6224194e3cdfd135e65
push id9996
push userarchaeopteryx@coole-files.de
push dateThu, 18 Oct 2018 18:37:15 +0000
treeherdermozilla-beta@8efe26839243 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersjonco
bugs1479965
milestone64.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1479965 - Use mangled names everywhere, recording unmangled names for pattern matching and human-readable outputs, r=jonco
js/src/devtools/rootAnalysis/annotations.js
js/src/devtools/rootAnalysis/computeCallgraph.js
js/src/devtools/rootAnalysis/computeGCFunctions.js
js/src/devtools/rootAnalysis/loadCallgraph.js
js/src/devtools/rootAnalysis/t/testlib.py
--- a/js/src/devtools/rootAnalysis/annotations.js
+++ b/js/src/devtools/rootAnalysis/annotations.js
@@ -258,18 +258,21 @@ function isHeapSnapshotMockClass(name)
 
 function isGTest(name)
 {
     return name.match(/\btesting::/);
 }
 
 function ignoreGCFunction(mangled)
 {
-    assert(mangled in readableNames, mangled + " not in readableNames");
-    var fun = readableNames[mangled][0];
+    // Field calls will not be in readableNames
+    if (!(mangled in readableNames))
+        return false;
+
+    const fun = readableNames[mangled][0];
 
     if (fun in ignoreFunctions)
         return true;
 
     // The protobuf library, and [de]serialization code generated by the
     // protobuf compiler, uses a _ton_ of function pointers but they are all
     // internal. Easiest to just ignore that mess here.
     if (isProtobuf(fun))
@@ -390,16 +393,18 @@ function isOverridableField(initialCSU, 
     if (field == 'GetNativeContext')
         return false;
     if (field == "GetGlobalJSObject")
         return false;
     if (field == "GetIsMainThread")
         return false;
     if (field == "GetThreadFromPRThread")
         return false;
+    if (field == "DocAddSizeOfIncludingThis")
+        return false;
     if (field == "ConstructUbiNode")
         return false;
     if (initialCSU == 'nsIXPCScriptable' && field == "GetScriptableFlags")
         return false;
     if (initialCSU == 'nsIXPConnectJSObjectHolder' && field == 'GetJSObject')
         return false;
     if (initialCSU == 'nsIXPConnect' && field == 'GetSafeJSContext')
         return false;
--- a/js/src/devtools/rootAnalysis/computeCallgraph.js
+++ b/js/src/devtools/rootAnalysis/computeCallgraph.js
@@ -13,24 +13,44 @@ if (scriptArgs[0] == '--function') {
 var typeInfo_filename = scriptArgs[0] || "typeInfo.txt";
 var callgraphOut_filename = scriptArgs[1] || "callgraph.txt";
 
 var origOut = os.file.redirect(callgraphOut_filename);
 
 var memoized = new Map();
 var memoizedCount = 0;
 
-function memo(name)
+var unmangled2id = new Set();
+
+function getId(name)
 {
-    if (!memoized.has(name)) {
-        let id = memoized.size + 1;
-        memoized.set(name, "" + id);
-        print(`#${id} ${name}`);
-    }
-    return memoized.get(name);
+    let id = memoized.get(name);
+    if (id !== undefined)
+        return id;
+
+    id = memoized.size + 1;
+    memoized.set(name, id);
+    print(`#${id} ${name}`);
+
+    return id;
+}
+
+function functionId(name)
+{
+    const [mangled, unmangled] = splitFunction(name);
+    const id = getId(mangled);
+
+    // Only produce a mangled -> unmangled mapping once, unless there are
+    // multiple unmangled names for the same mangled name.
+    if (unmangled2id.has(unmangled))
+        return id;
+
+    print(`= ${id} ${unmangled}`);
+    unmangled2id.add(unmangled);
+    return id;
 }
 
 var lastline;
 function printOnce(line)
 {
     if (line != lastline) {
         print(line);
         lastline = line;
@@ -78,17 +98,17 @@ function getAnnotations(functionName, bo
 // recording them in callgraph.txt.
 function processBody(functionName, body)
 {
     if (!('PEdge' in body))
         return;
 
 
     for (var tag of getAnnotations(functionName, body).values())
-        print("T " + memo(functionName) + " " + tag);
+        print("T " + functionId(functionName) + " " + tag);
 
     // Set of all callees that have been output so far, in order to suppress
     // repeated callgraph edges from being recorded. This uses a Map from
     // callees to limit sets, because we don't want a limited edge to prevent
     // an unlimited edge from being recorded later. (So an edge will be skipped
     // if it exists and is at least as limited as the previously seen edge.)
     //
     // Limit sets are implemented as integers interpreted as bitfields.
@@ -106,43 +126,43 @@ function processBody(functionName, body)
         var edgeLimited = body.limits[edge.Index[0]] | 0;
 
         for (var callee of getCallees(edge)) {
             // Individual callees may have additional limits. The only such
             // limit currently is that nsISupports.{AddRef,Release} are assumed
             // to never GC.
             const limits = edgeLimited | callee.limits;
             let prologue = limits ? `/${limits} ` : "";
-            prologue += memo(functionName) + " ";
+            prologue += functionId(functionName) + " ";
             if (callee.kind == 'direct') {
                 const prev_limits = seen.has(callee.name) ? seen.get(callee.name) : LIMIT_UNVISITED;
                 if (prev_limits & ~limits) {
                     // Only output an edge if it loosens a limit.
                     seen.set(callee.name, prev_limits & limits);
-                    printOnce("D " + prologue + memo(callee.name));
+                    printOnce("D " + prologue + functionId(callee.name));
                 }
             } else if (callee.kind == 'field') {
                 var { csu, field, isVirtual } = callee;
                 const tag = isVirtual ? 'V' : 'F';
                 const fullfield = `${csu}.${field}`;
-                printOnce(`${tag} ${prologue}${memo(fullfield)} CLASS ${csu} FIELD ${field}`);
+                printOnce(`${tag} ${prologue}${getId(fullfield)} CLASS ${csu} FIELD ${field}`);
             } else if (callee.kind == 'resolved-field') {
                 // Fully-resolved field (virtual method) call. Record the
                 // callgraph edges. Do not consider limits, since they are
                 // local to this callsite and we are writing out a global
                 // record here.
                 //
                 // Any field call that does *not* have an R entry must be
                 // assumed to call anything.
                 var { csu, field, callees } = callee;
                 var fullFieldName = csu + "." + field;
                 if (!virtualResolutionsSeen.has(fullFieldName)) {
                     virtualResolutionsSeen.add(fullFieldName);
                     for (var target of callees)
-                        printOnce("R " + memo(fullFieldName) + " " + memo(target.name));
+                        printOnce("R " + getId(fullFieldName) + " " + functionId(target.name));
                 }
             } else if (callee.kind == 'indirect') {
                 printOnce("I " + prologue + "VARIABLE " + callee.variable);
             } else if (callee.kind == 'unknown') {
                 printOnce("I " + prologue + "VARIABLE UNKNOWN");
             } else {
                 printErr("invalid " + callee.kind + " callee");
                 debugger;
@@ -194,17 +214,17 @@ function process(functionName, functionB
     // will show up as called but only "foo *INTERNAL* " will be emitted in the
     // case where the constructors are identical.
     //
     // This is slightly conservative in the case where they are *not*
     // identical, but that should be rare enough that we don't care.
     var markerPos = functionName.indexOf(internalMarker);
     if (markerPos > 0) {
         var inChargeXTor = functionName.replace(internalMarker, "");
-        printOnce("D " + memo(inChargeXTor) + " " + memo(functionName));
+        printOnce("D " + functionId(inChargeXTor) + " " + functionId(functionName));
     }
 
     // Further note: from https://itanium-cxx-abi.github.io/cxx-abi/abi.html the
     // different kinds of constructors/destructors are:
     // C1	# complete object constructor
     // C2	# base object constructor
     // C3	# complete object allocating constructor
     // D0	# deleting destructor
@@ -240,30 +260,30 @@ function process(functionName, functionB
         // E terminates the method name (and precedes the method parameters).
         // If eg "C4E" shows up in the mangled name for another reason, this
         // will create bogus edges in the callgraph. But it will affect little
         // and is somewhat difficult to avoid, so we will live with it.
         //
         // Another possibility! A templatized constructor will contain C4I...E
         // for template arguments.
         //
-        for (let [synthetic, variant] of [
-            ['C4E', 'C1E'],
-            ['C4E', 'C2E'],
-            ['C4E', 'C3E'],
-            ['C4I', 'C1I'],
-            ['C4I', 'C2I'],
-            ['C4I', 'C3I']])
+        for (let [synthetic, variant, desc] of [
+            ['C4E', 'C1E', 'complete_ctor'],
+            ['C4E', 'C2E', 'base_ctor'],
+            ['C4E', 'C3E', 'complete_alloc_ctor'],
+            ['C4I', 'C1I', 'complete_ctor'],
+            ['C4I', 'C2I', 'base_ctor'],
+            ['C4I', 'C3I', 'complete_alloc_ctor']])
         {
             if (mangled.indexOf(synthetic) == -1)
                 continue;
 
             let variant_mangled = mangled.replace(synthetic, variant);
-            let variant_full = variant_mangled + "$" + unmangled;
-            printOnce("D " + memo(variant_full) + " " + memo(functionName));
+            let variant_full = `${variant_mangled}$${unmangled} [[${desc}]]`;
+            printOnce("D " + functionId(variant_full) + " " + functionId(functionName));
         }
     }
 
     // For destructors:
     //
     // I've never seen D4Ev() + D4Ev(int32), only one or the other. So
     // for a D4Ev of any sort, create:
     //
@@ -273,22 +293,22 @@ function process(functionName, functionB
     //                 # use whichever one is defined, in-charge or not.
     //                 # ('?') means either () or (int32).
     //
     // Note that this doesn't actually make sense -- D0 and D1 should be
     // in-charge, but gcc doesn't seem to give them the in-charge parameter?!
     //
     if (functionName.indexOf("D4Ev") != -1 && functionName.indexOf("::~") != -1) {
         const not_in_charge_dtor = functionName.replace("(int32)", "()");
-        const D0 = not_in_charge_dtor.replace("D4Ev", "D0Ev");
-        const D1 = not_in_charge_dtor.replace("D4Ev", "D1Ev");
-        const D2 = not_in_charge_dtor.replace("D4Ev", "D2Ev");
-        printOnce("D " + memo(D0) + " " + memo(D1));
-        printOnce("D " + memo(D1) + " " + memo(D2));
-        printOnce("D " + memo(D2) + " " + memo(functionName));
+        const D0 = not_in_charge_dtor.replace("D4Ev", "D0Ev") + " [[deleting_dtor]]";
+        const D1 = not_in_charge_dtor.replace("D4Ev", "D1Ev") + " [[complete_dtor]]";
+        const D2 = not_in_charge_dtor.replace("D4Ev", "D2Ev") + " [[base_dtor]]";
+        printOnce("D " + functionId(D0) + " " + functionId(D1));
+        printOnce("D " + functionId(D1) + " " + functionId(D2));
+        printOnce("D " + functionId(D2) + " " + functionId(functionName));
     }
 }
 
 for (var nameIndex = minStream; nameIndex <= maxStream; nameIndex++) {
     var name = xdb.read_key(nameIndex);
     var data = xdb.read_entry(name);
     process(name.readString(), JSON.parse(data.readString()));
     xdb.free_string(name);
--- a/js/src/devtools/rootAnalysis/computeGCFunctions.js
+++ b/js/src/devtools/rootAnalysis/computeGCFunctions.js
@@ -18,35 +18,39 @@ var gcEdges_filename = scriptArgs[3] || 
 var limitedFunctionsList_filename = scriptArgs[4] || "limitedFunctions.lst";
 
 loadCallgraph(callgraph_filename);
 
 printErr("Writing " + gcFunctions_filename);
 redirect(gcFunctions_filename);
 
 for (var name in gcFunctions) {
-    for (let readable of readableNames[name]) {
+    for (let readable of (readableNames[name] || [])) {
         print("");
         print("GC Function: " + name + "$" + readable);
         let current = name;
         do {
             current = gcFunctions[current];
             if (current in readableNames)
                 print("    " + readableNames[current][0]);
             else
                 print("    " + current);
         } while (current in gcFunctions);
     }
 }
 
 printErr("Writing " + gcFunctionsList_filename);
 redirect(gcFunctionsList_filename);
 for (var name in gcFunctions) {
-    for (var readable of readableNames[name])
-        print(name + "$" + readable);
+    if (name in readableNames) {
+        for (var readable of readableNames[name])
+            print(name + "$" + readable);
+    } else {
+        print(name);
+    }
 }
 
 // gcEdges is a list of edges that can GC for more specific reasons than just
 // calling a function that is in gcFunctions.txt.
 //
 // Right now, it is unused. It was meant for ~AutoRealm when it might
 // wrap an exception, but anything held live across ~AC will have to be held
 // live across the corresponding constructor (and hence the whole scope of the
--- a/js/src/devtools/rootAnalysis/loadCallgraph.js
+++ b/js/src/devtools/rootAnalysis/loadCallgraph.js
@@ -23,40 +23,35 @@ loadRelativeToScript('utility.js');
 //
 // Note that callgraph.txt uses a compressed representation -- each name is
 // mapped to an integer, and those integers are what is recorded in the edges.
 // But the integers depend on the full name, whereas the true edge should only
 // consider the mangled name. And some of the names encoded in callgraph.txt
 // are FieldCalls, not just function names.
 
 var readableNames = {}; // map from mangled name => list of readable names
-var mangledName = {}; // map from demangled names => mangled names. Could be eliminated.
 var calleesOf = {}; // map from mangled => list of tuples of {'callee':mangled, 'limits':intset}
 var callersOf; // map from mangled => list of tuples of {'caller':mangled, 'limits':intset}
 var gcFunctions = {}; // map from mangled callee => reason
 var limitedFunctions = {}; // set of mangled names (map from mangled name => limit intset)
 var gcEdges = {};
 
-// Map from identifier to mangled name (or to a Class.Field)
-var idToMangled = [""];
-
-// Map from identifier to full "mangled$readable" name. Or sometimes to a
-// Class.Field name.
+// "Map" from identifier to mangled name, or sometimes to a Class.Field name.
 var functionNames = [""];
 
 var mangledToId = {};
 
 // Returns whether the function was added. (It will be refused if it was
 // already there, or if limits or annotations say it shouldn't be added.)
 function addGCFunction(caller, reason, functionLimits)
 {
     if (functionLimits[caller] & LIMIT_CANNOT_GC)
         return false;
 
-    if (ignoreGCFunction(idToMangled[caller]))
+    if (ignoreGCFunction(functionNames[caller]))
         return false;
 
     if (!(caller in gcFunctions)) {
         gcFunctions[caller] = reason;
         return true;
     }
 
     return false;
@@ -118,28 +113,32 @@ function loadCallgraph(file)
 
     let numGCCalls = 0;
 
     for (let line of readFileLines_gen(file)) {
         line = line.replace(/\n/, "");
 
         let match;
         if (match = line.charAt(0) == "#" && /^\#(\d+) (.*)/.exec(line)) {
-            assert(functionNames.length == match[1]);
-            functionNames.push(match[2]);
-            const [ mangled, readable ] = splitFunction(match[2]);
+            const [ _, id, mangled ] = match;
+            assert(functionNames.length == id);
+            functionNames.push(mangled);
+            mangledToId[mangled] = id;
+            continue;
+        }
+        if (match = line.charAt(0) == "=" && /^= (\d+) (.*)/.exec(line)) {
+            const [ _, id, readable ] = match;
+            const mangled = functionNames[id];
             if (mangled in readableNames)
                 readableNames[mangled].push(readable);
             else
                 readableNames[mangled] = [ readable ];
-            mangledName[readable] = mangled;
-            mangledToId[mangled] = idToMangled.length;
-            idToMangled.push(mangled);
             continue;
         }
+
         let limits = 0;
         // Example line: D /17 6 7
         //
         // This means a direct call from 6 -> 7, but within a scope that
         // applies limits 0x1 and 0x10 to the callee.
         //
         // Look for a limit and remove it from the line if found.
         if (line.indexOf("/") != -1) {
@@ -183,20 +182,20 @@ function loadCallgraph(file)
             //         Subclass2::foo();
             //     }
             //
             addToKeyedList(calleesOf, callerField, {callee:callee, limits:0});
             // Mark that we resolved this virtual method, so that it isn't
             // assumed to call some random function that might do anything.
             resolvedFieldCalls.add(callerField);
         } else if (match = tag == 'T' && /^T (\d+) (.*)/.exec(line)) {
-            const mangled = match[1]|0;
+            const id = match[1]|0;
             let tag = match[2];
             if (tag == 'GC Call') {
-                addGCFunction(mangled, "GC", functionLimits);
+                addGCFunction(id, "GC", functionLimits);
                 numGCCalls++;
             }
         } else {
             assert(false, "Invalid format in callgraph line: " + line);
         }
     }
 
     // Callers have a list of callees, with duplicates (if the same function is
@@ -260,19 +259,19 @@ function loadCallgraph(file)
     var worklist = [];
     for (const name in gcFunctions)
         worklist.push(name);
 
     // Include all field calls and unresolved virtual method calls.
     for (const [name, csuName] of fieldCallCSU) {
         if (resolvedFieldCalls.has(name))
             continue; // Skip resolved virtual functions.
-        const fullFieldName = idToMangled[name];
+        const fullFieldName = functionNames[name];
         if (!fieldCallCannotGC(csuName, fullFieldName)) {
-            gcFunctions[name] = 'unresolved ' + idToMangled[name];
+            gcFunctions[name] = 'unresolved ' + fullFieldName;
             worklist.push(name);
         }
     }
 
     // Recursively find all callers not always called in a GC suppression
     // context, and add them to the set of gcFunctions.
     while (worklist.length) {
         name = worklist.shift();
@@ -286,17 +285,17 @@ function loadCallgraph(file)
             }
         }
     }
 
     // Convert functionLimits to limitedFunctions (using mangled names instead
     // of ids.)
 
     for (const [id, limits] of Object.entries(functionLimits))
-        limitedFunctions[idToMangled[id]] = limits;
+        limitedFunctions[functionNames[id]] = limits;
 
     // The above code uses integer ids for efficiency. External code uses
     // mangled names. Rewrite the various data structures to convert ids to
     // mangled names.
     remap_ids_to_mangled_names();
 }
 
 // Return a worklist of functions with no callers, and also initialize
@@ -398,27 +397,27 @@ function gather_recursive_roots(function
 
     return roots;
 }
 
 function remap_ids_to_mangled_names() {
     var tmp = gcFunctions;
     gcFunctions = {};
     for (const [caller, reason] of Object.entries(tmp))
-        gcFunctions[idToMangled[caller]] = idToMangled[reason] || reason;
+        gcFunctions[functionNames[caller]] = functionNames[reason] || reason;
 
     tmp = calleesOf;
     calleesOf = {};
     for (const [callerId, callees] of Object.entries(calleesOf)) {
-        const caller = idToMangled[callerId];
+        const caller = functionNames[callerId];
         for (const {calleeId, limits} of callees)
-            calleesOf[caller][idToMangled[calleeId]] = limits;
+            calleesOf[caller][functionNames[calleeId]] = limits;
     }
 
     tmp = callersOf;
     callersOf = {};
     for (const [calleeId, callers] of Object.entries(callersOf)) {
-        const callee = idToMangled[calleeId];
+        const callee = functionNames[calleeId];
         callersOf[callee] = {};
         for (const {callerId, limits} of callers)
-            callersOf[callee][idToMangled[caller]] = limits;
+            callersOf[callee][functionNames[caller]] = limits;
     }
 }
--- a/js/src/devtools/rootAnalysis/t/testlib.py
+++ b/js/src/devtools/rootAnalysis/t/testlib.py
@@ -13,16 +13,18 @@ HazardSummary = namedtuple('HazardSummar
     'variable',
     'type',
     'GCFunction',
     'location'])
 
 Callgraph = namedtuple('Callgraph', [
     'functionNames',
     'nameToId',
+    'mangledToUnmangled',
+    'unmangledToMangled',
     'calleesOf',
     'callersOf',
     'tags',
     'calleeGraph',
     'callerGraph'])
 
 
 def equal(got, expected):
@@ -119,41 +121,51 @@ sixgill_bin = '{bindir}'
 
     def load_gcFunctions(self):
         return self.load_text_file('gcFunctions.lst', extract=extract_unmangled)
 
     def load_callgraph(self):
         data = Callgraph(
             functionNames=['dummy'],
             nameToId={},
+            mangledToUnmangled={},
+            unmangledToMangled={},
             calleesOf=defaultdict(list),
             callersOf=defaultdict(list),
             tags=defaultdict(set),
             calleeGraph=defaultdict(dict),
             callerGraph=defaultdict(dict),
         )
 
         def lookup(id):
-            return data.functionNames[int(id)]
+            mangled = data.functionNames[int(id)]
+            return data.mangledToUnmangled.get(mangled, mangled)
 
         def add_call(caller, callee, limit):
             data.calleesOf[caller].append(callee)
             data.callersOf[callee].append(caller)
             data.calleeGraph[caller][callee] = True
             data.callerGraph[callee][caller] = True
 
         def process(line):
             if line.startswith('#'):
                 name = line.split(" ", 1)[1]
-                if '$' in name:
-                    name = name[name.index('$') + 1:]
                 data.nameToId[name] = len(data.functionNames)
                 data.functionNames.append(name)
                 return
 
+            if line.startswith('='):
+                m = re.match(r'^= (\d+) (.*)', line)
+                mangled = data.functionNames[int(m.group(1))]
+                unmangled = m.group(2)
+                data.nameToId[unmangled] = id
+                data.mangledToUnmangled[mangled] = unmangled
+                data.unmangledToMangled[unmangled] = mangled
+                return
+
             limit = 0
             m = re.match(r'^\w (?:/(\d+))? ', line)
             if m:
                 limit = int(m[1])
 
             tokens = line.split(' ')
             if tokens[0] in ('D', 'R'):
                 _, caller, callee = tokens