Bug 1479965 - Use mangled names everywhere, recording unmangled names for pattern matching and human-readable outputs, r=jonco
authorSteve Fink <sfink@mozilla.com>
Wed, 25 Jul 2018 16:38:22 -0700
changeset 489833 a188327354ab83702eb9c2fec439f3a7cc019883
parent 489832 d05f029a29ad2adc1d2d952d91f440a3f571e679
child 489834 c7b32ffa822e353b5479b6224194e3cdfd135e65
push id247
push userfmarier@mozilla.com
push dateSat, 27 Oct 2018 01:06:44 +0000
reviewersjonco
bugs1479965
milestone64.0a1
Bug 1479965 - Use mangled names everywhere, recording unmangled names for pattern matching and human-readable outputs, r=jonco
js/src/devtools/rootAnalysis/annotations.js
js/src/devtools/rootAnalysis/computeCallgraph.js
js/src/devtools/rootAnalysis/computeGCFunctions.js
js/src/devtools/rootAnalysis/loadCallgraph.js
js/src/devtools/rootAnalysis/t/testlib.py
--- a/js/src/devtools/rootAnalysis/annotations.js
+++ b/js/src/devtools/rootAnalysis/annotations.js
@@ -258,18 +258,21 @@ function isHeapSnapshotMockClass(name)
 
 function isGTest(name)
 {
     return name.match(/\btesting::/);
 }
 
 function ignoreGCFunction(mangled)
 {
-    assert(mangled in readableNames, mangled + " not in readableNames");
-    var fun = readableNames[mangled][0];
+    // Field calls will not be in readableNames
+    if (!(mangled in readableNames))
+        return false;
+
+    const fun = readableNames[mangled][0];
 
     if (fun in ignoreFunctions)
         return true;
 
     // The protobuf library, and [de]serialization code generated by the
     // protobuf compiler, uses a _ton_ of function pointers but they are all
     // internal. Easiest to just ignore that mess here.
     if (isProtobuf(fun))
@@ -390,16 +393,18 @@ function isOverridableField(initialCSU, 
     if (field == 'GetNativeContext')
         return false;
     if (field == "GetGlobalJSObject")
         return false;
     if (field == "GetIsMainThread")
         return false;
     if (field == "GetThreadFromPRThread")
         return false;
+    if (field == "DocAddSizeOfIncludingThis")
+        return false;
     if (field == "ConstructUbiNode")
         return false;
     if (initialCSU == 'nsIXPCScriptable' && field == "GetScriptableFlags")
         return false;
     if (initialCSU == 'nsIXPConnectJSObjectHolder' && field == 'GetJSObject')
         return false;
     if (initialCSU == 'nsIXPConnect' && field == 'GetSafeJSContext')
         return false;
--- a/js/src/devtools/rootAnalysis/computeCallgraph.js
+++ b/js/src/devtools/rootAnalysis/computeCallgraph.js
@@ -13,24 +13,44 @@ if (scriptArgs[0] == '--function') {
 var typeInfo_filename = scriptArgs[0] || "typeInfo.txt";
 var callgraphOut_filename = scriptArgs[1] || "callgraph.txt";
 
 var origOut = os.file.redirect(callgraphOut_filename);
 
 var memoized = new Map();
 var memoizedCount = 0;
 
-function memo(name)
+var unmangled2id = new Set();
+
+function getId(name)
 {
-    if (!memoized.has(name)) {
-        let id = memoized.size + 1;
-        memoized.set(name, "" + id);
-        print(`#${id} ${name}`);
-    }
-    return memoized.get(name);
+    let id = memoized.get(name);
+    if (id !== undefined)
+        return id;
+
+    id = memoized.size + 1;
+    memoized.set(name, id);
+    print(`#${id} ${name}`);
+
+    return id;
+}
+
+function functionId(name)
+{
+    const [mangled, unmangled] = splitFunction(name);
+    const id = getId(mangled);
+
+    // Only produce a mangled -> unmangled mapping once, unless there are
+    // multiple unmangled names for the same mangled name.
+    if (unmangled2id.has(unmangled))
+        return id;
+
+    print(`= ${id} ${unmangled}`);
+    unmangled2id.add(unmangled);
+    return id;
 }
 
 var lastline;
 function printOnce(line)
 {
     if (line != lastline) {
         print(line);
         lastline = line;
@@ -78,17 +98,17 @@ function getAnnotations(functionName, bo
 // recording them in callgraph.txt.
 function processBody(functionName, body)
 {
     if (!('PEdge' in body))
         return;
 
 
     for (var tag of getAnnotations(functionName, body).values())
-        print("T " + memo(functionName) + " " + tag);
+        print("T " + functionId(functionName) + " " + tag);
 
     // Set of all callees that have been output so far, in order to suppress
     // repeated callgraph edges from being recorded. This uses a Map from
     // callees to limit sets, because we don't want a limited edge to prevent
     // an unlimited edge from being recorded later. (So an edge will be skipped
     // if it exists and is at least as limited as the previously seen edge.)
     //
     // Limit sets are implemented as integers interpreted as bitfields.
@@ -106,43 +126,43 @@ function processBody(functionName, body)
         var edgeLimited = body.limits[edge.Index[0]] | 0;
 
         for (var callee of getCallees(edge)) {
             // Individual callees may have additional limits. The only such
             // limit currently is that nsISupports.{AddRef,Release} are assumed
             // to never GC.
             const limits = edgeLimited | callee.limits;
             let prologue = limits ? `/${limits} ` : "";
-            prologue += memo(functionName) + " ";
+            prologue += functionId(functionName) + " ";
             if (callee.kind == 'direct') {
                 const prev_limits = seen.has(callee.name) ? seen.get(callee.name) : LIMIT_UNVISITED;
                 if (prev_limits & ~limits) {
                     // Only output an edge if it loosens a limit.
                     seen.set(callee.name, prev_limits & limits);
-                    printOnce("D " + prologue + memo(callee.name));
+                    printOnce("D " + prologue + functionId(callee.name));
                 }
             } else if (callee.kind == 'field') {
                 var { csu, field, isVirtual } = callee;
                 const tag = isVirtual ? 'V' : 'F';
                 const fullfield = `${csu}.${field}`;
-                printOnce(`${tag} ${prologue}${memo(fullfield)} CLASS ${csu} FIELD ${field}`);
+                printOnce(`${tag} ${prologue}${getId(fullfield)} CLASS ${csu} FIELD ${field}`);
             } else if (callee.kind == 'resolved-field') {
                 // Fully-resolved field (virtual method) call. Record the
                 // callgraph edges. Do not consider limits, since they are
                 // local to this callsite and we are writing out a global
                 // record here.
                 //
                 // Any field call that does *not* have an R entry must be
                 // assumed to call anything.
                 var { csu, field, callees } = callee;
                 var fullFieldName = csu + "." + field;
                 if (!virtualResolutionsSeen.has(fullFieldName)) {
                     virtualResolutionsSeen.add(fullFieldName);
                     for (var target of callees)
-                        printOnce("R " + memo(fullFieldName) + " " + memo(target.name));
+                        printOnce("R " + getId(fullFieldName) + " " + functionId(target.name));
                 }
             } else if (callee.kind == 'indirect') {
                 printOnce("I " + prologue + "VARIABLE " + callee.variable);
             } else if (callee.kind == 'unknown') {
                 printOnce("I " + prologue + "VARIABLE UNKNOWN");
             } else {
                 printErr("invalid " + callee.kind + " callee");
                 debugger;
@@ -194,17 +214,17 @@ function process(functionName, functionB
     // will show up as called but only "foo *INTERNAL* " will be emitted in the
     // case where the constructors are identical.
     //
     // This is slightly conservative in the case where they are *not*
     // identical, but that should be rare enough that we don't care.
     var markerPos = functionName.indexOf(internalMarker);
     if (markerPos > 0) {
         var inChargeXTor = functionName.replace(internalMarker, "");
-        printOnce("D " + memo(inChargeXTor) + " " + memo(functionName));
+        printOnce("D " + functionId(inChargeXTor) + " " + functionId(functionName));
     }
 
     // Further note: from https://itanium-cxx-abi.github.io/cxx-abi/abi.html the
     // different kinds of constructors/destructors are:
     // C1	# complete object constructor
     // C2	# base object constructor
     // C3	# complete object allocating constructor
     // D0	# deleting destructor
@@ -240,30 +260,30 @@ function process(functionName, functionB
         // E terminates the method name (and precedes the method parameters).
         // If eg "C4E" shows up in the mangled name for another reason, this
         // will create bogus edges in the callgraph. But it will affect little
         // and is somewhat difficult to avoid, so we will live with it.
         //
         // Another possibility! A templatized constructor will contain C4I...E
         // for template arguments.
         //
-        for (let [synthetic, variant] of [
-            ['C4E', 'C1E'],
-            ['C4E', 'C2E'],
-            ['C4E', 'C3E'],
-            ['C4I', 'C1I'],
-            ['C4I', 'C2I'],
-            ['C4I', 'C3I']])
+        for (let [synthetic, variant, desc] of [
+            ['C4E', 'C1E', 'complete_ctor'],
+            ['C4E', 'C2E', 'base_ctor'],
+            ['C4E', 'C3E', 'complete_alloc_ctor'],
+            ['C4I', 'C1I', 'complete_ctor'],
+            ['C4I', 'C2I', 'base_ctor'],
+            ['C4I', 'C3I', 'complete_alloc_ctor']])
         {
             if (mangled.indexOf(synthetic) == -1)
                 continue;
 
             let variant_mangled = mangled.replace(synthetic, variant);
-            let variant_full = variant_mangled + "$" + unmangled;
-            printOnce("D " + memo(variant_full) + " " + memo(functionName));
+            let variant_full = `${variant_mangled}$${unmangled} [[${desc}]]`;
+            printOnce("D " + functionId(variant_full) + " " + functionId(functionName));
         }
     }
 
     // For destructors:
     //
     // I've never seen D4Ev() + D4Ev(int32), only one or the other. So
     // for a D4Ev of any sort, create:
     //
@@ -273,22 +293,22 @@ function process(functionName, functionB
     //                 # use whichever one is defined, in-charge or not.
     //                 # ('?') means either () or (int32).
     //
     // Note that this doesn't actually make sense -- D0 and D1 should be
     // in-charge, but gcc doesn't seem to give them the in-charge parameter?!
     //
     if (functionName.indexOf("D4Ev") != -1 && functionName.indexOf("::~") != -1) {
         const not_in_charge_dtor = functionName.replace("(int32)", "()");
-        const D0 = not_in_charge_dtor.replace("D4Ev", "D0Ev");
-        const D1 = not_in_charge_dtor.replace("D4Ev", "D1Ev");
-        const D2 = not_in_charge_dtor.replace("D4Ev", "D2Ev");
-        printOnce("D " + memo(D0) + " " + memo(D1));
-        printOnce("D " + memo(D1) + " " + memo(D2));
-        printOnce("D " + memo(D2) + " " + memo(functionName));
+        const D0 = not_in_charge_dtor.replace("D4Ev", "D0Ev") + " [[deleting_dtor]]";
+        const D1 = not_in_charge_dtor.replace("D4Ev", "D1Ev") + " [[complete_dtor]]";
+        const D2 = not_in_charge_dtor.replace("D4Ev", "D2Ev") + " [[base_dtor]]";
+        printOnce("D " + functionId(D0) + " " + functionId(D1));
+        printOnce("D " + functionId(D1) + " " + functionId(D2));
+        printOnce("D " + functionId(D2) + " " + functionId(functionName));
     }
 }
 
 for (var nameIndex = minStream; nameIndex <= maxStream; nameIndex++) {
     var name = xdb.read_key(nameIndex);
     var data = xdb.read_entry(name);
     process(name.readString(), JSON.parse(data.readString()));
     xdb.free_string(name);
--- a/js/src/devtools/rootAnalysis/computeGCFunctions.js
+++ b/js/src/devtools/rootAnalysis/computeGCFunctions.js
@@ -18,35 +18,39 @@ var gcEdges_filename = scriptArgs[3] || 
 var limitedFunctionsList_filename = scriptArgs[4] || "limitedFunctions.lst";
 
 loadCallgraph(callgraph_filename);
 
 printErr("Writing " + gcFunctions_filename);
 redirect(gcFunctions_filename);
 
 for (var name in gcFunctions) {
-    for (let readable of readableNames[name]) {
+    for (let readable of (readableNames[name] || [])) {
         print("");
         print("GC Function: " + name + "$" + readable);
         let current = name;
         do {
             current = gcFunctions[current];
             if (current in readableNames)
                 print("    " + readableNames[current][0]);
             else
                 print("    " + current);
         } while (current in gcFunctions);
     }
 }
 
 printErr("Writing " + gcFunctionsList_filename);
 redirect(gcFunctionsList_filename);
 for (var name in gcFunctions) {
-    for (var readable of readableNames[name])
-        print(name + "$" + readable);
+    if (name in readableNames) {
+        for (var readable of readableNames[name])
+            print(name + "$" + readable);
+    } else {
+        print(name);
+    }
 }
 
 // gcEdges is a list of edges that can GC for more specific reasons than just
 // calling a function that is in gcFunctions.txt.
 //
 // Right now, it is unused. It was meant for ~AutoRealm when it might
 // wrap an exception, but anything held live across ~AC will have to be held
 // live across the corresponding constructor (and hence the whole scope of the
--- a/js/src/devtools/rootAnalysis/loadCallgraph.js
+++ b/js/src/devtools/rootAnalysis/loadCallgraph.js
@@ -23,40 +23,35 @@ loadRelativeToScript('utility.js');
 //
 // Note that callgraph.txt uses a compressed representation -- each name is
 // mapped to an integer, and those integers are what is recorded in the edges.
 // But the integers depend on the full name, whereas the true edge should only
 // consider the mangled name. And some of the names encoded in callgraph.txt
 // are FieldCalls, not just function names.
 
 var readableNames = {}; // map from mangled name => list of readable names
-var mangledName = {}; // map from demangled names => mangled names. Could be eliminated.
 var calleesOf = {}; // map from mangled => list of tuples of {'callee':mangled, 'limits':intset}
 var callersOf; // map from mangled => list of tuples of {'caller':mangled, 'limits':intset}
 var gcFunctions = {}; // map from mangled callee => reason
 var limitedFunctions = {}; // set of mangled names (map from mangled name => limit intset)
 var gcEdges = {};
 
-// Map from identifier to mangled name (or to a Class.Field)
-var idToMangled = [""];
-
-// Map from identifier to full "mangled$readable" name. Or sometimes to a
-// Class.Field name.
+// "Map" from identifier to mangled name, or sometimes to a Class.Field name.
 var functionNames = [""];
 
 var mangledToId = {};
 
 // Returns whether the function was added. (It will be refused if it was
 // already there, or if limits or annotations say it shouldn't be added.)
 function addGCFunction(caller, reason, functionLimits)
 {
     if (functionLimits[caller] & LIMIT_CANNOT_GC)
         return false;
 
-    if (ignoreGCFunction(idToMangled[caller]))
+    if (ignoreGCFunction(functionNames[caller]))
         return false;
 
     if (!(caller in gcFunctions)) {
         gcFunctions[caller] = reason;
         return true;
     }
 
     return false;
@@ -118,28 +113,32 @@ function loadCallgraph(file)
 
     let numGCCalls = 0;
 
     for (let line of readFileLines_gen(file)) {
         line = line.replace(/\n/, "");
 
         let match;
         if (match = line.charAt(0) == "#" && /^\#(\d+) (.*)/.exec(line)) {
-            assert(functionNames.length == match[1]);
-            functionNames.push(match[2]);
-            const [ mangled, readable ] = splitFunction(match[2]);
+            const [ _, id, mangled ] = match;
+            assert(functionNames.length == id);
+            functionNames.push(mangled);
+            mangledToId[mangled] = id;
+            continue;
+        }
+        if (match = line.charAt(0) == "=" && /^= (\d+) (.*)/.exec(line)) {
+            const [ _, id, readable ] = match;
+            const mangled = functionNames[id];
             if (mangled in readableNames)
                 readableNames[mangled].push(readable);
             else
                 readableNames[mangled] = [ readable ];
-            mangledName[readable] = mangled;
-            mangledToId[mangled] = idToMangled.length;
-            idToMangled.push(mangled);
             continue;
         }
+
         let limits = 0;
         // Example line: D /17 6 7
         //
         // This means a direct call from 6 -> 7, but within a scope that
         // applies limits 0x1 and 0x10 to the callee.
         //
         // Look for a limit and remove it from the line if found.
         if (line.indexOf("/") != -1) {
@@ -183,20 +182,20 @@ function loadCallgraph(file)
             //         Subclass2::foo();
             //     }
             //
             addToKeyedList(calleesOf, callerField, {callee:callee, limits:0});
             // Mark that we resolved this virtual method, so that it isn't
             // assumed to call some random function that might do anything.
             resolvedFieldCalls.add(callerField);
         } else if (match = tag == 'T' && /^T (\d+) (.*)/.exec(line)) {
-            const mangled = match[1]|0;
+            const id = match[1]|0;
             let tag = match[2];
             if (tag == 'GC Call') {
-                addGCFunction(mangled, "GC", functionLimits);
+                addGCFunction(id, "GC", functionLimits);
                 numGCCalls++;
             }
         } else {
             assert(false, "Invalid format in callgraph line: " + line);
         }
     }
 
     // Callers have a list of callees, with duplicates (if the same function is
@@ -260,19 +259,19 @@ function loadCallgraph(file)
     var worklist = [];
     for (const name in gcFunctions)
         worklist.push(name);
 
     // Include all field calls and unresolved virtual method calls.
     for (const [name, csuName] of fieldCallCSU) {
         if (resolvedFieldCalls.has(name))
             continue; // Skip resolved virtual functions.
-        const fullFieldName = idToMangled[name];
+        const fullFieldName = functionNames[name];
         if (!fieldCallCannotGC(csuName, fullFieldName)) {
-            gcFunctions[name] = 'unresolved ' + idToMangled[name];
+            gcFunctions[name] = 'unresolved ' + fullFieldName;
             worklist.push(name);
         }
     }
 
     // Recursively find all callers not always called in a GC suppression
     // context, and add them to the set of gcFunctions.
     while (worklist.length) {
         name = worklist.shift();
@@ -286,17 +285,17 @@ function loadCallgraph(file)
             }
         }
     }
 
     // Convert functionLimits to limitedFunctions (using mangled names instead
     // of ids.)
 
     for (const [id, limits] of Object.entries(functionLimits))
-        limitedFunctions[idToMangled[id]] = limits;
+        limitedFunctions[functionNames[id]] = limits;
 
     // The above code uses integer ids for efficiency. External code uses
     // mangled names. Rewrite the various data structures to convert ids to
     // mangled names.
     remap_ids_to_mangled_names();
 }
 
 // Return a worklist of functions with no callers, and also initialize
@@ -398,27 +397,27 @@ function gather_recursive_roots(function
 
     return roots;
 }
 
 function remap_ids_to_mangled_names() {
     var tmp = gcFunctions;
     gcFunctions = {};
     for (const [caller, reason] of Object.entries(tmp))
-        gcFunctions[idToMangled[caller]] = idToMangled[reason] || reason;
+        gcFunctions[functionNames[caller]] = functionNames[reason] || reason;
 
     tmp = calleesOf;
     calleesOf = {};
     for (const [callerId, callees] of Object.entries(calleesOf)) {
-        const caller = idToMangled[callerId];
+        const caller = functionNames[callerId];
         for (const {calleeId, limits} of callees)
-            calleesOf[caller][idToMangled[calleeId]] = limits;
+            calleesOf[caller][functionNames[calleeId]] = limits;
     }
 
     tmp = callersOf;
     callersOf = {};
     for (const [calleeId, callers] of Object.entries(callersOf)) {
-        const callee = idToMangled[calleeId];
+        const callee = functionNames[calleeId];
         callersOf[callee] = {};
         for (const {callerId, limits} of callers)
-            callersOf[callee][idToMangled[caller]] = limits;
+            callersOf[callee][functionNames[caller]] = limits;
     }
 }
--- a/js/src/devtools/rootAnalysis/t/testlib.py
+++ b/js/src/devtools/rootAnalysis/t/testlib.py
@@ -13,16 +13,18 @@ HazardSummary = namedtuple('HazardSummar
     'variable',
     'type',
     'GCFunction',
     'location'])
 
 Callgraph = namedtuple('Callgraph', [
     'functionNames',
     'nameToId',
+    'mangledToUnmangled',
+    'unmangledToMangled',
     'calleesOf',
     'callersOf',
     'tags',
     'calleeGraph',
     'callerGraph'])
 
 
 def equal(got, expected):
@@ -119,41 +121,51 @@ sixgill_bin = '{bindir}'
 
     def load_gcFunctions(self):
         return self.load_text_file('gcFunctions.lst', extract=extract_unmangled)
 
     def load_callgraph(self):
         data = Callgraph(
             functionNames=['dummy'],
             nameToId={},
+            mangledToUnmangled={},
+            unmangledToMangled={},
             calleesOf=defaultdict(list),
             callersOf=defaultdict(list),
             tags=defaultdict(set),
             calleeGraph=defaultdict(dict),
             callerGraph=defaultdict(dict),
         )
 
         def lookup(id):
-            return data.functionNames[int(id)]
+            mangled = data.functionNames[int(id)]
+            return data.mangledToUnmangled.get(mangled, mangled)
 
         def add_call(caller, callee, limit):
             data.calleesOf[caller].append(callee)
             data.callersOf[callee].append(caller)
             data.calleeGraph[caller][callee] = True
             data.callerGraph[callee][caller] = True
 
         def process(line):
             if line.startswith('#'):
                 name = line.split(" ", 1)[1]
-                if '$' in name:
-                    name = name[name.index('$') + 1:]
                 data.nameToId[name] = len(data.functionNames)
                 data.functionNames.append(name)
                 return
 
+            if line.startswith('='):
+                m = re.match(r'^= (\d+) (.*)', line)
+                mangled = data.functionNames[int(m.group(1))]
+                unmangled = m.group(2)
+                data.nameToId[unmangled] = id
+                data.mangledToUnmangled[mangled] = unmangled
+                data.unmangledToMangled[unmangled] = mangled
+                return
+
             limit = 0
             m = re.match(r'^\w (?:/(\d+))? ', line)
             if m:
                 limit = int(m[1])
 
             tokens = line.split(' ')
             if tokens[0] in ('D', 'R'):
                 _, caller, callee = tokens