begone, old school loading
author Mike Shaver <shaver@mozilla.org>
Tue, 01 Feb 2011 16:24:23 +0000
changeset 31 ad67b2adaa87
parent 30 1d783f4418e5
child 32 33b9573c5757
push id 24
push user shaver@mozilla.com
push date 2011-02-01 16:25 +0000
begone, old school loading
load/HOW_TO_LOAD
load/get-components.js
load/index-json.js
load/load-bug-table.js
load/load-redis.js
load/make-wget.js
load/search-redis.js
deleted file mode 100644
--- a/load/HOW_TO_LOAD
+++ /dev/null
@@ -1,8 +0,0 @@
-for each product you want to index:
-- node get-components.js $PRODUCT > somefile.json
-  e.g. $ node get-components.js Thunderbird > Thunderbird-components.json
-- node make-wget.js somefile.json > wget.sh
-sh wget.sh
-node index-json.js summaries-*.json
-node load-redis.js bug-index.json
-for i in summaries-*.json; do node load-bug-table.js $i; done
deleted file mode 100644
--- a/load/get-components.js
+++ /dev/null
@@ -1,21 +0,0 @@
-var http = require('http');
-var util = require('util');
-
-var bzapi = http.createClient(443, 'api-dev.bugzilla.mozilla.org', true);
-
-var product = process.argv[2];
-
-var request = bzapi.request('GET', '/latest/count?product=' + product + '&status=UNCONFIRMED&status=NEW&status=ASSIGNED&status=REOPENED&x_axis_field=component', {Host: 'api-dev.bugzilla.mozilla.org'});
-    
-request.end();
-
-request.on('response', function(response) {
-	var data = '';
-	response.on('data', function (chunk) {
-		data += chunk;
-	    });
-	response.on('end', function() {
-		util.puts(data);
-	    });
-    });
-
deleted file mode 100644
--- a/load/index-json.js
+++ /dev/null
@@ -1,58 +0,0 @@
-var util = require("util");
-var fs = require("fs");
-var indexer = require("../lib/indexer");
-
-if (process.argv.length < 3)
-    throw new Error("need to specify at least one summary/id file on command line");
-
-var startTime = Date.now();
-
-var files = process.argv.slice(2);
-
-var invertedIndex = Object.create(null);
-
-var uniqueTerms = 0;
-var totalTerms = 0;
-
-function indexBug(bug)
-{
-    function insertIntoIndex(word)
-    {
-        if (!(word in invertedIndex)) {
-            invertedIndex[word] = [bug.id];
-            uniqueTerms++;
-        } else {
-            invertedIndex[word].push(bug.id); // redis will eat the duplicates
-        }
-    }
-    var keywords = indexer.keywordsForSummary(bug.summary);
-    keywords.forEach(insertIntoIndex);
-    totalTerms += keywords.length;
-}
-
-var totalBugs = 0;
-
-function processFile(fname) {
-    var data = JSON.parse(fs.readFileSync(fname));
-    console.log("Loaded " + data.bugs.length + " bugs from " + fname);
-
-    var bugs = data.bugs;
-    for (var i = 0; i < bugs.length; i++) {
-        indexBug(bugs[i]);
-    }
-    totalBugs += bugs.length;
-}
-
-files.forEach(processFile);
-
-function dumpInvertedIndex()
-{
-    fs.writeFileSync("bug-index.json", JSON.stringify(invertedIndex));
-}
-
-var duration = Date.now() - startTime;
-
-util.puts("indexed " + totalBugs + " bugs from " + files.length + " files in " + duration +
-          " ms (" + totalTerms + " terms, " + uniqueTerms + " unique)");
-
-dumpInvertedIndex();
deleted file mode 100644
--- a/load/load-bug-table.js
+++ /dev/null
@@ -1,24 +0,0 @@
-var redis = require("redis");
-var fs = require("fs");
-
-if (process.argv.length < 3)
-    throw new Error("need to specify at least one bug file on command line");
-
-var fname = process.argv[2];
-
-var data = JSON.parse(fs.readFileSync(fname));
-console.log("Loaded " + data.bugs.length + " bugs from " + fname);
-var bugs = data.bugs;
-
-client = redis.createClient();
-
-client.on("error", function (err) {
-	      console.log("Redis connection error to " + client.host + ":" + client.port + " - " + err);
-	      client.quit();
-});
-
-var multi = client.multi();
-for (var i = 0; i < bugs.length; i++) {
-    multi.set("bug:" + bugs[i].id, bugs[i].summary);
-}
-multi.exec(function (err, results) { client.quit(); });
deleted file mode 100644
--- a/load/load-redis.js
+++ /dev/null
@@ -1,59 +0,0 @@
-var redis = require("redis");
-var fs = require("fs");
-var sys = require("sys");
-
-if (process.argv.length < 3)
-    throw new Error("need to specify at least one JSON index file on command line");
-
-client = redis.createClient();
-
-client.on("error", function (err) {
-    console.log("Redis connection error to " + client.host + ":" + client.port + " - " + err);
-});
-
-var startTime = Date.now();
-var invertedIndex = JSON.parse(fs.readFileSync(process.argv[2]));
-var duration = Date.now() - startTime;
-sys.puts("Loaded in " + duration + "ms");
-
-const END_TOKEN = "   END   ";
-
-invertedIndex[END_TOKEN] = "$$";
-
-startTime = Date.now();
-
-function loadOneWord(err) {
-    if (err) {
-	sys.puts("ERROR: " + err);
-	return;
-    }
-
-    if (!(END_TOKEN in invertedIndex))
-	return done();
-    
-    for (var word in invertedIndex) {
-	if (word == END_TOKEN)
-	    return done();
-
-	var multi = client.multi();
-	var bugs = invertedIndex[word];
-	var wordKey = "word:" + word;
-	for (var i = 0; i < bugs.length; i++)
-	    multi.sadd(wordKey, bugs[i]);
-	multi.exec(loadOneWord);
-//	sys.puts(word + ": " + bugs.length);
-	
-	// the enumeration will start over at the next entry on the callback
-	delete invertedIndex[word];
-	break;
-    }
-}
-
-loadOneWord();
-
-
-function done() {
-    client.quit();
-    duration = Date.now() - startTime;
-    sys.puts("loaded index in " + duration + "ms");
-}
deleted file mode 100644
--- a/load/make-wget.js
+++ /dev/null
@@ -1,18 +0,0 @@
-var qs = require("querystring"), fs = require("fs");
-
-var product = process.argv[2];
-var components = JSON.parse(fs.readFileSync(process.argv[3])).x_labels;
-
-var STATUS_OPEN = ['NEW', 'ASSIGNED', 'UNCONFIRMED', 'REOPENED'];
-
-components.forEach(
-    function(component) {
-	var fname = "summaries-" + component.replace(RegExp("[ &:/()]", "g"), '_') + ".json";
-	var qsData = { status: STATUS_OPEN,
-		       include_fields: 'id,summary',
-		       component: component,
-		       product: product};
-	var uri = '/latest/bug?' + qs.stringify(qsData, '&', '=', false);
-	console.log('wget -nv -O ' + fname + " 'https://api-dev.bugzilla.mozilla.org" + uri + "'");
-    }
-);
deleted file mode 100644
--- a/load/search-redis.js
+++ /dev/null
@@ -1,27 +0,0 @@
-var redis = require("redis");
-var util = require("util");
-var indexer = require("../lib/indexer");
-
-var words = process.argv.slice(2);
-
-console.log("Searching for `" + words + "'");
-words = indexer.processKeywords(words);
-console.log("Processed to `" + words + "'");
-
-var client = redis.createClient();
-
-client.on("error", function (err) {
-    console.log("Redis connection error to " + client.host + ":" + client.port + " - " + err);
-});
-
-var startTime = Date.now();
-
-var results = client.sinter(words.map(function (v) { return "word:" + v; }),
-			    function (err, results) {
-				if (err) 
-    console.log("Redis connection error to " + client.host + ":" + client.port + " - " + err);
-				    
-				util.puts("Completed search in " + (Date.now() - startTime) + " ms");
-				util.puts(results);
-				client.quit();
-			    });