No bug: update readability libs to the up-to-date github versions to include significant perf and quality improvements, rs=me a=readinglist
author: Gijs Kruitbosch <gijskruitbosch@gmail.com>
date: Fri, 20 Mar 2015 20:50:45 -0700
changeset: 248458 4f9d6ba4a684ad19c207c3240740a4b0fde1fde3
parent: 248457 79bc8606e3155585c3f61f1abe70180d48f90970
child: 248459 54db0a4c777fc2e2f2d33229d2b2754be1dbfb29
push id: 7837
push user: jwein@mozilla.com
push date: Fri, 27 Mar 2015 00:27:16 +0000
treeherder: mozilla-aurora@cb0db44ce60e [default view] [failures only]
perfherder: [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers: me, readinglist
milestone: 38.0a2
No bug: update readability libs to the up-to-date github versions to include significant perf and quality improvements, rs=me a=readinglist
toolkit/components/reader/JSDOMParser.js
toolkit/components/reader/Readability.js
--- a/toolkit/components/reader/JSDOMParser.js
+++ b/toolkit/components/reader/JSDOMParser.js
@@ -1,8 +1,15 @@
+/*
+ * DO NOT MODIFY THIS FILE DIRECTLY!
+ *
+ * This is a shared library that is maintained in an external repo:
+ * https://github.com/mozilla/readability
+ */
+
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this file,
  * You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 /**
  * This is a relatively lightweight DOMParser that is safe to use in a web
  * worker. This is far from a complete DOM implementation; however, it should
  * contain the minimal set of functionality necessary for Readability.js.
@@ -26,17 +33,17 @@
  */
 (function (global) {
 
   function error(m) {
     dump("JSDOMParser error: " + m + "\n");
   }
 
   // When a style is set in JS, map it to the corresponding CSS attribute
-  let styleMap = {
+  var styleMap = {
     "alignmentBaseline": "alignment-baseline",
     "background": "background",
     "backgroundAttachment": "background-attachment",
     "backgroundClip": "background-clip",
     "backgroundColor": "background-color",
     "backgroundImage": "background-image",
     "backgroundOrigin": "background-origin",
     "backgroundPosition": "background-position",
@@ -218,197 +225,309 @@
     "wordSpacing": "word-spacing",
     "wordWrap": "word-wrap",
     "writingMode": "writing-mode",
     "zIndex": "z-index",
     "zoom": "zoom",
   };
 
   // Elements that can be self-closing
-  let voidElems = {
+  var voidElems = {
     "area": true,
     "base": true,
     "br": true,
     "col": true,
     "command": true,
     "embed": true,
     "hr": true,
     "img": true,
     "input": true,
     "link": true,
     "meta": true,
     "param": true,
     "source": true,
   };
 
+  var whitespace = [" ", "\t", "\n", "\r"];
+
   // See http://www.w3schools.com/dom/dom_nodetype.asp
-  let nodeTypes = {
+  var nodeTypes = {
     ELEMENT_NODE: 1,
     ATTRIBUTE_NODE: 2,
     TEXT_NODE: 3,
     CDATA_SECTION_NODE: 4,
     ENTITY_REFERENCE_NODE: 5,
     ENTITY_NODE: 6,
     PROCESSING_INSTRUCTION_NODE: 7,
     COMMENT_NODE: 8,
     DOCUMENT_NODE: 9,
     DOCUMENT_TYPE_NODE: 10,
     DOCUMENT_FRAGMENT_NODE: 11,
     NOTATION_NODE: 12
   };
 
   function getElementsByTagName(tag) {
     tag = tag.toUpperCase();
-    let elems = [];
-    let allTags = (tag === "*");
+    var elems = [];
+    var allTags = (tag === "*");
     function getElems(node) {
-      let length = node.childNodes.length;
-      for (let i = 0; i < length; i++) {
-        let child = node.childNodes[i];
-        if (child.nodeType !== 1)
-          continue;
+      var length = node.children.length;
+      for (var i = 0; i < length; i++) {
+        var child = node.children[i];
         if (allTags || (child.tagName === tag))
           elems.push(child);
         getElems(child);
       }
     }
     getElems(this);
     return elems;
   }
 
-  let Node = function () {};
+  var Node = function () {};
 
   Node.prototype = {
     attributes: null,
     childNodes: null,
     localName: null,
     nodeName: null,
     parentNode: null,
     textContent: null,
+    nextSibling: null,
+    previousSibling: null,
 
     get firstChild() {
       return this.childNodes[0] || null;
     },
 
-    get nextSibling() {
-      if (this.parentNode) {
-        let childNodes = this.parentNode.childNodes;
-        return childNodes[childNodes.indexOf(this) + 1] || null;
-      }
+    get firstElementChild() {
+      return this.children[0] || null;
+    },
 
-      return null;
+    get lastChild() {
+      return this.childNodes[this.childNodes.length - 1] || null;
+    },
+
+    get lastElementChild() {
+      return this.children[this.children.length - 1] || null;
     },
 
     appendChild: function (child) {
       if (child.parentNode) {
         child.parentNode.removeChild(child);
       }
 
+      var last = this.lastChild;
+      if (last)
+        last.nextSibling = child;
+      child.previousSibling = last;
+
+      if (child.nodeType === Node.ELEMENT_NODE) {
+        child.previousElementSibling = this.children[this.children.length - 1] || null;
+        this.children.push(child);
+        child.previousElementSibling && (child.previousElementSibling.nextElementSibling = child);
+      }
       this.childNodes.push(child);
       child.parentNode = this;
     },
 
     removeChild: function (child) {
-      let childNodes = this.childNodes;
-      let childIndex = childNodes.indexOf(child);
+      var childNodes = this.childNodes;
+      var childIndex = childNodes.indexOf(child);
       if (childIndex === -1) {
         throw "removeChild: node not found";
       } else {
         child.parentNode = null;
+        var prev = child.previousSibling;
+        var next = child.nextSibling;
+        if (prev)
+          prev.nextSibling = next;
+        if (next)
+          next.previousSibling = prev;
+
+        if (child.nodeType === Node.ELEMENT_NODE) {
+          prev = child.previousElementSibling;
+          next = child.nextElementSibling;
+          if (prev)
+            prev.nextElementSibling = next;
+          if (next)
+            next.previousElementSibling = prev;
+          this.children.splice(this.children.indexOf(child), 1);
+        }
+
+        child.previousSibling = child.nextSibling = null;
+        child.previousElementSibling = child.nextElementSibling = null;
+
         return childNodes.splice(childIndex, 1)[0];
       }
     },
 
     replaceChild: function (newNode, oldNode) {
-      let childNodes = this.childNodes;
-      let childIndex = childNodes.indexOf(oldNode);
+      var childNodes = this.childNodes;
+      var childIndex = childNodes.indexOf(oldNode);
       if (childIndex === -1) {
         throw "replaceChild: node not found";
       } else {
+        // This will take care of updating the new node if it was somewhere else before:
         if (newNode.parentNode)
           newNode.parentNode.removeChild(newNode);
 
         childNodes[childIndex] = newNode;
+
+        // update the new node's sibling properties, and its new siblings' sibling properties
+        newNode.nextSibling = oldNode.nextSibling;
+        newNode.previousSibling = oldNode.previousSibling;
+        if (newNode.nextSibling)
+          newNode.nextSibling.previousSibling = newNode;
+        if (newNode.previousSibling)
+          newNode.previousSibling.nextSibling = newNode;
+
         newNode.parentNode = this;
+
+        // Now deal with elements before we clear out those values for the old node,
+        // because it can help us take shortcuts here:
+        if (newNode.nodeType === Node.ELEMENT_NODE) {
+          if (oldNode.nodeType === Node.ELEMENT_NODE) {
+            // Both were elements, which makes this easier, we just swap things out:
+            newNode.previousElementSibling = oldNode.previousElementSibling;
+            newNode.nextElementSibling = oldNode.nextElementSibling;
+            if (newNode.previousElementSibling)
+              newNode.previousElementSibling.nextElementSibling = newNode;
+            if (newNode.nextElementSibling)
+              newNode.nextElementSibling.previousElementSibling = newNode;
+            this.children[this.children.indexOf(oldNode)] = newNode;
+          } else {
+            // Hard way:
+            newNode.previousElementSibling = (function() {
+              for (var i = childIndex - 1; i >= 0; i--) {
+                if (childNodes[i].nodeType === Node.ELEMENT_NODE)
+                  return childNodes[i];
+              }
+              return null;
+            })();
+            if (newNode.previousElementSibling) {
+              newNode.nextElementSibling = newNode.previousElementSibling.nextElementSibling;
+            } else {
+              newNode.nextElementSibling = (function() {
+                for (var i = childIndex + 1; i < childNodes.length; i++) {
+                  if (childNodes[i].nodeType === Node.ELEMENT_NODE)
+                    return childNodes[i];
+                }
+                return null;
+              })();
+            }
+            if (newNode.previousElementSibling)
+              newNode.previousElementSibling.nextElementSibling = newNode;
+            if (newNode.nextElementSibling)
+              newNode.nextElementSibling.previousElementSibling = newNode;
+
+            if (newNode.nextElementSibling)
+              this.children.splice(this.children.indexOf(newNode.nextElementSibling), 0, newNode);
+            else
+              this.children.push(newNode);
+          }
+        } else {
+          // new node is not an element node.
+          // if the old one was, update its element siblings:
+          if (oldNode.nodeType === Node.ELEMENT_NODE) {
+            if (oldNode.previousElementSibling)
+              oldNode.previousElementSibling.nextElementSibling = oldNode.nextElementSibling;
+            if (oldNode.nextElementSibling)
+              oldNode.nextElementSibling.previousElementSibling = oldNode.previousElementSibling;
+            this.children.splice(this.children.indexOf(oldNode), 1);
+          }
+          // If the old node wasn't an element, neither the new nor the old node was an element,
+          // and the children array and its members shouldn't need any updating.
+        }
+
+
         oldNode.parentNode = null;
+        oldNode.previousSibling = null;
+        oldNode.nextSibling = null;
+        if (oldNode.nodeType === Node.ELEMENT_NODE) {
+          oldNode.previousElementSibling = null;
+          oldNode.nextElementSibling = null;
+        }
         return oldNode;
       }
     }
   };
 
-  for (let i in nodeTypes) {
+  for (var i in nodeTypes) {
     Node[i] = Node.prototype[i] = nodeTypes[i];
   }
 
-  let Attribute = function (name, value) {
+  var Attribute = function (name, value) {
     this.name = name;
     this.value = value;
   };
 
-  let Comment = function () {
+  var Comment = function () {
     this.childNodes = [];
   };
 
   Comment.prototype = {
     __proto__: Node.prototype,
 
     nodeName: "#comment",
     nodeType: Node.COMMENT_NODE
   };
 
-  let Text = function () {
+  var Text = function () {
     this.childNodes = [];
   };
 
   Text.prototype = {
     __proto__: Node.prototype,
 
     nodeName: "#text",
     nodeType: Node.TEXT_NODE,
     textContent: ""
   }
 
-  let Document = function () {
+  var Document = function () {
     this.styleSheets = [];
     this.childNodes = [];
+    this.children = [];
   };
 
   Document.prototype = {
     __proto__: Node.prototype,
 
     nodeName: "#document",
     nodeType: Node.DOCUMENT_NODE,
     title: "",
 
     getElementsByTagName: getElementsByTagName,
 
     getElementById: function (id) {
       function getElem(node) {
-        let length = node.childNodes.length;
+        var length = node.children.length;
         if (node.id === id)
           return node;
-        for (let i = 0; i < length; i++) {
-          let el = getElem(node.childNodes[i]);
+        for (var i = 0; i < length; i++) {
+          var el = getElem(node.children[i]);
           if (el)
             return el;
         }
         return null;
       }
       return getElem(this);
     },
 
     createElement: function (tag) {
-      let node = new Element(tag);
+      var node = new Element(tag);
       return node;
     }
   };
 
-  let Element = function (tag) {
+  var Element = function (tag) {
     this.attributes = [];
     this.childNodes = [];
+    this.children = [];
+    this.nextElementSibling = this.previousElementSibling = null;
     this.localName = tag.toLowerCase();
     this.tagName = tag.toUpperCase();
     this.style = new Style(this);
   };
 
   Element.prototype = {
     __proto__: Node.prototype,
 
@@ -449,26 +568,26 @@
     },
 
     get nodeName() {
       return this.tagName;
     },
 
     get innerHTML() {
       function getHTML(node) {
-        let i = 0;
+        var i = 0;
         for (i = 0; i < node.childNodes.length; i++) {
-          let child = node.childNodes[i];
+          var child = node.childNodes[i];
           if (child.localName) {
             arr.push("<" + child.localName);
 
             // serialize attribute list
-            for (let j = 0; j < child.attributes.length; j++) {
-              let attr = child.attributes[j];
-              let quote = (attr.value.indexOf('"') === -1 ? '"' : "'");
+            for (var j = 0; j < child.attributes.length; j++) {
+              var attr = child.attributes[j];
+              var quote = (attr.value.indexOf('"') === -1 ? '"' : "'");
               arr.push(" " + attr.name + '=' + quote + attr.value + quote);
             }
 
             if (child.localName in voidElems) {
               // if this is a self-closing element, end it here
               arr.push("/>");
             } else {
               // otherwise, add its children
@@ -479,155 +598,155 @@
           } else {
             arr.push(child.textContent);
           }
         }
       }
 
       // Using Array.join() avoids the overhead from lazy string concatenation.
       // See http://blog.cdleary.com/2012/01/string-representation-in-spidermonkey/#ropes
-      let arr = [];
+      var arr = [];
       getHTML(this);
       return arr.join("");
     },
 
     set innerHTML(html) {
-      let parser = new JSDOMParser();
-      let node = parser.parse(html);
-      for (let i = this.childNodes.length; --i >= 0;) {
+      var parser = new JSDOMParser();
+      var node = parser.parse(html);
+      for (var i = this.childNodes.length; --i >= 0;) {
         this.childNodes[i].parentNode = null;
       }
       this.childNodes = node.childNodes;
-      for (let i = this.childNodes.length; --i >= 0;) {
-        this.childNodes[i].parentNode = this;
-      }
+      this.children = node.children; // keep the element-only list in sync with childNodes
+      for (var i = this.childNodes.length; --i >= 0;)
+        this.childNodes[i].parentNode = this;
     },
 
     set textContent(text) {
       // clear parentNodes for existing children
-      for (let i = this.childNodes.length; --i >= 0;) {
+      for (var i = this.childNodes.length; --i >= 0;) {
         this.childNodes[i].parentNode = null;
       }
-
-      let node = new Text();
+      this.children = []; // the only child from here on is a Text node, not an element
+      var node = new Text();
       this.childNodes = [ node ];
       node.textContent = text;
       node.parentNode = this;
     },
 
     get textContent() {
       function getText(node) {
-        let nodes = node.childNodes;
-        for (let i = 0; i < nodes.length; i++) {
-          let child = nodes[i];
+        var nodes = node.childNodes;
+        for (var i = 0; i < nodes.length; i++) {
+          var child = nodes[i];
           if (child.nodeType === 3) {
             text.push(child.textContent);
           } else {
             getText(child);
           }
         }
       }
 
       // Using Array.join() avoids the overhead from lazy string concatenation.
       // See http://blog.cdleary.com/2012/01/string-representation-in-spidermonkey/#ropes
-      let text = [];
+      var text = [];
       getText(this);
       return text.join("");
     },
 
     getAttribute: function (name) {
-      for (let i = this.attributes.length; --i >= 0;) {
-        let attr = this.attributes[i];
+      for (var i = this.attributes.length; --i >= 0;) {
+        var attr = this.attributes[i];
         if (attr.name === name)
           return attr.value;
       }
       return undefined;
     },
 
     setAttribute: function (name, value) {
-      for (let i = this.attributes.length; --i >= 0;) {
-        let attr = this.attributes[i];
+      for (var i = this.attributes.length; --i >= 0;) {
+        var attr = this.attributes[i];
         if (attr.name === name) {
           attr.value = value;
           return;
         }
       }
       this.attributes.push(new Attribute(name, value));
     },
 
     removeAttribute: function (name) {
-      for (let i = this.attributes.length; --i >= 0;) {
-        let attr = this.attributes[i];
+      for (var i = this.attributes.length; --i >= 0;) {
+        var attr = this.attributes[i];
         if (attr.name === name) {
           this.attributes.splice(i, 1);
           break;
         }
       }
     }
   };
 
-  let Style = function (node) {
+  var Style = function (node) {
     this.node = node;
   };
 
   // getStyle() and setStyle() use the style attribute string directly. This
   // won't be very efficient if there are a lot of style manipulations, but
   // it's the easiest way to make sure the style attribute string and the JS
   // style property stay in sync. Readability.js doesn't do many style
   // manipulations, so this should be okay.
   Style.prototype = {
     getStyle: function (styleName) {
-      let attr = this.node.getAttribute("style");
+      var attr = this.node.getAttribute("style");
       if (!attr)
         return undefined;
 
-      let styles = attr.split(";");
-      for (let i = 0; i < styles.length; i++) {
-        let style = styles[i].split(":");
-        let name = style[0].trim();
+      var styles = attr.split(";");
+      for (var i = 0; i < styles.length; i++) {
+        var style = styles[i].split(":");
+        var name = style[0].trim();
         if (name === styleName)
           return style[1].trim();
       }
 
       return undefined;
     },
 
     setStyle: function (styleName, styleValue) {
-      let value = this.node.getAttribute("style") || "";
-      let index = 0;
+      var value = this.node.getAttribute("style") || "";
+      var index = 0;
       do {
-        let next = value.indexOf(";", index) + 1;
-        let length = next - index - 1;
-        let style = (length > 0 ? value.substr(index, length) : value.substr(index));
+        var next = value.indexOf(";", index) + 1;
+        var length = next - index - 1;
+        var style = (length > 0 ? value.substr(index, length) : value.substr(index));
         if (style.substr(0, style.indexOf(":")).trim() === styleName) {
           value = value.substr(0, index).trim() + (next ? " " + value.substr(next).trim() : "");
           break;
         }
         index = next;
       } while (index);
 
       value += " " + styleName + ": " + styleValue + ";";
       this.node.setAttribute("style", value.trim());
     }
   };
 
   // For each item in styleMap, define a getter and setter on the style
   // property.
-  for (let jsName in styleMap) {
+  for (var jsName in styleMap) {
     (function (cssName) {
       Style.prototype.__defineGetter__(jsName, function () {
         return this.getStyle(cssName);
       });
       Style.prototype.__defineSetter__(jsName, function (value) {
         this.setStyle(cssName, value);
       });
     }) (styleMap[jsName]);
   }
 
-  let JSDOMParser = function () {
+  var JSDOMParser = function () {
     this.currentChar = 0;
 
     // In makeElementNode() we build up many strings one char at a time. Using
     // += for this results in lots of short-lived intermediate strings. It's
     // better to build an array of single-char strings and then join() them
     // together at the end. And reusing a single array (i.e. |this.strBuf|)
     // over and over for this purpose uses less memory than using a new array
     // for each string.
@@ -654,57 +773,57 @@
       return this.html[this.currentChar++];
     },
 
     /**
      * Called after a quote character is read. This finds the next quote
      * character and returns the text string in between.
      */
     readString: function (quote) {
-      let str;
-      let n = this.html.indexOf(quote, this.currentChar);
+      var str;
+      var n = this.html.indexOf(quote, this.currentChar);
       if (n === -1) {
         this.currentChar = this.html.length;
         str = null;
       } else {
         str = this.html.substring(this.currentChar, n);
         this.currentChar = n + 1;
       }
 
       return str;
     },
 
     /**
      * Called when parsing a node. This finds the next name/value attribute
      * pair and adds the result to the attributes list.
      */
     readAttribute: function (node) {
-      let name = "";
+      var name = "";
 
-      let n = this.html.indexOf("=", this.currentChar);
+      var n = this.html.indexOf("=", this.currentChar);
       if (n === -1) {
         this.currentChar = this.html.length;
       } else {
         // Read until a '=' character is hit; this will be the attribute key
         name = this.html.substring(this.currentChar, n);
         this.currentChar = n + 1;
       }
 
       if (!name)
         return;
 
       // After a '=', we should see a '"' for the attribute value
-      let c = this.nextChar();
+      var c = this.nextChar();
       if (c !== '"' && c !== "'") {
-        error("expecting '\"'");
+        error("Error reading attribute " + name + ", expecting '\"'");
         return;
       }
 
       // Read the attribute value (and consume the matching quote)
-      let value = this.readString(c);
+      var value = this.readString(c);
 
       if (!value)
         return;
 
       node.attributes.push(new Attribute(name, value));
 
       return;
     },
@@ -713,119 +832,131 @@
      * Parses and returns an Element node. This is called after a '<' has been
      * read.
      *
      * @returns an array; the first index of the array is the parsed node;
      *          the second index is a boolean indicating whether this is a void
      *          Element
      */
     makeElementNode: function (retPair) {
-      let c = this.nextChar();
+      var c = this.nextChar();
 
       // Read the Element tag name
-      let strBuf = this.strBuf;
+      var strBuf = this.strBuf;
       strBuf.length = 0;
-      while (c !== " " && c !== ">" && c !== "/") {
+      while (whitespace.indexOf(c) == -1 && c !== ">" && c !== "/") {
         if (c === undefined)
           return false;
         strBuf.push(c);
         c = this.nextChar();
       }
-      let tag = strBuf.join('');
+      var tag = strBuf.join('');
 
       if (!tag)
         return false;
 
-      let node = new Element(tag);
+      var node = new Element(tag);
 
       // Read Element attributes
       while (c !== "/" && c !== ">") {
         if (c === undefined)
           return false;
-        while (this.match(" "));
+        while (whitespace.indexOf(this.html[this.currentChar++]) != -1);
+        this.currentChar--;
         c = this.nextChar();
         if (c !== "/" && c !== ">") {
           --this.currentChar;
           this.readAttribute(node);
         }
       }
 
       // If this is a self-closing tag, read '/>'
-      let closed = tag in voidElems;
+      var closed = tag in voidElems;
       if (c === "/") {
         closed = true;
         c = this.nextChar();
         if (c !== ">") {
-          error("expected '>'");
+          error("expected '>' to close " + tag);
           return false;
         }
       }
 
       retPair[0] = node;
       retPair[1] = closed;
       return true
     },
 
     /**
      * If the current input matches this string, advance the input index;
      * otherwise, do nothing.
      *
      * @returns whether input matched string
      */
     match: function (str) {
-      let strlen = str.length;
+      var strlen = str.length;
       if (this.html.substr(this.currentChar, strlen) === str) {
         this.currentChar += strlen;
         return true;
       }
       return false;
     },
 
     /**
      * Searches the input until a string is found and discards all input up to
      * and including the matched string.
      */
     discardTo: function (str) {
-      let index = this.html.indexOf(str, this.currentChar) + str.length;
-      if (index === -1)
-        this.currentChar = this.html.length;
-      this.currentChar = index;
+      // indexOf() signals "not found" with -1, so test it before adding the
+      // length of |str|; on a miss, consume the rest of the input.
+      var index = this.html.indexOf(str, this.currentChar);
+      this.currentChar = (index === -1) ? this.html.length : index + str.length;
     },
 
     /**
      * Reads child nodes for the given node.
      */
     readChildren: function (node) {
-      let child;
+      var child;
       while ((child = this.readNode())) {
         // Don't keep Comment nodes
         if (child.nodeType !== 8) {
-          node.childNodes.push(child);
-          child.parentNode = node;
+          node.appendChild(child);
         }
       }
     },
 
+    // Reads raw <script> text into a single Text child of |node|, stopping
+    // just before "</script>" (or at the end of input if it is never closed).
+    readScript: function (node) {
+      var index = this.html.indexOf("</script>", this.currentChar);
+      if (index === -1)
+        index = this.html.length;
+      var txt = new Text();
+      txt.textContent = this.html.substring(this.currentChar, index);
+      node.appendChild(txt);
+      this.currentChar = index;
+    },
+
     /**
      * Reads the next child node from the input. If we're reading a closing
      * tag, or if we've reached the end of input, return null.
      *
      * @returns the node
      */
     readNode: function () {
-      let c = this.nextChar();
+      var c = this.nextChar();
  
       if (c === undefined)
         return null;
 
       // Read any text as Text node
       if (c !== "<") {
         --this.currentChar;
-        let node = new Text();
-        let n = this.html.indexOf("<", this.currentChar);
+        var node = new Text();
+        var n = this.html.indexOf("<", this.currentChar);
         if (n === -1) {
           node.textContent = this.html.substring(this.currentChar, this.html.length);
           this.currentChar = this.html.length;
         } else {
           node.textContent = this.html.substring(this.currentChar, n);
           this.currentChar = n;
         }
         return node;
@@ -837,17 +968,17 @@
       // textContent, but we don't really care about Comment nodes (we throw
       // them away in readChildren()). So just returning an empty Comment node
       // here is sufficient.
       if (c === "!" || c === "?") {
         this.currentChar++;
         if (this.match("--")) {
           this.discardTo("-->");
         } else {
-          let c = this.nextChar();
+          var c = this.nextChar();
           while (c !== ">") {
             if (c === undefined)
               return null;
             if (c === '"' || c === "'")
               this.readString(c);
             c = this.nextChar();
           }
         }
@@ -857,35 +988,42 @@
       // If we're reading a closing tag, return null. This means we've reached
       // the end of this set of child nodes.
       if (c === "/") {
         --this.currentChar;
         return null;
       }
 
       // Otherwise, we're looking at an Element node
-      let result = this.makeElementNode(this.retPair);
+      var result = this.makeElementNode(this.retPair);
       if (!result)
         return null;
 
-      let node = this.retPair[0];
-      let closed = this.retPair[1];
-      let localName = node.localName;
+      var node = this.retPair[0];
+      var closed = this.retPair[1];
+      var localName = node.localName;
 
       // If this isn't a void Element, read its child nodes
       if (!closed) {
-        this.readChildren(node);
-        let closingTag = "</" + localName + ">";
+        if (localName == "script") {
+          this.readScript(node);
+        } else {
+          this.readChildren(node);
+        }
+        var closingTag = "</" + localName + ">";
         if (!this.match(closingTag)) {
           error("expected '" + closingTag + "'");
           return null;
         }
       }
 
-      if (localName === "title") {
+      // Only use the first title, because SVG might have other
+      // title elements which we don't care about (medium.com
+      // does this, at least).
+      if (localName === "title" && !this.doc.title) {
         this.doc.title = node.textContent.trim();
       } else if (localName === "head") {
         this.doc.head = node;
       } else if (localName === "body") {
         this.doc.body = node;
       } else if (localName === "html") {
         this.doc.documentElement = node;
       }
@@ -893,24 +1031,24 @@
       return node;
     },
 
     /**
      * Parses an HTML string and returns a JS implementation of the Document.
      */
     parse: function (html) {
       this.html = html;
-      let doc = this.doc = new Document();
+      var doc = this.doc = new Document();
       this.readChildren(doc);
 
       // If this is an HTML document, remove root-level children except for the
       // <html> node
       if (doc.documentElement) {
-        for (let i = doc.childNodes.length; --i >= 0;) {
-          let child = doc.childNodes[i];
+        for (var i = doc.childNodes.length; --i >= 0;) {
+          var child = doc.childNodes[i];
           if (child !== doc.documentElement) {
             doc.removeChild(child);
           }
         }
       }
 
       return doc;
     }
--- a/toolkit/components/reader/Readability.js
+++ b/toolkit/components/reader/Readability.js
@@ -97,26 +97,28 @@ Readability.prototype = {
   REGEXPS: {
     unlikelyCandidates: /combx|comment|community|disqus|extra|foot|header|menu|remark|rss|shoutbox|sidebar|sponsor|ad-break|agegate|pagination|pager|popup|tweet|twitter/i,
     okMaybeItsACandidate: /and|article|body|column|main|shadow/i,
     positive: /article|body|content|entry|hentry|main|page|pagination|post|text|blog|story/i,
     negative: /hidden|combx|comment|com-|contact|foot|footer|footnote|masthead|media|meta|outbrain|promo|related|scroll|shoutbox|sidebar|sponsor|shopping|tags|tool|widget/i,
     extraneous: /print|archive|comment|discuss|e[\-]?mail|share|reply|all|login|sign|single|utility/i,
     byline: /byline|author|dateline|writtenby/i,
     replaceFonts: /<(\/?)font[^>]*>/gi,
-    trim: /^\s+|\s+$/g,
     normalize: /\s{2,}/g,
-    videos: /http:\/\/(www\.)?(youtube|vimeo)\.com/i,
+    videos: /https?:\/\/(www\.)?(youtube|vimeo)\.com/i,
     nextLink: /(next|weiter|continue|>([^\|]|$)|»([^\|]|$))/i,
     prevLink: /(prev|earl|old|new|<|«)/i,
-    whitespace: /^\s*$/
+    whitespace: /^\s*$/,
+    hasContent: /\S$/,
   },
 
   DIV_TO_P_ELEMS: [ "A", "BLOCKQUOTE", "DL", "DIV", "IMG", "OL", "P", "PRE", "TABLE", "UL", "SELECT" ],
 
+  ALTER_TO_DIV_EXCEPTIONS: ["DIV", "ARTICLE", "SECTION", "P"],
+
   /**
    * Run any post-process modifications to article content as necessary.
    *
    * @param Element
    * @return void
   **/
   _postProcessContent: function(articleContent) {
     // Readability cannot open relative uris so we convert them to absolute uris.
@@ -199,17 +201,17 @@ Readability.prototype = {
         curTitle = origTitle.replace(/[^:]*[:](.*)/gi,'$1');
     } else if (curTitle.length > 150 || curTitle.length < 15) {
       var hOnes = doc.getElementsByTagName('h1');
 
       if (hOnes.length === 1)
         curTitle = this._getInnerText(hOnes[0]);
     }
 
-    curTitle = curTitle.replace(this.REGEXPS.trim, "");
+    curTitle = curTitle.trim();
 
     if (curTitle.split(' ').length <= 4)
       curTitle = origTitle;
 
     return curTitle;
   },
 
   /**
@@ -218,18 +220,18 @@ Readability.prototype = {
    *
    * @return void
    **/
   _prepDocument: function() {
     var doc = this._doc;
 
     // Remove all style tags in head
     var styleTags = doc.getElementsByTagName("style");
-    for (var st = 0; st < styleTags.length; st += 1) {
-      styleTags[st].textContent = "";
+    for (var st = styleTags.length - 1; st >= 0; st -= 1) {
+      styleTags[st].parentNode.removeChild(styleTags[st]);
     }
 
     if (doc.body) {
       this._replaceBrs(doc.body);
     }
 
     var fonts = doc.getElementsByTagName("FONT");
     for (var i = fonts.length; --i >=0;) {
@@ -300,16 +302,18 @@ Readability.prototype = {
           p.appendChild(next);
           next = sibling;
         }
       }
     }
   },
 
   /**
    * Rename a node in place by overwriting its localName (lower-cased) and
    * tagName (upper-cased) properties with the given tag.
    *
    * @param Element node whose tag properties are overwritten
    * @param String  tag name to assign; its case is normalised here
    * @return void
    **/
   _setNodeTag: function (node, tag) {
+    // FIXME this doesn't work on anything but JSDOMParser (ie the node's tag
+    // won't actually be set).
     node.localName = tag.toLowerCase();
     node.tagName = tag.toUpperCase();
   },
 
   /**
    * Prepare the article node for display. Clean out any inline styles,
    * iframes, forms, strip extraneous <p> tags, etc.
    *
@@ -402,16 +406,64 @@ Readability.prototype = {
       case 'TH':
         node.readability.contentScore -= 5;
         break;
     }
 
     node.readability.contentScore += this._getClassWeight(node);
   },
 
+  /**
+   * Detach a node from its parent and report where a traversal should resume.
+   * The successor is looked up before the removal, skipping the node's own
+   * descendants because they leave the tree together with it.
+   *
+   * @param Element node to remove; it must currently have a parent
+   * @return Element the element to continue with, or a falsy value when
+   *         nothing follows the removed node
+   **/
+  _removeAndGetNext: function(node) {
+    var successor = this._getNextNode(node, true);
+    var owner = node.parentNode;
+    owner.removeChild(node);
+    return successor;
+  },
+
+  /**
+   * Step to the next element in a depth-first walk of the DOM.
+   *
+   * Elements are visited in document order: first child, then next sibling,
+   * then the next sibling of the closest ancestor that has one. Pass true as
+   * the second argument when the given node and its subtree are about to be
+   * discarded, so the walk resumes after that subtree instead of inside it.
+   *
+   * @param Element node the walk is currently at
+   * @param Boolean ignoreSelfAndKids skip the descendants of the given node
+   * @return Element the next element, or a falsy value once the walk is done
+   */
+  _getNextNode: function(node, ignoreSelfAndKids) {
+    // Descend first, unless the caller asked us to stay out of this subtree.
+    if (!ignoreSelfAndKids) {
+      var firstKid = node.firstElementChild;
+      if (firstKid) {
+        return firstKid;
+      }
+    }
+    // Otherwise take the nearest following sibling, looking at the node itself
+    // and then at each ancestor in turn. Ancestors were already visited on the
+    // way down, so they are never returned themselves.
+    var cursor = node;
+    var following = cursor.nextElementSibling;
+    while (!following) {
+      cursor = cursor.parentNode;
+      if (!cursor) {
+        // Ran off the top of the tree: nothing is left to visit.
+        return cursor;
+      }
+      following = cursor.nextElementSibling;
+    }
+    return following;
+  },
+
+  /**
+   * Decide whether a node is the article byline and, if so, remember its
+   * trimmed text in this._articleByline. Only the first byline found is kept:
+   * once one is recorded every later call returns false.
+   *
+   * A node qualifies when it has rel="author" or when matchString matches
+   * REGEXPS.byline, and its text additionally passes _isValidByline.
+   *
+   * @param Element node to inspect
+   * @param String  matchString text tested against the byline pattern
+   * @return Boolean true when the node was recorded as the byline
+   **/
+  _checkByline: function(node, matchString) {
+    if (this._articleByline) {
+      return false;
+    }
+
+    // Nodes without getAttribute simply have no rel value to compare.
+    var rel;
+    if (node.getAttribute !== undefined) {
+      rel = node.getAttribute("rel");
+    }
+
+    var looksLikeByline = rel === "author" || this.REGEXPS.byline.test(matchString);
+    if (!looksLikeByline || !this._isValidByline(node.textContent)) {
+      return false;
+    }
+
+    this._articleByline = node.textContent.trim();
+    return true;
+  },
+
   /***
    * grabArticle - Using a variety of metrics (content score, classname, element types), find the content that is
    *         most likely to be the stuff a user wants to read. Then return it wrapped up in a div.
    *
    * @param page a document to run upon. Needs to be a full document, complete with body.
    * @return Element
   **/
   _grabArticle: function (page) {
@@ -425,123 +477,89 @@ Readability.prototype = {
       return null;
     }
 
     var pageCacheHtml = page.innerHTML;
 
     // Check if any "dir" is set on the toplevel document element
     this._articleDir = doc.documentElement.getAttribute("dir");
 
-    //helper function used below in the 'while' loop:
-    function purgeNode(node, allElements) {
-      for (var i = node.childNodes.length; --i >= 0;) {
-        purgeNode(node.childNodes[i], allElements);
-      }
-      if (node._index !== undefined && allElements[node._index] == node)
-        delete allElements[node._index];
-    }
     while (true) {
       var stripUnlikelyCandidates = this._flagIsActive(this.FLAG_STRIP_UNLIKELYS);
-      var allElements = page.getElementsByTagName('*');
 
       // First, node prepping. Trash nodes that look cruddy (like ones with the
       // class name "comment", etc), and turn divs into P tags where they have been
       // used inappropriately (as in, where they contain no other block level elements.)
-      //
-      // Note: Assignment from index for performance. See http://www.peachpit.com/articles/article.aspx?p=31567&seqNum=5
-      // TODO: Shouldn't this be a reverse traversal?
-      var node = null;
-      var nodesToScore = [];
-
-      // var each node know its index in the allElements array.
-      for (var i = allElements.length; --i >= 0;) {
-        allElements[i]._index = i;
-      }
+      var elementsToScore = [];
+      var node = this._doc.documentElement;
 
-      /**
-       * JSDOMParser returns static node lists, not live ones. When we remove
-       * an element from the document, we need to manually remove it - and all
-       * of its children - from the allElements array.
-       */
-      for (var nodeIndex = 0; nodeIndex < allElements.length; nodeIndex++) {
-        if (!(node = allElements[nodeIndex]))
-          continue;
+      while (node) {
+        var matchString = node.className + " " + node.id;
 
-        var matchString = node.className + node.id;
-        if (matchString.search(this.REGEXPS.byline) !== -1 && !this._articleByline) {
-          if (this._isValidByline(node.textContent)) {
-            this._articleByline = node.textContent.trim();
-            node.parentNode.removeChild(node);
-            purgeNode(node, allElements);
-            continue;
-          }
+        // Check to see if this node is a byline, and remove it if it is.
+        if (this._checkByline(node, matchString)) {
+          node = this._removeAndGetNext(node);
+          continue;
         }
 
         // Remove unlikely candidates
         if (stripUnlikelyCandidates) {
-          if (matchString.search(this.REGEXPS.unlikelyCandidates) !== -1 &&
-            matchString.search(this.REGEXPS.okMaybeItsACandidate) === -1 &&
-            node.tagName !== "BODY") {
+          if (this.REGEXPS.unlikelyCandidates.test(matchString) &&
+              !this.REGEXPS.okMaybeItsACandidate.test(matchString) &&
+              node.tagName !== "BODY") {
             this.log("Removing unlikely candidate - " + matchString);
-            node.parentNode.removeChild(node);
-            purgeNode(node, allElements);
+            node = this._removeAndGetNext(node);
             continue;
           }
         }
 
         if (node.tagName === "P" || node.tagName === "TD" || node.tagName === "PRE")
-          nodesToScore[nodesToScore.length] = node;
+          elementsToScore.push(node);
 
         // Turn all divs that don't have children block level elements into p's
         if (node.tagName === "DIV") {
           // Sites like http://mobile.slate.com encloses each paragraph with a DIV
           // element. DIVs with only a P element inside and no text content can be
           // safely converted into plain P elements to avoid confusing the scoring
           // algorithm with DIVs with are, in practice, paragraphs.
-          var pIndex = this._getSinglePIndexInsideDiv(node);
-
-          if (pIndex >= 0 || !this._hasChildBlockElement(node)) {
-            if (pIndex >= 0) {
-              var newNode = node.childNodes[pIndex];
-              node.parentNode.replaceChild(newNode, node);
-              purgeNode(node, allElements);
-            } else {
-              this._setNodeTag(node, "P");
-              nodesToScore[nodesToScore.length] = node;
-            }
+          if (this._hasSinglePInsideElement(node)) {
+            var newNode = node.firstElementChild;
+            node.parentNode.replaceChild(newNode, node);
+            node = newNode;
+          } else if (!this._hasChildBlockElement(node)) {
+            this._setNodeTag(node, "P");
+            elementsToScore.push(node);
           } else {
             // EXPERIMENTAL
             for (var i = 0, il = node.childNodes.length; i < il; i += 1) {
               var childNode = node.childNodes[i];
-              if (!childNode)
-                continue;
-
-              if (childNode.nodeType === 3) { // Node.TEXT_NODE
+              if (childNode.nodeType === Node.TEXT_NODE) {
                 var p = doc.createElement('p');
                 p.textContent = childNode.textContent;
                 p.style.display = 'inline';
                 p.className = 'readability-styled';
-                childNode.parentNode.replaceChild(p, childNode);
+                node.replaceChild(p, childNode);
               }
             }
           }
         }
+        node = this._getNextNode(node);
       }
 
       /**
        * Loop through all paragraphs, and assign a score to them based on how content-y they look.
        * Then add their score to their parent node.
        *
        * A score is determined by things like number of commas, class names, etc. Maybe eventually link density.
       **/
       var candidates = [];
-      for (var pt = 0; pt < nodesToScore.length; pt += 1) {
-        var parentNode = nodesToScore[pt].parentNode;
+      for (var pt = 0; pt < elementsToScore.length; pt += 1) {
+        var parentNode = elementsToScore[pt].parentNode;
         var grandParentNode = parentNode ? parentNode.parentNode : null;
-        var innerText = this._getInnerText(nodesToScore[pt]);
+        var innerText = this._getInnerText(elementsToScore[pt]);
 
         if (!parentNode || typeof(parentNode.tagName) === 'undefined')
           continue;
 
         // If this paragraph is less than 25 characters, don't even count it.
         if (innerText.length < 25)
           continue;
 
@@ -607,101 +625,125 @@ Readability.prototype = {
       var neededToCreateTopCandidate = false;
 
       // If we still have no top candidate, just use the body as a last resort.
       // We also have to copy the body node so it is something we can modify.
       if (topCandidate === null || topCandidate.tagName === "BODY") {
         // Move all of the page's children into topCandidate
         topCandidate = doc.createElement("DIV");
         neededToCreateTopCandidate = true;
-        var children = page.childNodes;
-        while (children.length) {
-          this.log("Moving child out:", children[0]);
-          topCandidate.appendChild(children[0]);
+        // Move everything (not just elements, also text nodes etc.) into the container
+        // so we even include text directly in the body:
+        var kids = page.childNodes;
+        while (kids.length) {
+          this.log("Moving child out:", kids[0]);
+          topCandidate.appendChild(kids[0]);
         }
 
         page.appendChild(topCandidate);
 
         this._initializeNode(topCandidate);
+      } else if (topCandidate) {
+        // Because of our bonus system, parents of candidates might have scores
+        // themselves. They get half of the node. There won't be nodes with higher
+        // scores than our topCandidate, but if we see the score going *up* in the first
+        // few steps up the tree, that's a decent sign that there might be more content
+        // lurking in other places that we want to unify in. The sibling stuff
+        // below does some of that - but only if we've looked high enough up the DOM
+        // tree.
+        var parentOfTopCandidate = topCandidate.parentNode;
+        // The scores shouldn't get too low.
+        var scoreThreshold = topCandidate.readability.contentScore / 3;
+        var lastScore = parentOfTopCandidate.readability.contentScore;
+        while (parentOfTopCandidate && parentOfTopCandidate.readability) {
+          var parentScore = parentOfTopCandidate.readability.contentScore;
+          if (parentScore < scoreThreshold)
+            break;
+          if (parentScore > lastScore) {
+            // Alright! We found a better parent to use.
+            topCandidate = parentOfTopCandidate;
+            break;
+          }
+          parentOfTopCandidate = parentOfTopCandidate.parentNode;
+        }
       }
 
       // Now that we have the top candidate, look through its siblings for content
       // that might also be related. Things like preambles, content split by ads
       // that we removed, etc.
       var articleContent = doc.createElement("DIV");
       if (isPaging)
         articleContent.id = "readability-content";
 
       var siblingScoreThreshold = Math.max(10, topCandidate.readability.contentScore * 0.2);
-      var siblingNodes = topCandidate.parentNode.childNodes;
+      var siblings = topCandidate.parentNode.children;
 
-      for (var s = 0, sl = siblingNodes.length; s < sl; s += 1) {
-        var siblingNode = siblingNodes[s];
+      for (var s = 0, sl = siblings.length; s < sl; s++) {
+        var sibling = siblings[s];
         var append = false;
 
-        this.log("Looking at sibling node:", siblingNode, ((typeof siblingNode.readability !== 'undefined') ? ("with score " + siblingNode.readability.contentScore) : ''));
-        this.log("Sibling has score " + (siblingNode.readability ? siblingNode.readability.contentScore : 'Unknown'));
-
-        if (siblingNode === topCandidate)
-          append = true;
+        this.log("Looking at sibling node:", sibling, sibling.readability ? ("with score " + sibling.readability.contentScore) : '');
+        this.log("Sibling has score", sibling.readability ? sibling.readability.contentScore : 'Unknown');
 
-        var contentBonus = 0;
+        if (sibling === topCandidate) {
+          append = true;
+        } else {
+          var contentBonus = 0;
 
-        // Give a bonus if sibling nodes and top candidates have the example same classname
-        if (siblingNode.className === topCandidate.className && topCandidate.className !== "")
-          contentBonus += topCandidate.readability.contentScore * 0.2;
+          // Give a bonus if sibling nodes and top candidates have the exact same classname
+          if (sibling.className === topCandidate.className && topCandidate.className !== "")
+            contentBonus += topCandidate.readability.contentScore * 0.2;
 
-        if (typeof siblingNode.readability !== 'undefined' &&
-          (siblingNode.readability.contentScore+contentBonus) >= siblingScoreThreshold)
-          append = true;
+          if (sibling.readability &&
+              ((sibling.readability.contentScore + contentBonus) >= siblingScoreThreshold)) {
+            append = true;
+          } else if (sibling.nodeName === "P") {
+            var linkDensity = this._getLinkDensity(sibling);
+            var nodeContent = this._getInnerText(sibling);
+            var nodeLength = nodeContent.length;
 
-        if (siblingNode.nodeName === "P") {
-          var linkDensity = this._getLinkDensity(siblingNode);
-          var nodeContent = this._getInnerText(siblingNode);
-          var nodeLength = nodeContent.length;
-
-          if (nodeLength > 80 && linkDensity < 0.25) {
-            append = true;
-          } else if (nodeLength < 80 && linkDensity === 0 && nodeContent.search(/\.( |$)/) !== -1) {
-            append = true;
+            if (nodeLength > 80 && linkDensity < 0.25) {
+              append = true;
+            } else if (nodeLength < 80 && linkDensity === 0 && nodeContent.search(/\.( |$)/) !== -1) {
+              append = true;
+            }
           }
         }
 
         if (append) {
-          this.log("Appending node:", siblingNode);
+          this.log("Appending node:", sibling);
 
-          // siblingNodes is a reference to the childNodes array, and
-          // siblingNode is removed from the array when we call appendChild()
-          // below. As a result, we must revisit this index since the nodes
-          // have been shifted.
-          s -= 1;
-          sl -= 1;
+          if (this.ALTER_TO_DIV_EXCEPTIONS.indexOf(sibling.nodeName) === -1) {
+            // We have a node that isn't a common block level element, like a form or td tag.
+            // Turn it into a div so it doesn't get filtered out later by accident.
+            this.log("Altering sibling:", sibling, 'to div.');
 
-          if (siblingNode.nodeName !== "DIV" && siblingNode.nodeName !== "P") {
-            // We have a node that isn't a common block level element, like a form or td tag.
-            // Turn it into a div so it doesn't get filtered out later by accident. */
-            this.log("Altering siblingNode:", siblingNode, 'to div.');
-
-            this._setNodeTag(siblingNode, "DIV");
+            this._setNodeTag(sibling, "DIV");
           }
 
           // To ensure a node does not interfere with readability styles,
           // remove its classnames.
-          siblingNode.removeAttribute("class");
+          sibling.removeAttribute("class");
 
-          // Append sibling and subtract from our list because it removes
-          // the node when you append to another node.
-          articleContent.appendChild(siblingNode);
+          articleContent.appendChild(sibling);
+          // siblings is a reference to the children array, and
+          // sibling is removed from the array when we call appendChild().
+          // As a result, we must revisit this index since the nodes
+          // have been shifted.
+          s -= 1;
+          sl -= 1;
         }
       }
 
-      this.log("Article content pre-prep: " + articleContent.innerHTML);
+      if (this.ENABLE_LOGGING)
+        this.log("Article content pre-prep: " + articleContent.innerHTML);
       // So we have all of the content that we need. Now we clean it up for presentation.
       this._prepArticle(articleContent);
-      this.log("Article content post-prep: " + articleContent.innerHTML);
+      if (this.ENABLE_LOGGING)
+        this.log("Article content post-prep: " + articleContent.innerHTML);
 
       if (this._curPageNum === 1) {
         if (neededToCreateTopCandidate) {
           // We already created a fake div thing, and there wouldn't have been any siblings left
           // for the previous loop, so there's no point trying to create a new div, and then
           // move all the children over. Just assign IDs and class names here. No need to append
           // because that already happened anyway.
           topCandidate.id = "readability-page-1";
@@ -713,17 +755,18 @@ Readability.prototype = {
           var children = articleContent.childNodes;
           while (children.length) {
             div.appendChild(children[0]);
           }
           articleContent.appendChild(div);
         }
       }
 
-      this.log("Article content after paging: " + articleContent.innerHTML);
+      if (this.ENABLE_LOGGING)
+        this.log("Article content after paging: " + articleContent.innerHTML);
 
       // Now that we've gone through the full algorithm, check to see if
       // we got any meaningful content. If we didn't, we may need to re-run
       // grabArticle with different flags set. This gives us a higher likelihood of
       // finding the content, and the sieve approach gives us a higher likelihood of
       // finding the -right- content.
       if (this._getInnerText(articleContent, true).length < 500) {
         page.innerHTML = pageCacheHtml;
@@ -755,46 +798,44 @@ Readability.prototype = {
     if (typeof byline == 'string' || byline instanceof String) {
       byline = byline.trim();
       return (byline.length > 0) && (byline.length < 100);
     }
     return false;
   },
 
   /**
-   * Attempts to get the excerpt from these
-   * sources in the following order:
-   * - meta description tag
-   * - open-graph description
-   * - twitter cards description
-   * - article's first paragraph
-   * If no excerpt is found, an empty string will be
-   * returned.
-   *
-   * @param Element - root element of the processed version page
-   * @return String - excerpt of the article
-  **/
-  _getExcerpt: function(articleContent) {
+   * Attempts to get excerpt and byline metadata for the article.
+   * 
+   * @return Object with optional "excerpt" and "byline" properties
+   */
+  _getArticleMetadata: function() {
+    var metadata = {};
     var values = {};
     var metaElements = this._doc.getElementsByTagName("meta");
 
     // Match "description", or Twitter's "twitter:description" (Cards)
     // in name attribute.
     var namePattern = /^\s*((twitter)\s*:\s*)?description\s*$/gi;
 
     // Match Facebook's og:description (Open Graph) in property attribute.
     var propertyPattern = /^\s*og\s*:\s*description\s*$/gi;
 
     // Find description tags.
     for (var i = 0; i < metaElements.length; i++) {
       var element = metaElements[i];
       var elementName = element.getAttribute("name");
       var elementProperty = element.getAttribute("property");
 
-      var name;
+      if (elementName === "author") {
+        metadata.byline = element.getAttribute("content");
+        continue;
+      }
+
+      var name = null;
       if (namePattern.test(elementName)) {
         name = elementName;
       } else if (propertyPattern.test(elementProperty)) {
         name = elementProperty;
       }
 
       if (name) {
         var content = element.getAttribute("content");
@@ -803,36 +844,26 @@ Readability.prototype = {
           // so we can match below.
           name = name.toLowerCase().replace(/\s/g, '');
           values[name] = content.trim();
         }
       }
     }
 
     if ("description" in values) {
-      return values["description"];
-    }
-
-    if ("og:description" in values) {
+      metadata.excerpt = values["description"];
+    } else if ("og:description" in values) {
       // Use facebook open graph description.
-      return values["og:description"];
+      metadata.excerpt = values["og:description"];
+    } else if ("twitter:description" in values) {
+      // Use twitter cards description.
+      metadata.excerpt = values["twitter:description"];
     }
 
-    if ("twitter:description" in values) {
-      // Use twitter cards description.
-      return values["twitter:description"];
-    }
-
-    // No description meta tags, use the article's first paragraph.
-    var paragraphs = articleContent.getElementsByTagName("p");
-    if (paragraphs.length > 0) {
-      return paragraphs[0].textContent;
-    }
-
-    return "";
+    return metadata;
   },
 
   /**
    * Removes script tags from the document.
    *
    * @param Element
   **/
   _removeScripts: function(doc) {
@@ -842,72 +873,64 @@ Readability.prototype = {
       scripts[i].removeAttribute('src');
 
       if (scripts[i].parentNode)
           scripts[i].parentNode.removeChild(scripts[i]);
     }
   },
 
   /**
-   * Get child index of the only P element inside a DIV with no
-   * text content. Returns -1 if the DIV node contains non-empty
-   * text nodes or if it contains other element nodes.
+   * Check if this node has only whitespace and a single P element
+   * Returns false if the DIV node contains non-empty text nodes
+   * or if it contains no P or more than 1 element.
    *
    * @param Element
+   * @return Boolean true when the only element child is a P and no text
+   *         node child matches REGEXPS.hasContent
+   *
+   * NOTE(review): REGEXPS.hasContent is /\S$/, which only matches text whose
+   * last character is not whitespace, so a text node such as "words " is not
+   * counted as content here - confirm that this is intended.
   **/
-  _getSinglePIndexInsideDiv: function(e) {
+  _hasSinglePInsideElement: function(e) {
+    // There should be exactly 1 element child which is a P:
+    if (e.children.length != 1 || e.firstElementChild.tagName !== "P") {
+      return false;
+    }
+    // And there should be no text nodes with real content
     var childNodes = e.childNodes;
-    var pIndex = -1;
-
     for (var i = childNodes.length; --i >= 0;) {
       var node = childNodes[i];
-
-      if (node.nodeType === Node.ELEMENT_NODE) {
-        if (node.tagName !== "P")
-          return -1;
-
-        if (pIndex >= 0)
-          return -1;
-
-        pIndex = i;
-      } else if (node.nodeType == Node.TEXT_NODE && this._getInnerText(node, false)) {
-        return -1;
+      if (node.nodeType == Node.TEXT_NODE &&
+          this.REGEXPS.hasContent.test(node.textContent)) {
+        return false;
       }
     }
 
-    return pIndex;
+    return true;
   },
 
   /**
    * Determine whether element has any children block level elements.
+   * The check is recursive: an element counts as soon as any descendant
+   * element, at any depth, has a tag name listed in DIV_TO_P_ELEMS.
    *
    * @param Element
+   * @return Boolean true if such a descendant exists, false otherwise
    */
   _hasChildBlockElement: function (e) {
-    var length = e.childNodes.length;
+    var length = e.children.length;
     for (var i = 0; i < length; i++) {
-      var child = e.childNodes[i];
-      if (child.nodeType != 1)
-        continue;
-
+      var child = e.children[i];
       if (this.DIV_TO_P_ELEMS.indexOf(child.tagName) !== -1 || this._hasChildBlockElement(child))
         return true;
     }
     return false;
   },
 
   /**
    * Get the inner text of a node - cross browser compatibly.
    * This also strips out any excess whitespace to be found.
+   * Leading and trailing whitespace is always trimmed; collapsing of inner
+   * whitespace is controlled by the second argument.
    *
    * @param Element
+   * @param Boolean normalizeSpaces (optional, defaults to true) replace every
+   *        run of two or more whitespace characters with a single space
    * @return string
   **/
   _getInnerText: function(e, normalizeSpaces) {
-    var textContent = e.textContent.replace(this.REGEXPS.trim, "");
+    var textContent = e.textContent.trim();
     // Only a missing argument selects the default; an explicit false is kept.
     normalizeSpaces = (typeof normalizeSpaces === 'undefined') ? true : normalizeSpaces;
 
     if (normalizeSpaces) {
       return textContent.replace(this.REGEXPS.normalize, " ");
     } else {
       return textContent;
     }
   },
@@ -928,28 +951,27 @@ Readability.prototype = {
    * Remove the style attribute on every e and under.
    * TODO: Test if getElementsByTagName(*) is faster.
    *
    * @param Element
    * @return void
   **/
   _cleanStyles: function(e) {
     e = e || this._doc;
-    var cur = e.firstChild;
-
     if (!e)
       return;
+    var cur = e.firstChild;
 
     // Remove any root styles, if we're able.
     if (typeof e.removeAttribute === 'function' && e.className !== 'readability-styled')
       e.removeAttribute('style');
 
     // Go until there are no more child nodes
     while (cur !== null) {
-      if (cur.nodeType === 1) {
+      if (cur.nodeType === cur.ELEMENT_NODE) {
         // Remove style attribute(s) :
         if (cur.className !== "readability-styled")
           cur.removeAttribute("style");
 
         this._cleanStyles(cur);
       }
 
       cur = cur.nextSibling;
@@ -1350,29 +1372,29 @@ Readability.prototype = {
   _getClassWeight: function(e) {
     // Return a neutral weight when _flagIsActive does not report
     // FLAG_WEIGHT_CLASSES as active.
     if (!this._flagIsActive(this.FLAG_WEIGHT_CLASSES))
       return 0;
 
     // The four checks below are independent and worth 25 points each, so the
     // result lies between -50 and +50; a name that matches both the negative
     // and the positive pattern cancels itself out.
     var weight = 0;
 
     // Look for a special classname
     if (typeof(e.className) === 'string' && e.className !== '') {
       // REGEXPS.negative and REGEXPS.positive carry no "g" flag, so test()
       // keeps no state between calls.
-      if (e.className.search(this.REGEXPS.negative) !== -1)
+      if (this.REGEXPS.negative.test(e.className))
         weight -= 25;
 
-      if (e.className.search(this.REGEXPS.positive) !== -1)
+      if (this.REGEXPS.positive.test(e.className))
         weight += 25;
     }
 
     // Look for a special ID
     if (typeof(e.id) === 'string' && e.id !== '') {
-      if (e.id.search(this.REGEXPS.negative) !== -1)
+      if (this.REGEXPS.negative.test(e.id))
         weight -= 25;
 
-      if (e.id.search(this.REGEXPS.positive) !== -1)
+      if (this.REGEXPS.positive.test(e.id))
         weight += 25;
     }
 
     return weight;
   },
 
   /**
    * Clean a node of all elements of type "tag".
@@ -1390,21 +1412,21 @@ Readability.prototype = {
       // Allow youtube and vimeo videos through as people usually want to see those.
       if (isEmbed) {
         var attributeValues = "";
         for (var i = 0, il = targetList[y].attributes.length; i < il; i += 1) {
           attributeValues += targetList[y].attributes[i].value + '|';
         }
 
         // First, check the elements attributes to see if any of them contain youtube or vimeo
-        if (attributeValues.search(this.REGEXPS.videos) !== -1)
+        if (this.REGEXPS.videos.test(attributeValues))
           continue;
 
         // Then check the elements inside this element for the same.
-        if (targetList[y].innerHTML.search(this.REGEXPS.videos) !== -1)
+        if (this.REGEXPS.videos.test(targetList[y].innerHTML))
           continue;
       }
 
       targetList[y].parentNode.removeChild(targetList[y]);
     }
   },
 
   /**
@@ -1440,17 +1462,17 @@ Readability.prototype = {
         var p = tagsList[i].getElementsByTagName("p").length;
         var img = tagsList[i].getElementsByTagName("img").length;
         var li = tagsList[i].getElementsByTagName("li").length-100;
         var input = tagsList[i].getElementsByTagName("input").length;
 
         var embedCount = 0;
         var embeds = tagsList[i].getElementsByTagName("embed");
         for (var ei = 0, il = embeds.length; ei < il; ei += 1) {
-          if (embeds[ei].src.search(this.REGEXPS.videos) === -1)
+          if (!this.REGEXPS.videos.test(embeds[ei].src))
             embedCount += 1;
         }
 
         var linkDensity = this._getLinkDensity(tagsList[i]);
         var contentLength = this._getInnerText(tagsList[i]).length;
         var toRemove = false;
 
         if (img > p) {
@@ -1527,35 +1549,45 @@ Readability.prototype = {
     // this._parsedPages[uri.spec.replace(/\/$/, '')] = true;
 
     // Pull out any possible next page link first.
     // var nextPageLink = this._findNextPageLink(doc.body);
 
     this._prepDocument();
 
     var articleTitle = this._getArticleTitle();
+    var metadata = this._getArticleMetadata();
+
     var articleContent = this._grabArticle();
     if (!articleContent)
       return null;
 
     this.log("Grabbed: " + articleContent.innerHTML);
 
     this._postProcessContent(articleContent);
 
     // if (nextPageLink) {
     //   // Append any additional pages after a small timeout so that people
     //   // can start reading without having to wait for this to finish processing.
     //   setTimeout((function() {
     //     this._appendNextPage(nextPageLink);
     //   }).bind(this), 500);
     // }
 
-    var excerpt = this._getExcerpt(articleContent);
+    // If we haven't found an excerpt in the article's metadata, use the article's
+    // first paragraph as the excerpt. This is used for displaying a preview of
+    // the article's content.
+    if (!metadata.excerpt) {
+      var paragraphs = articleContent.getElementsByTagName("p");
+      if (paragraphs.length > 0) {
+        metadata.excerpt = paragraphs[0].textContent;
+      }
+    }
 
     return { uri: this._uri,
              title: articleTitle,
-             byline: this._articleByline,
+             byline: metadata.byline || this._articleByline,
              dir: this._articleDir,
              content: articleContent.innerHTML,
              length: articleContent.textContent.length,
-             excerpt: excerpt };
+             excerpt: metadata.excerpt };
   }
 };