author | Gijs Kruitbosch <gijskruitbosch@gmail.com> |
Fri, 08 May 2015 12:05:33 +0100 | |
changeset 242960 | c18720d4d55926541d62c6a2b4b5f69a3ab3aeaa |
parent 242959 | 081eeb6d7241f389ae7225340257fa7ef31c49ea |
child 242961 | 9a5bd1aa9bcef8d603de4aa684f34f0801a68e5f |
push id | 28714 |
push user | kwierso@gmail.com |
push date | Fri, 08 May 2015 17:29:48 +0000 |
treeherder | mozilla-central@5e8adf0e7f2c [default view] [failures only] |
perfherder | [talos] [build metrics] [platform microbench] (compared to previous push) |
reviewers | me |
bugs | 1162917 |
milestone | 40.0a1 |
first release with | nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
|
last release without | nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
|
toolkit/components/reader/JSDOMParser.js | file | annotate | diff | comparison | revisions | |
toolkit/components/reader/Readability.js | file | annotate | diff | comparison | revisions |
--- a/toolkit/components/reader/JSDOMParser.js
+++ b/toolkit/components/reader/JSDOMParser.js
@@ -278,16 +278,17 @@
     "embed": true,
     "hr": true,
     "img": true,
     "input": true,
     "link": true,
     "meta": true,
     "param": true,
     "source": true,
+    "wbr": true
   };

   var whitespace = [" ", "\t", "\n", "\r"];

   // See http://www.w3schools.com/dom/dom_nodetype.asp
   var nodeTypes = {
     ELEMENT_NODE: 1,
     ATTRIBUTE_NODE: 2,
--- a/toolkit/components/reader/Readability.js
+++ b/toolkit/components/reader/Readability.js
@@ -104,30 +104,30 @@ Readability.prototype = {
   // tight the competition is among candidates.
   DEFAULT_N_TOP_CANDIDATES: 5,

   // The maximum number of pages to loop through before we call
   // it quits and just show a link.
   DEFAULT_MAX_PAGES: 5,

   // Element tags to score by default.
-  DEFAULT_TAGS_TO_SCORE: ["SECTION", "P", "TD", "PRE"],
+  DEFAULT_TAGS_TO_SCORE: "section,h2,h3,h4,h5,h6,p,td,pre".toUpperCase().split(","),

   // All of the regular expressions in use within readability.
   // Defined up here so we don't instantiate them repeatedly in loops.
   REGEXPS: {
-    unlikelyCandidates: /banner|combx|comment|community|disqus|extra|foot|header|menu|remark|rss|share|shoutbox|sidebar|skyscraper|sponsor|ad-break|agegate|pagination|pager|popup/i,
+    unlikelyCandidates: /banner|combx|comment|community|disqus|extra|foot|header|menu|related|remark|rss|share|shoutbox|sidebar|skyscraper|sponsor|ad-break|agegate|pagination|pager|popup/i,
     okMaybeItsACandidate: /and|article|body|column|main|shadow/i,
     positive: /article|body|content|entry|hentry|main|page|pagination|post|text|blog|story/i,
     negative: /hidden|banner|combx|comment|com-|contact|foot|footer|footnote|masthead|media|meta|outbrain|promo|related|scroll|share|shoutbox|sidebar|skyscraper|sponsor|shopping|tags|tool|widget/i,
     extraneous: /print|archive|comment|discuss|e[\-]?mail|share|reply|all|login|sign|single|utility/i,
     byline: /byline|author|dateline|writtenby/i,
     replaceFonts: /<(\/?)font[^>]*>/gi,
     normalize: /\s{2,}/g,
-    videos: /https?:\/\/(www\.)?(dailymotion|youtube|youtube-nocookie|player\.vimeo)\.com/i,
+    videos: /\/\/(www\.)?(dailymotion|youtube|youtube-nocookie|player\.vimeo)\.com/i,
     nextLink: /(next|weiter|continue|>([^\|]|$)|»([^\|]|$))/i,
     prevLink: /(prev|earl|old|new|<|«)/i,
     whitespace: /^\s*$/,
     hasContent: /\S$/,
   },

   DIV_TO_P_ELEMS: [ "A", "BLOCKQUOTE", "DL", "DIV", "IMG", "OL", "P", "PRE", "TABLE", "UL", "SELECT" ],

@@ -734,17 +734,22 @@ Readability.prototype = {
           if (!ancestor.tagName)
             return;

           if (typeof(ancestor.readability) === 'undefined') {
             this._initializeNode(ancestor);
             candidates.push(ancestor);
           }

-          ancestor.readability.contentScore += contentScore / (level === 0 ? 1 : level * 2);
+          // Node score divider:
+          // - parent:             1 (no division)
+          // - grandparent:        2
+          // - great grandparent+: ancestor level * 3
+          var scoreDivider = level === 0 ? 1 : level === 1 ? 2 : level * 3;
+          ancestor.readability.contentScore += contentScore / scoreDivider;
         });
       });

       // After we've calculated scores, loop through all of the possible
       // candidate nodes we found and find the one with the highest score.
       var topCandidates = [];
       for (var c = 0, cl = candidates.length; c < cl; c += 1) {
         var candidate = candidates[c];