Bug 784682 - Penalize nodes with "hidden" class or id in Readability.js (r=bnicholson)
author Lucas Rocha <lucasr@mozilla.com>
Thu, 23 Aug 2012 17:08:13 +0100
changeset 105217 6c3457b601d8021ce7b6793445cb091a47b0b3b2
parent 105216 ca491b4dc1727a5c88829a27d4b371829762cc9e
child 105218 d720637283ebc849076442a338e5670ab468bb7b
push id 55
push user shu@rfrn.org
push date Thu, 30 Aug 2012 01:33:09 +0000
reviewers bnicholson
bugs 784682
milestone 17.0a1
Bug 784682 - Penalize nodes with "hidden" class or id in Readability.js (r=bnicholson)
mobile/android/chrome/content/Readability.js
--- a/mobile/android/chrome/content/Readability.js
+++ b/mobile/android/chrome/content/Readability.js
@@ -67,17 +67,17 @@ Readability.prototype = {
   MAX_PAGES: 5,
 
   // All of the regular expressions in use within readability.
   // Defined up here so we don't instantiate them repeatedly in loops.
   REGEXPS: {
     unlikelyCandidates: /combx|comment|community|disqus|extra|foot|header|menu|remark|rss|shoutbox|sidebar|sponsor|ad-break|agegate|pagination|pager|popup|tweet|twitter/i,
     okMaybeItsACandidate: /and|article|body|column|main|shadow/i,
     positive: /article|body|content|entry|hentry|main|page|pagination|post|text|blog|story/i,
-    negative: /combx|comment|com-|contact|foot|footer|footnote|masthead|media|meta|outbrain|promo|related|scroll|shoutbox|sidebar|sponsor|shopping|tags|tool|widget/i,
+    negative: /hidden|combx|comment|com-|contact|foot|footer|footnote|masthead|media|meta|outbrain|promo|related|scroll|shoutbox|sidebar|sponsor|shopping|tags|tool|widget/i,
     extraneous: /print|archive|comment|discuss|e[\-]?mail|share|reply|all|login|sign|single|utility/i,
     byline: /byline|author|dateline|writtenby/i,
     divToPElements: /<(a|blockquote|dl|div|img|ol|p|pre|table|ul)/i,
     replaceFonts: /<(\/?)font[^>]*>/gi,
     trim: /^\s+|\s+$/g,
     normalize: /\s{2,}/g,
     killBreaks: /(<br\s*\/?>(\s|&nbsp;?)*){1,}/g,
     videos: /http:\/\/(www\.)?(youtube|vimeo)\.com/i,