More tweaks; skip 'NoSelect' folders, encoding work (or not work!); twisty
author Mark Hammond <mhammond@skippinet.com.au>
Thu, 12 Mar 2009 11:45:12 +1100
branch twisty
changeset 82 5e4590a60a3f5e3b94f6161b4d6f6c908fc6cc4c
parent 81 51049ab6b44f4465ed5627b3021c7d9ddc11d5e2
child 83 fe8391db813f12ec730860c862b8c8411d9195d1
push id 1
push user root
push date Wed, 08 Apr 2009 01:46:05 +0000
More tweaks; skip 'NoSelect' folders, encoding work (or not work!); store 'folder path' as a list of elts rather than assuming a delim.
server/python/junius/proto/imap.py
--- a/server/python/junius/proto/imap.py
+++ b/server/python/junius/proto/imap.py
@@ -4,16 +4,21 @@ import logging
 
 from ..proc import base
 from ..model import get_db
 
 brat = base.Rat
 
 logger = logging.getLogger(__name__)
 
+# It would appear twisted assumes 'imap4-utf-7' (which would appear to be
+# simply utf-7), but gmail apparently doesn't. This clearly should not be a
+# global; we need to grok this better...
+imap_encoding = 'utf-8'
+
 class ImapClient(imap4.IMAP4Client):
   '''
   Much of our logic here should perhaps be in another class that holds a
   reference to the IMAP4Client instance subclass.  Consider refactoring if we
   don't turn out to benefit from the subclassing relationship.
   '''
   def serverGreeting(self, caps):
     logger.debug("IMAP server greeting: capabilities are %s", caps)
@@ -49,36 +54,49 @@ class ImapClient(imap4.IMAP4Client):
     return self._processNextFolder()
 
   def _processNextFolder(self):
     if not self.folder_infos:
       # yay - all done!
       logger.info("Finished synchronizing IMAP folders")
       # need to report we are done somewhere?
       from ..sync import get_conductor
-      return get_conductor().accountFinishedSync(self)
+      return get_conductor().accountFinishedSync(self.account)
+
+    flags, delim, name = self.folder_infos.pop()
+    self.current_folder_path = cfp = name.split(delim)
+    logger.debug('Processing folder %s (flags=%s)', name, flags)
+    if r"\Noselect" in flags:
+      logger.debug("'%s' is unselectable - skipping", name)
+      return self._processNextFolder()
 
-    self.current_folder_info = self.folder_infos.pop()
-    flags, delim, name = self.current_folder_info
-    logger.debug('Processing folder %s (flags=%s, delim=%s)', name, flags, delim)
+    # XXX - sob - markh sees:
+    # 'Folder [Gmail]/All Mail has 38163 messages, 36391 of which we haven't seen'
+    # although we obviously have seen them already in the other folders.
+    if cfp and cfp[0].startswith('[') and cfp[0].endswith(']'):
+      logger.info("'%s' appears special -skipping", name)
+      return self._processNextFolder()
+
     return self.examine(name
-                 ).addCallback(self._examineFolder, name
-                 ).addErrback(self._cantExamineFolder, name)
+                 ).addCallback(self._examineFolder, cfp
+                 ).addErrback(self._cantExamineFolder, cfp)
 
   def _examineFolder(self, result, name):
-    logger.debug('Looking for messages already fetched for mailbox %s', name)
+    logger.debug('Looking for messages already fetched for folder %s', name)
     startkey=[self.account.details['_id'], name, 0]
     endkey=[self.account.details['_id'], name, 4000000000]
     get_db().openView('raindrop!messages!by', 'by_storage'
         ).addCallback(self._fetchAndProcess, name)
 
   def _fetchAndProcess(self, rows, name):
     allMessages = imap4.MessageSet(1, None)
     key_check = [self.account.details['_id'], name]
     seen_ids = [r['key'][2] for r in rows if r['key'][0:2]==key_check]
+    logger.debug("%d messages already exist from %s",
+                 len(seen_ids), name)
     return self.fetchUID(allMessages, True).addCallback(
             self._gotUIDs, name, seen_ids)
 
   def _gotUIDs(self, uidResults, name, seen_uids):
     uids = set([result['UID'] for result in uidResults.values()])
     need = uids - set(seen_uids)
     logger.info("Folder %s has %d messages, %d of which we haven't seen",
                  name, len(uids), len(need))
@@ -97,38 +115,48 @@ class ImapClient(imap4.IMAP4Client):
     logger.debug("fetching rfc822 for message %s", to_fetch)
     return self.fetchMessage(to_fetch, uid=True
                 ).addCallback(self._gotBody, to_fetch
                 )
 
   def _gotBody(self, result, to_fetch):
     _, result = result.popitem()
     try:
-      body = result['RFC822'].decode('utf8')
+      body = result['RFC822'].decode(imap_encoding)
     except UnicodeError, why:
-      logger.error("Failed to decode a message as UTF8: %s", why)
-      body = result['RFC822'].decode('utf8', 'ignore')
+      logger.error("Failed to decode message "
+                   "(but will re-decode ignoring errors) : %s", why)
+      # heh - 'ignore' and 'replace' are apparently ignored for the 'utf-7'
+      # codecs...
+      try:
+        body = result['RFC822'].decode(imap_encoding, 'ignore')
+      except UnicodeError, why:
+        logger.error("and failed to 'ignore' unicode errors - skipping it: %s",
+                     why)
+        return self._processNextMessage()
+
     # grr - get the flags
     logger.debug("fetching flags for message %s", to_fetch)
     return self.fetchFlags(to_fetch, uid=True
                 ).addCallback(self._gotMessage, body
                 ).addErrback(self._cantGetMessage
                 )
 
   def _gotMessage(self, result, body):
     # not sure about this - can we ever get more?
+    logger.debug("flags are %s", result)
     assert len(result)==1, result
     _, result = result.popitem()
     flags = result['FLAGS']
     # put the 'raw' document object together and save it.
     doc = dict(
       type='rawMessage',
       subtype='rfc822',
       account_id=self.account.details['_id'],
-      storage_path=self.current_folder_info[2],
+      storage_path=self.current_folder_path,
       storage_id=result['UID'],
       rfc822=body,
       read=r'\Seen' in flags,
       )
     get_db().saveDoc(doc
             ).addCallback(self._savedDocument
             ).addErrback(self._cantSaveDocument
             )