cleanup/ruggedizing refactoring status commit
author Andrew Sutherland <asutherland@asutherland.org>
Sun, 01 Feb 2009 03:48:06 -0800
changeset 51 c18b5b2c8c377cb16276258c428a5275f87e7a9d
parent 41 0d8aa961206ddf2e1a3331c87eefeb3b2d26179c
child 52 e9ccf6fc90654c0c915abe4576e0ad67dca264de
push id 1
push user root
push date Wed, 08 Apr 2009 01:46:05 +0000
cleanup/ruggedizing refactoring status commit
schema/views/accounts/all/map.js
schema/views/contacts/by_identity/map.js
schema/views/contacts/by_suffix/map.js
schema/views/conversations/by_involves/map.js
schema/views/conversations/by_mailing_list/map.js
schema/views/conversations/by_tags/map.js
schema/views/mailing_lists/by_list_id/map.js
schema/views/mailing_lists/by_list_id/reduce.js
schema/views/messages/by_conversation/map.js
schema/views/messages/by_header_message_id/map.js
schema/views/messages/by_storage/map.js
schema/views/tags/all/map.reuse
schema/views/tags/all/reduce.js
server/python/junius/config.py
server/python/junius/model.py
server/python/junius/proc/__init__.py
server/python/junius/proc/base.py
server/python/junius/proc/contact.py
server/python/junius/proc/mail.py
server/python/junius/proc/message.py
server/python/junius/proto/__init__.py
server/python/junius/proto/imap.py
server/python/junius/proto/twitter.py
server/python/junius/sync.py
tests/__init__.py
new file mode 100644
--- /dev/null
+++ b/schema/views/accounts/all/map.js
@@ -0,0 +1,7 @@
+function(doc) {
+  // Accounts view: every account document is emitted whole under a null
+  // key; documents of any other type are skipped.
+  if (doc.type != "account")
+    return;
+  emit(null, doc);
+}
\ No newline at end of file
new file mode 100644
--- /dev/null
+++ b/schema/views/contacts/by_identity/map.js
@@ -0,0 +1,9 @@
+function(doc) {
+  // Contacts keyed by identity: one row per entry in doc.identities, with
+  // key [kind, value] and the whole contact document as the value.
+  if (doc.type != "contact")
+    return;
+  for each (var ident in doc.identities) {
+    emit([ident.kind, ident.value], doc);
+  }
+}
\ No newline at end of file
new file mode 100644
--- /dev/null
+++ b/schema/views/contacts/by_suffix/map.js
@@ -0,0 +1,21 @@
+function(doc) {
+  // Suffix index for contacts: emits every suffix of the contact's name and
+  // of each identity value as a key (value null), skipping suffixes that
+  // begin with a space.
+  function emitSuffixes(text) {
+    for (var i = 0; i < text.length; i++) {
+      var suffix = text.substring(i);
+      if (suffix && suffix[0] != " ")
+        emit(suffix, null);
+    }
+  }
+
+  if (doc.type == "contact") {
+    emitSuffixes(doc.name);
+    // The identity loop used to lack braces, so only the final suffix of
+    // each value was emitted (or a stale one for an empty value); sharing
+    // the helper makes both loops behave the same way.
+    for each (var identity in doc.identities)
+      emitSuffixes(identity.value);
+  }
+}
\ No newline at end of file
new file mode 100644
--- /dev/null
+++ b/schema/views/conversations/by_involves/map.js
@@ -0,0 +1,10 @@
+function(doc) {
+  // Conversations by participant: one row per id in
+  // doc.involves_contact_ids, keyed by [contact id, message timestamp] with
+  // the conversation id as the value.
+  if (doc.type != "message")
+    return;
+  var stamp = doc.timestamp, conversation = doc.conversation_id;
+  for each (var who in doc.involves_contact_ids)
+    emit([who, stamp], conversation);
+}
\ No newline at end of file
new file mode 100644
--- /dev/null
+++ b/schema/views/conversations/by_mailing_list/map.js
@@ -0,0 +1,14 @@
+function(doc) {
+  // Conversations by mailing list: key is [list id, message timestamp] and
+  // the value is the conversation id.  The list id is whatever sits between
+  // the angle brackets of the List-Id header.
+  if (doc.type == 'message') {
+    if (doc.headers && doc.headers["List-Id"]) {
+      var parts = doc.headers["List-Id"].match(/[\W\w\s]*<(.+)>.*/);
+      // match() yields null when the header has no <...> part; skip the
+      // message rather than throwing on parts[1].
+      if (parts)
+        emit([parts[1], doc.timestamp], doc.conversation_id);
+    }
+  }
+}
new file mode 100644
--- /dev/null
+++ b/schema/views/conversations/by_tags/map.js
@@ -0,0 +1,9 @@
+function(doc) {
+  // Conversations by tag: one row per entry in doc.tags, keyed by
+  // [tag, message timestamp] with the conversation id as the value.
+  if (doc.type != "message" || !doc.tags)
+    return;
+  var tags = doc.tags, count = tags.length;
+  for (var idx = 0; idx < count; idx++)
+    emit([tags[idx], doc.timestamp], doc.conversation_id);
+}
\ No newline at end of file
new file mode 100644
--- /dev/null
+++ b/schema/views/mailing_lists/by_list_id/map.js
@@ -0,0 +1,25 @@
+function(doc) {
+  // Mailing lists by list id: for each message with a List-Id header, emits
+  // the list id as the key and a description of the list (display name,
+  // raw List-Id and any of the other List-* headers present) as the value.
+  if (doc.type == 'message') {
+    if (doc.headers && doc.headers["List-Id"]) {
+      var parts = doc.headers["List-Id"].match(/([\W\w]*)\s*<(.+)>.*/);
+      // match() yields null when the header has no <...> part; such a
+      // message describes no usable list id, so skip it instead of throwing.
+      if (!parts)
+        return;
+      var values = {
+        "List-Id" : doc.headers["List-Id"],
+        "id" : parts[2],
+        "name" : parts[1]
+      };
+      for each (var headerId in ["List-Post","List-Archive","List-Help",
+                                 "List-Subscribe","List-Unsubscribe"]) {
+        if (doc.headers[headerId])
+          values[headerId] = doc.headers[headerId];
+      }
+      emit(parts[2], values);
+    }
+  }
+}
\ No newline at end of file
new file mode 100644
--- /dev/null
+++ b/schema/views/mailing_lists/by_list_id/reduce.js
@@ -0,0 +1,16 @@
+function(keys, values, rereduce) {
+  // Folds the list descriptions emitted by the map into a single object
+  // (for a field present in several values the last one wins) and records
+  // in `count` how many mapped rows contributed.
+  var output = {}, count = 0;
+  for (var idx = 0; idx < values.length; idx++) {
+    var value = values[idx];
+    for (var elm in value)
+      output[elm] = value[elm];
+    // On rereduce every value is an earlier output that already carries a
+    // count; add those up instead of counting the partial results.
+    count += rereduce ? value.count : 1;
+  }
+  output.count = count;
+  return output;
+}
\ No newline at end of file
new file mode 100644
--- /dev/null
+++ b/schema/views/messages/by_conversation/map.js
@@ -0,0 +1,7 @@
+function(doc) {
+  // Messages by conversation: keyed by conversation id with a null value.
+  // Messages lacking a timestamp are left out.
+  var wanted = (doc.type == "message") && doc.timestamp;
+  if (wanted)
+    emit(doc.conversation_id, null);
+}
\ No newline at end of file
new file mode 100644
--- /dev/null
+++ b/schema/views/messages/by_header_message_id/map.js
@@ -0,0 +1,6 @@
+function(doc) {
+  // rfc822 messages keyed by their header_message_id, with a null value.
+  if (doc.type != "message" || doc.subtype != "rfc822")
+    return;
+  emit(doc.header_message_id, null);
+}
\ No newline at end of file
new file mode 100644
--- /dev/null
+++ b/schema/views/messages/by_storage/map.js
@@ -0,0 +1,10 @@
+function(doc) {
+  // rfc822 messages keyed by where they are stored:
+  // [account id, storage path, storage id], with a null value.  Messages
+  // missing either storage field are left out.
+  if (doc.type != 'message' || doc.subtype != "rfc822")
+    return;
+  if (!doc.storage_path || !doc.storage_id)
+    return;
+  emit([doc.account_id, doc.storage_path, doc.storage_id], null);
+}
\ No newline at end of file
new file mode 100644
--- /dev/null
+++ b/schema/views/tags/all/map.reuse
@@ -0,0 +1,1 @@
+conversations/by_tags
\ No newline at end of file
new file mode 100644
--- /dev/null
+++ b/schema/views/tags/all/reduce.js
@@ -0,0 +1,22 @@
+function(keys, values, rereduce) {
+  // Reduces the rows of the reused conversations/by_tags map to the sorted
+  // list of distinct tags.
+  var seen = {}, tags = [], i, j;
+  if (rereduce) {
+    // each value is a tag list produced by an earlier reduce pass
+    for (i = 0; i < values.length; i++) {
+      var partial = values[i];
+      for (j = 0; j < partial.length; j++)
+        seen[partial[j]] = true;
+    }
+  }
+  else {
+    // keys[i][0] is the emitted key; its first element is read as the tag
+    for (i = 0; i < keys.length; i++)
+      seen[keys[i][0][0]] = true;
+  }
+  for (var tag in seen)
+    tags.push(tag);
+  tags.sort();
+  return tags;
+}
\ No newline at end of file
new file mode 100644
--- /dev/null
+++ b/server/python/junius/config.py
@@ -0,0 +1,83 @@
+import ConfigParser, logging, os, os.path
+
+__all__ = ['get_config']
+
+class Config(object):
+  '''
+  Settings read from ~/.raindrop.  'couch-NAME' sections end up in
+  self.couches and 'account-NAME' sections in self.accounts, each as a
+  dictionary built by dictifySection.
+  '''
+  COUCH_DEFAULTS = {'host': 'localhost', 'port': 5984, 'name': 'raindrop'}
+  def __init__(self):
+    self.parser = ConfigParser.SafeConfigParser()
+
+    # load() fills these in, so they have to exist before it runs (and must
+    # not be reset afterwards, which would throw the loaded sections away)
+    self.couches = {'local': self.COUCH_DEFAULTS.copy()}
+    self.accounts = {}
+    self.load()
+
+    # configuration and logging.  two great flavors that go together.
+    logging.basicConfig()
+
+  def dictifySection(self, section_name, defaults=None, name=None):
+    '''
+    Given a config section name, suck up its contents into a dictionary.  Poor
+    man's type detection turns lowercase true/false into the boolean of that
+    type, things that can be int()ed into ints, and otherwise things get to
+    stay strings.  Defaults are applied before dictification, and the name is
+    an optional default for 'name' if specified (which overrides the defaults
+    dict.)
+    Options found in the section itself override both.
+    '''
+    results = {}
+    if defaults:
+      results.update(defaults)
+    if name:
+      results['name'] = name
+    # the loop variable must not be called 'name': that would shadow the
+    #  parameter, and the assignment below needs the option's own key
+    for key, value in self.parser.items(section_name):
+      if value.lower() in ('true', 'false'):
+        value = (value.lower() == 'true')
+      else:
+        try:
+          value = int(value)
+        except ValueError:
+          # not an integer; it stays a string
+          pass
+
+      results[key] = value
+    return results
+
+  def load(self):
+    '''
+    Read ~/.raindrop and populate local_couch, remote_couch, couches and
+    accounts from it.
+    '''
+    self.parser.read([os.path.expanduser('~/.raindrop')])
+
+    self.local_couch = self.parser.get('couches', 'local')
+    self.remote_couch = self.parser.get('couches', 'remote')
+
+    COUCH_PREFIX = 'couch-'
+    ACCOUNT_PREFIX = 'account-'
+    for section_name in self.parser.sections():
+      if section_name.startswith(COUCH_PREFIX):
+        couch_name = section_name[len(COUCH_PREFIX):]
+        self.couches[couch_name] = self.dictifySection(section_name,
+                                                       self.COUCH_DEFAULTS)
+
+      if section_name.startswith(ACCOUNT_PREFIX):
+        account_name = section_name[len(ACCOUNT_PREFIX):]
+        self.accounts[account_name] = self.dictifySection(section_name, None,
+                                                          account_name)
+
+CONFIG = None
+def get_config():
+  '''Return the module-level Config instance, creating it on first use.'''
+  global CONFIG
+  if CONFIG is None:
+    CONFIG = Config()
+  return CONFIG
--- a/server/python/junius/model.py
+++ b/server/python/junius/model.py
@@ -7,31 +7,44 @@ class WildField(schema.Field):
     '''
     def _to_python(self, value):
         return value
     
     def _to_json(self, value):
         return value
 
 class Account(schema.Document):
-    kind = schema.TextField()
-    host = schema.TextField(default='')
-    port = schema.IntegerField(default=0)
-    username = schema.TextField()
-    password = schema.TextField()
-    ssl = schema.BooleanField(default=False)
-    
-    folderStatuses = WildField(default={})
-    
-    # could we just do _all_docs?  I don't want the damn design docs though...
-    # (ironically, this is the first one :)
-    all = schema.View('all', '''\
-        function(doc) {
-            emit(null, doc);
-        }''')
+  '''
+  Accounts correspond to instances of protocols to send/receive messages.
+  Although they may correlate with the various identities of the user, they
+  are not the same.  Just because you have a facebook account does not mean
+  you get an account instance; you would always want the info on the facebook
+  account in the identity list for the user, but it doesn't get to be an
+  account until we are capable of doing something with it.  (In the specific
+  facebook case, having the account info to be able to do Facebook Connect-type
+  things is an example of a case where an account should exist.)
+  '''
+  kind = schema.TextField()
+  host = schema.TextField(default='')
+  port = schema.IntegerField(default=0)
+  username = schema.TextField()
+  password = schema.TextField()
+  ssl = schema.BooleanField(default=False)
+
+  #: Have we ever successfully connected to this account?
+  verified = schema.BooleanField(default=False)
+
+  folderStatuses = WildField(default={})
+
+  # could we just do _all_docs?  I don't want the damn design docs though...
+  # (ironically, this is the first one :)
+  all = schema.View('all', '''\
+      function(doc) {
+          emit(null, doc);
+      }''')
 
 class Contact(schema.Document):
     name = schema.TextField()
     identities = schema.ListField(schema.DictField(schema.Schema.build(
         kind = schema.TextField(),
         value = schema.TextField()
     )))
     #: expose contacts by their identities
@@ -122,23 +135,16 @@ class Message(schema.Document):
 
     # -- message (id) views
     # ghosts are okay!
     by_header_id = schema.View('by_header_id', '''\
         function(doc) {
             emit(doc.header_message_id, null);
         }''', include_docs=True)    
 
-    # no ghosts!
-    by_timestamp = schema.View('by_timestamp', '''\
-        function(doc) {
-            if (doc.timestamp)
-                emit(doc.timestamp, null);
-        }''', include_docs=True)    
-
     # the key includes the timestamp so we can use it to limit our queries plus
     #  pick up where we left off if we need to page/chunk.
     # we expose the conversation id as the value because set intersection
     #  on a conversation-basis demands it, and it would theoretically be too
     #  expensive to just return the whole document via include_docs.
     # (no ghosts!)
     by_involves = schema.View('by_involves', '''\
         function(doc) {
@@ -183,17 +189,17 @@ class Message(schema.Document):
     # so, this key is theoretically just wildly expensive
     # no ghosts!
     by_storage = schema.View('by_storage', '''\
         function(doc) {
             if (doc.timestamp)
                 emit([doc.account_id, doc.storage_path, doc.storage_id], null);
         }''', include_docs=False)
         
-    by_mailing_list = schema.View('by_header_id', '''\
+    by_mailing_list = schema.View('by_header_id', r'''\
         function(doc) {
           if (doc.headers && doc.headers["List-Id"]) {
             var parts = doc.headers["List-Id"].match(/([\W\w]*)\s*<(.+)>.*/);
             var values = {"List-Id" : doc.headers["List-Id"],
                           "id" : parts[2],
                           "name" : parts[1] };
             for each (var headerId in ["List-Post","List-Archive","List-Help",
                                        "List-Subscribe","List-Unsubscribe"]) {
@@ -209,17 +215,17 @@ class Message(schema.Document):
           for (var idx in values) {
             for (var elm in values[idx]) {
               output[elm] = values[idx][elm];
             }
           }
           return output;
         }''', include_docs=False, group=True, group_level=1)
 
-    by_list_id = schema.View('by_mailing_list', '''\
+    by_list_id = schema.View('by_mailing_list', r'''\
         function(doc) {
           if (doc.headers && doc.headers["List-Id"]) {
             var parts = doc.headers["List-Id"].match(/[\W\w\s]*<(.+)>.*/);
             emit([parts[1], doc.timestamp], doc.conversation_id);
           }
         }''', include_docs=True)    
         
 DATABASES = {
new file mode 100644
new file mode 100644
--- /dev/null
+++ b/server/python/junius/proc/base.py
@@ -0,0 +1,100 @@
+import logging
+
+__all__ = ['Rat', 'AccountBase']
+
+class Rat(object):
+  '''
+  Account reasons rationale... this is here to make typing easier...
+  '''
+  #: all whats for this account
+  EVERYTHING = 'everything'
+  #: the problem is with the server (or the network)
+  SERVER = 'server'
+  #: the problem is with the account
+  ACCOUNT = 'account'
+
+  UNREACHABLE = 'unreachable'
+  PASSWORD = 'password'
+  MAINTENANCE = 'maintenance'
+  BUSY = 'busy'
+  #: something is up with the crypto; this needs to be exploded
+  CRYPTO = 'crypto'
+
+  #: good indicates that all-is-well
+  GOOD = 'good'
+  '''
+  Neutral indicates an expected transient lack of success (maintenance,
+   overloaded servers, etc.)  It is tracked (rather than silently not updating
+   good) because it potentially allows for higher-level logic to escalate
+   continued inability to connect to something user-visible.
+
+  For example, twitter being down for short periods of time (at least in the
+   past) was business as usual; there would be no reason to notify the user.
+   However, if twitter is down for an extended period of time, we want to let
+   the user know (in an ambient sort of way) that there's a problem with
+   twitter, and that's why they're not getting any messages.
+
+  The primary difference between a TEMPORARY BAD thing and a TEMPORARY NEUTRAL
+   thing is that we will let the user know about a TEMPORARY BAD thing
+   when it happens.
+  '''
+  NEUTRAL = 'neutral'
+  '''
+  Bad indicates an unexpected problem which may be TEMPORARY or PERMANENT.
+   Temporary problems are expressed to the user in an ambient fashion when
+   they happen, but may not require any action.  If a temporary problem stays
+   a problem for an extended period of time, it will be escalated to a
+   more explicit notification.  A permanent problem requires user action and
+   the user will be immediately notified.
+
+  For example, bad passwords and suspended accounts are permanent problems.  The
+   former is actionable within the UI, whereas the latter is not.  However, it
+   is important that the user be notified at the earliest opportunity so they
+   can take real-world action promptly.  A server being inaccessible is a
+   TEMPORARY BAD problem rather than a TEMPORARY NEUTRAL thing because a user
+   may benefit from knowing their connection or server is flakey.  (Note:
+   temporarily lacking an internet connection is different from a flakey one;
+   we don't want to bother the user if we know they don't have a connection.)
+  '''
+  BAD = 'bad'
+
+  #: temporary implies it may fix itself without user intervention
+  TEMPORARY = 'temporary'
+  #: permanent implies the user must take some action to correct the problem
+  PERMANENT = 'permanent'
+  #: unknown means it either doesn't matter or it could be temporary but the
+  #:  user should potentially still be informed
+  UNKNOWN = 'unknown'
+
+
+class AccountBase(Rat):
+  '''
+  Base class for accounts.  It inherits the Rat constants; every method
+  defined here is a no-op.
+  '''
+  # The defaults are spelled Rat.UNKNOWN because the name AccountBase is not
+  # bound yet while its own class body is being executed.
+  def reportStatus(self, what, state, why=Rat.UNKNOWN,
+                   expectedDuration=Rat.UNKNOWN):
+    '''
+    Report status relating to this account.
+
+    Everything is peachy: EVERYTHING GOOD
+    Wrong password: ACCOUNT BAD PASSWORD PERMANENT
+    (may be temporary if a bad password can mean many things)
+    Can't contact server: SERVER BAD UNREACHABLE TEMPORARY
+    Server maintenance: SERVER NEUTRAL MAINTENANCE TEMPORARY
+    (indicates a temporary lapse in service but there's not much we can do)
+    Server busy: SERVER NEUTRAL BUSY TEMPORARY
+    (for example, last.fm will sometimes refuse submission requests)
+    '''
+    pass
+
+  def sync(self):
+    pass
+
+  def verify(self):
+    '''
+    Placeholder; performs no verification yet.
+    '''
+    pass
new file mode 100644
--- /dev/null
+++ b/server/python/junius/proc/contact.py
@@ -0,0 +1,11 @@
+class ContactProcessor(object):
+  '''
+  Placeholder for contact resolution logic; neither method is implemented yet.
+  '''
+  def resolveOrCreateContacts(self, kind, values):
+    # The method originally had no body at all, which made the module a
+    # syntax error; keep it an explicit no-op until it is implemented.
+    pass
+
+  def processContactIdentity(self, kind, value):
+    pass
new file mode 100644
--- /dev/null
+++ b/server/python/junius/proc/mail.py
@@ -0,0 +1,231 @@
+#!/usr/bin/env python
+
+import base64, datetime, email.utils, email.header
+import pprint
+
+import junius.model as model
+
+class MailProcessor(object):
+    '''Processes python email representation objects.'''
+    # NOTE: everything in this class is indented by four spaces; __init__
+    # used two, which nested all of the later methods inside __init__.
+    def __init__(self, account):
+        self.account = account
+
+    def grok_email_addresses(self, *address_strings):
+        seen_contacts = {}
+        result_lists = []
+        involved_list = []
+        for address_string in address_strings:
+            cur_results = []
+            cur_addresses = email.utils.getaddresses((address_string,))
+            for name, address in cur_addresses:
+                # XXX TODO: we can use 'keys' instead of just key.
+                contacts = model.Contact.by_identity(self.dbs.contacts,
+                                                     key=['email', address])
+                if len(contacts):
+                    # the contact exists, use it
+                    contact = list(contacts)[0]
+                    if contact.id in seen_contacts:
+                        contact = seen_contacts[contact.id]
+                    else:
+                        involved_list.append(contact)
+                        seen_contacts[contact.id] = contact
+                else:
+                    # the contact does't exist, create it
+                    if not name:
+                        name = address
+                    else:
+                        try:
+                            pieces = email.header.decode_header(name)
+                            encoded_pieces = [piece.decode(encoding or "utf-8") for piece, encoding in pieces]
+                            name = u"".join(encoded_pieces)
+                        except (LookupError, UnicodeError):
+                            name = u""+name
+
+                    contact = model.Contact(
+                        name=name,
+                        identities=[{'kind': 'email', 'value': address}]
+                    )
+                    contact.store(self.dbs.contacts)
+                    involved_list.append(contact)
+                    seen_contacts[contact.id] = contact
+                cur_results.append(contact)
+            result_lists.append(cur_results)
+        result_lists.append(involved_list)
+        return result_lists
+
+    def extract_message_id(self, message_id_string, acceptNonDelimitedReferences):
+        '''
+        Pull the first message-id out of a References/In-Reply-To style string.
+
+        Returns a two element list: the id (without its angle brackets) and
+        the text following the closing '>'.  When no complete <...> is found
+        the result is [None, ''], unless acceptNonDelimitedReferences is set and
+        non-whitespace text was seen before reaching a '<' or the end; then
+        everything from that text on is returned as the id.
+        '''
+        # this is a port of my fix for bug 466796, the comments should be ported
+        #  too if we keep this logic...
+        whitespaceEndedAt = None
+        firstMessageIdChar = None
+        message_len = len(message_id_string)
+        i = 0
+        while i < message_len:
+            char = message_id_string[i]
+            if char == '<':
+                firstMessageIdChar = i + 1 # skip over the '<'
+                break
+            # not a raw string: r' \r\n\t' would treat the letters r, n and t
+            #  (and the backslash) as whitespace
+            if char not in ' \r\n\t' and whitespaceEndedAt is None:
+                whitespaceEndedAt = i
+            i += 1
+
+        # if we hit a '<', keep going until we hit a '>' or the end
+        if firstMessageIdChar is not None:
+            closeAt = message_id_string.find('>', firstMessageIdChar)
+            if closeAt != -1:
+                return [message_id_string[firstMessageIdChar:closeAt],
+                        message_id_string[closeAt+1:]]
+
+        # no complete <...>: fall back to the undelimited text if allowed.  The
+        #  check is against None because an id starting at index 0 is falsy.
+        if acceptNonDelimitedReferences and whitespaceEndedAt is not None:
+            return [message_id_string[whitespaceEndedAt:], '']
+        return [None, '']
+
+    def extract_message_ids(self, message_id_string):
+        '''Collect, in order, every message-id found in the string.'''
+        found, remaining = [], message_id_string
+        while remaining:
+            ref, remaining = self.extract_message_id(remaining, not found)
+            if ref:
+                found.append(ref)
+        return found
+
+    def grok_message_conversation(self, imsg):
+        self_header_message_id = imsg.headers['Message-Id'][1:-1]
+        refs_str = imsg.headers.get('References') or imsg.headers.get('In-Reply-To') or ''
+        conversation_id = None
+        conversations = {}
+        self_message = None
+        header_message_ids = self.extract_message_ids(refs_str)
+        unseen = set(header_message_ids)
+
+        # save off the list of referenced messages
+        references = header_message_ids[:]
+        # see if the self-message already exists...
+        header_message_ids.append(self_header_message_id)
+
+        messages = model.Message.by_header_id(self.dbs.messages,
+                                              keys=header_message_ids)
+        for message in messages:
+            if message.header_message_id == self_header_message_id:
+                self_message = message
+            else:
+                unseen.remove(message.header_message_id)
+            conversation_id = message.conversation_id
+
+        if conversation_id is None:
+            # we need to allocate a conversation_id...
+            conversation_id = self_header_message_id
+
+        # create dudes who are missing
+        if unseen:
+            missing_messages = []
+            for header_message_id in unseen:
+                missing_messages.append(model.Message(
+                    conversation_id=conversation_id,
+                    header_message_id=header_message_id,
+                    ))
+            self.dbs.messages.update(missing_messages)
+
+        return conversation_id, self_message, references
+
+    def processMessage(self, imsg):
+        attachments = {}
+        bodyPart = self.grok_part(imsg, imsg.body, attachments)
+
+        # XXX the gocept header logic unfortunately is case-sensitive...
+        # XXX also, doesn't support repeated values...
+        # (but we can live with these limitations for now)
+
+        from_contacts, to_contacts, cc_contacts, involves_contacts = self.grok_email_addresses(
+            imsg.headers.get('From', ''), imsg.headers.get('To', ''),
+            imsg.headers.get('Cc', ''))
+
+        conversation_id, existing_message, references = self.grok_message_conversation(imsg)
+
+        timestamp = email.utils.mktime_tz(email.utils.parsedate_tz(imsg.headers['Date']))
+
+
+        cmsg = model.Message(
+            account_id=self.account_def.id,
+            storage_path=imsg.parent.path,
+            storage_id=int(imsg.UID),
+            #
+            conversation_id=conversation_id,
+            header_message_id=imsg.headers.get('Message-Id')[1:-1],
+            references=references,
+            #
+            from_contact_id=from_contacts[0].id,
+            to_contact_ids=[c.id for c in to_contacts],
+            cc_contact_ids=[c.id for c in cc_contacts],
+            involves_contact_ids=[c.id for c in involves_contacts],
+            #
+            date=datetime.datetime.utcfromtimestamp(timestamp),
+            timestamp=timestamp,
+            #
+            read=r'\Seen' in imsg.flags,
+            #
+            headers=dict(imsg.headers),
+            bodyPart=bodyPart,
+            _attachments=attachments
+        )
+        if existing_message:
+            cmsg.id = existing_message.id
+            # this is ugly, we should really just have the logic above use a
+            #  style that allows it to work with new or existing...
+            cmsg._data['_rev'] = existing_message.rev
+
+        cmsg.store(self.dbs.messages)
+
+
+    def grok_part(self, msg, part, attachments, depth=0):
+        contentType = part['content_type']
+        partNumber = part['partnumber']
+        me = {'contentType': contentType,
+              'partNumber': partNumber}
+        if contentType.startswith('multipart/'):
+            parts = me['parts'] = []
+            for subpart in part.parts:
+                parts.append(self.grok_part(msg, subpart, attachments, depth+1))
+        else:
+            me['parameters'] = part['parameters']
+            data = part.fetch()
+            # XXX perhaps we should recursively process the nested part dude?
+            # (if contentType == 'message/rfc822')
+            if contentType.startswith('text/'):
+                me['data'] = data
+            else:
+                attachments[partNumber] = {'content_type': contentType,
+                                           'data': base64.b64encode(data)}
+        return me
+
+class Grabber(object):
+    def __init__(self, dbs):
+        self.dbs = dbs
+
+    def syncAccounts(self):
+        for account in model.Account.all(self.dbs.accounts):
+            if account.kind == 'imap':
+                junius_account = JuniusAccount(self.dbs, account)
+                junius_account.sync()
+
+if __name__ == '__main__':
+    import os
+    #acct = JuniusAccount('localhost', 8143, os.environ['USER'], 'pass')
+    dbs = model.fab_db()
+    grabber = Grabber(dbs)
+    grabber.syncAccounts()
new file mode 100644
--- /dev/null
+++ b/server/python/junius/proc/message.py
@@ -0,0 +1,6 @@
+
+class MessageProcessor(object):
+  '''
+  Common message processing superclass for e-mail messages, twitter messages, etc.
+  '''
+  
\ No newline at end of file
new file mode 100644
--- /dev/null
+++ b/server/python/junius/proto/__init__.py
@@ -0,0 +1,12 @@
+import junius.proto.imap as imap
+import junius.proto.twitter as twitter
+
+#: maps an account's 'kind' to the class implementing that protocol
+protocols = {
+    'imap': imap.IMAPAccount,
+}
+# twitter.py is added as an empty file by this changeset, so only register
+# the twitter protocol once the module really provides an account class;
+# an unconditional twitter.TwitterAccount would make this package unimportable.
+if hasattr(twitter, 'TwitterAccount'):
+    protocols['twitter'] = twitter.TwitterAccount
new file mode 100644
--- /dev/null
+++ b/server/python/junius/proto/imap.py
@@ -0,0 +1,111 @@
+import logging
+
+from twisted.internet import protocol, ssl, defer, reactor
+from twisted.mail import imap4
+
+from junius.proc import base
+brat = base.Rat
+
+class ImapClient(imap4.IMAP4Client):
+  '''
+  Much of our logic here should perhaps be in another class that holds a
+  reference to the IMAP4Client instance subclass.  Consider refactoring if we
+  don't turn out to benefit from the subclassing relationship.
+  '''
+  def serverGreeting(self, caps):
+    d = self._doAuthenticate()
+    d.addCallback(self._reqList)
+    # failures were already reported through accountStatus; swallow them here
+    # so they do not also surface as unhandled errors in the deferred
+    d.addErrback(lambda failure: None)
+
+  def _doAuthenticate(self):
+    '''
+    Negotiate TLS first when the account asks for it, then log in.  Returns
+    the deferred for the whole sequence.
+    '''
+    if self.account.details['crypto'] == 'TLS':
+      d = self.startTLS(self.factory.ctx)
+      d.addErrback(self.accountStatus,
+                   brat.SERVER, brat.BAD, brat.CRYPTO, brat.PERMANENT)
+      d.addCallback(self._doLogin)
+    else:
+      d = self._doLogin()
+    return d
+
+  def _doLogin(self, *args, **kwargs):
+    d = self.login(self.account.details['username'],
+                   self.account.details['password'])
+    # a rejected login is a problem with the account, not with the server
+    d.addErrback(self.accountStatus,
+                 brat.ACCOUNT, brat.BAD, brat.PASSWORD, brat.PERMANENT)
+    return d
+
+  def _reqList(self, *args, **kwargs):
+    self.account.reportStatus(brat.EVERYTHING, brat.GOOD)
+    return self.list('', '*').addCallback(self._procList)
+
+  def _procList(self, result, *args, **kwargs):
+    for flags, delim, name in result:
+      print 'Mailbox', flags, delim, name
+
+  def accountStatus(self, result, *args):
+    '''
+    Errback helper: report the given status and hand the failure back so
+    that the callbacks further down the chain are skipped.
+    '''
+    self.account.reportStatus(*args)
+    return result
+
+
+class ImapClientFactory(protocol.ClientFactory):
+  protocol = ImapClient
+
+  def __init__(self, account):
+    self.account = account
+
+    self.ctx = ssl.ClientContextFactory()
+    self.backoff = 8 # magic number
+
+  def buildProtocol(self, addr):
+    p = self.protocol(self.ctx)
+    p.factory = self
+    p.account = self.account
+    return p
+
+  def connect(self):
+    details = self.account.details
+    self.account.log.debug('attempting to connect to %s:%d (ssl: %s)',
+                           details['host'], details['port'], details.get('ssl'))
+    # the connection details live on the account; the factory has none
+    if details.get('ssl'):
+      reactor.connectSSL(details['host'], details['port'], self, self.ctx)
+    else:
+      reactor.connectTCP(details['host'], details['port'], self)
+
+  def clientConnectionLost(self, connector, reason):
+    # the flaw in this is that we start from scratch every time; which is why
+    #  most of the logic in the client class should really be pulled out into
+    #  the account logic, probably.  this class itself may have issues too...
+    self.account.log.debug(
+      'lost connection to server, going to reconnect in a bit')
+    reactor.callLater(2, self.connect)
+
+  def clientConnectionFailed(self, connector, reason):
+    self.account.reportStatus(brat.SERVER, brat.BAD, brat.UNREACHABLE,
+                              brat.TEMPORARY)
+    self.account.log.warning('Failed to connect, will retry after %d secs',
+                             self.backoff)
+    reactor.callLater(self.backoff, self.connect)
+    self.backoff = min(self.backoff * 2, 600) # magic number
+
+
+class IMAPAccount(base.AccountBase):
+  def __init__(self, db, details):
+    self.db = db
+    self.details = details
+    self.log = logging.getLogger('imap')
+
+  def startSync(self, conductor):
+    self.factory = ImapClientFactory(self)
+    self.factory.connect()
new file mode 100644
new file mode 100644
--- /dev/null
+++ b/server/python/junius/sync.py
@@ -0,0 +1,71 @@
+import logging
+
+from twisted.internet import reactor, defer
+import paisley
+
+import junius.config
+import junius.proto as proto
+
+config = junius.config.get_config()
+
+DB = None
+def get_db(couchname="local"):
+  '''
+  Return the shared paisley connection, creating it on first use from the
+  couch definition called couchname.  Later calls return that same
+  connection regardless of the name they pass.
+  '''
+  global DB
+  if DB is None:
+    couch = config.couches[couchname]
+    DB = paisley.CouchDB(couch['host'], couch['port'], couch['name'])
+  return DB
+
+class SyncConductor(object):
+  '''
+  Starts a sync for every account whose kind has a registered protocol and
+  runs the twisted reactor while they work.
+  '''
+  def __init__(self):
+    self.log = logging.getLogger('sync')
+
+    self.db = get_db()
+    # Config has no 'db' attribute; the database name is part of the couch
+    # definition that get_db() connects to by default
+    self.db_name = config.couches['local']['name']
+
+    self.active_accounts = []
+
+  def _ohNoes(self, failure, *args, **kwargs):
+    self.log.error('OH NOES! failure! %s', str(failure))
+    reactor.stop()
+
+  def _getAllAccounts(self):
+    return self.db.openView('accounts', 'all'
+      ).addCallback(self._gotAllAccounts
+      ).addErrback(self._ohNoes)
+
+  def _gotAllAccounts(self, rows, *args, **kwargs):
+    for row in rows:
+      account_details = row['value']
+      kind = account_details['kind']
+      if kind not in proto.protocols:
+        self.log.error("Don't know what to do with account kind: %s", kind)
+        continue
+      # the protocol class is looked up by the account's kind
+      account = proto.protocols[kind](self.db, account_details)
+      self.log.info('Starting sync of %s account: %s',
+                    kind, account_details.get('name', '(un-named)'))
+      account.startSync(self)
+      self.active_accounts.append(account)
+
+  def sync(self):
+    reactor.callWhenRunning(self._getAllAccounts)
+    self.log.debug('starting reactor')
+    reactor.run()
+    self.log.debug('reactor done')
+
+
+if __name__ == '__main__':
+  conductor = SyncConductor()
+  conductor.sync()
new file mode 100644
--- /dev/null
+++ b/tests/__init__.py
@@ -0,0 +1,1 @@
+# doot doot