#!/usr/bin/env python
"""Pretty print a mbox -- since some previous date -- as a simple HTML file"""

import os, re, sys
import getopt
import subprocess
import tempfile
import textwrap
import cgi
from email.Utils import parsedate # http://docs.python.org/lib/module-email.Utils.html
import email.Parser, mailbox
import time
from datetime import * # http://docs.python.org/lib/datetime-date.html
from dateutil.tz import *
from dateutil.parser import * # http://labix.org/python-dateutil
from dateutil.relativedelta import *
import markup # http://markup.sourceforge.net/#documentation


def msgfactory(fp):
    """Parse one message from the file object `fp` for the mailbox iterator.

    Returns the parsed message, or '' when the message cannot be parsed.
    """
    try:
        return email.message_from_file(fp)
    except email.Errors.MessageParseError:
        # Don't return None since that will stop the mailbox iterator
        return ''


def _previous_class(now):
    """Return noon (in `now`'s timezone) of the most recent class day.

    WE,TH,FR,SA look back to the previous TU; SU,MO,TU look back to the
    previous TH.
    """
    if now.weekday() in (2, 3, 4, 5):   # WE,TH,FR,SA msgs since prev TU
        class_day = TU(-1)
    else:                               # SU,MO,TU msgs since prev TH
        class_day = TH(-1)
    return now + relativedelta(hour=12, minute=0, second=0, weekday=class_day)


def _message_date(msg):
    """Return the message's Date header as an aware datetime, or None.

    None is returned (after a warning on stderr) when the header is missing
    or cannot be parsed, so the caller can skip the message instead of
    crashing on it.
    """
    raw = msg.get('date')
    when = None
    if raw:
        try:
            when = parse(raw)
        except (ValueError, OverflowError):
            when = None
    if when is None:
        sys.stderr.write("Skipping message with missing or unparseable "
                         "Date header: %r\n" % (raw,))
        return None
    if when.tzinfo is None:
        # A naive datetime cannot be compared with the aware cutoff;
        # treat a Date without a zone as local time.
        when = when.replace(tzinfo=tzlocal())
    return when


def _plain_paragraphs(part):
    """Return the HTML-escaped paragraphs of a text/plain message part.

    The payload is decoded with the part's declared charset (UTF-8 when the
    charset is missing or unknown) and re-encoded as UTF-8.  Paragraphs are
    separated by blank lines.  When no line is longer than 100 characters
    the text is taken to be hard-wrapped, and the single newlines inside
    each paragraph are replaced by spaces.
    """
    payload = part.get_payload(decode=True) or ''
    charset = part.get_content_charset() or 'utf-8'
    try:
        text = payload.decode(charset, 'replace')
    except LookupError:
        # Charset name not known to Python.
        text = payload.decode('utf-8', 'replace')
    content = cgi.escape(text.encode('utf-8', 'replace'))

    paragraphs = content.split('\n\n')
    if not any(len(line) > 100 for line in content.split('\n')):
        # un/re-wrap: rm artificial breaks inside each paragraph
        paragraphs = [para.replace('\n', ' ') for para in paragraphs]
    return paragraphs


def _catdoc(data):
    """Run `catdoc` over the bytes `data` and return its text output.

    The data is written to a private temporary file that is removed
    afterwards.  Returns '' (after a warning on stderr) when `catdoc`
    cannot be started.
    """
    fd, path = tempfile.mkstemp(prefix='mail-part-msw')
    try:
        tmpfp = os.fdopen(fd, 'wb')
        try:
            if data:
                tmpfp.write(data)
        finally:
            tmpfp.close()
        try:
            # Argument list, not a shell string: no quoting problems.
            proc = subprocess.Popen(['catdoc', path], stdout=subprocess.PIPE)
            output = proc.communicate()[0]
        except OSError:
            sys.stderr.write("Could not run catdoc; attachment skipped.\n")
            return ''
    finally:
        os.remove(path)
    # Lines keep their newline and are joined with a space, matching the
    # layout produced by ' '.join(readlines()) on catdoc's output.
    return ' '.join(output.splitlines(True))


def _render_message(page, msg, msg_date):
    """Append one message (headers, text parts, Word attachments) to `page`."""
    subject = cgi.escape(msg.get('subject', ''))
    # Keep only what precedes the '@' so the address domain is not shown.
    sender = msg.get('from')
    if sender is None:
        sender = ''
    else:
        sender = cgi.escape(sender.split('@')[0] + '>')

    page.div.open()
    page.hr()
    page.h1(subject, class_='subject')
    page.h2(sender, class_='sender')
    page.h2(str(msg_date), class_='msg_date')
    for part in msg.walk():
        subtype = part.get_content_subtype()
        if subtype == 'plain':
            for paragraph in _plain_paragraphs(part):
                page.p(paragraph)
        elif subtype in ('msword', 'octet-stream'):
            content = _catdoc(part.get_payload(decode=True))
            page.pre(cgi.escape(content))
    page.div.close()


def main(args=None):
    """Print to stdout an HTML page of the mbox messages since last class.

    `args` is the list of positional command-line arguments; its first item
    is the mbox filename.  When omitted, sys.argv[1:] is used.  Prints a
    usage line and exits with status 1 when no filename is given.
    """
    if args is None:
        args = sys.argv[1:]
    if not args:
        print("Usage: %s filename" % os.path.basename(sys.argv[0]))
        sys.exit(1)

    page = markup.page()
    page.init(title="Student Responses",
              css='http://reagle.org/joseph/2005/01/mm-print.css',
              charset='utf-8')
    # http://reagle.org/joseph/2005/01/mm-print.css

    now = datetime.now(tzlocal())  # could use (pytz.timezone('US/Eastern'))
    # previous class is the previous TU/TH ending at noon tzlocal
    prev_class = _previous_class(now)

    fp = open(args[0], 'r')
    try:
        for msg in mailbox.UnixMailbox(fp, msgfactory):
            if msg == '':
                # Placeholder msgfactory returns for an unparseable message.
                continue
            msg_date = _message_date(msg)
            if msg_date is None or not msg_date > prev_class:
                continue
            _render_message(page, msg, msg_date)
    finally:
        fp.close()
    print(page)


class opts:
    # Set to True by the -r command-line option.
    recent = False


if __name__ == "__main__":
    try:
        options, arguments = getopt.getopt(sys.argv[1:], "r")
    except getopt.error:
        print('Error: Unknown option or missing argument.')
        # Stop here: `options` is unbound when parsing fails.
        sys.exit(2)
    for opt, val in options:
        if opt == '-r':
            opts.recent = True
    main(arguments)