471,571 Members | 1,632 Online
Bytes | Software Development & Data Engineering Community
Post +

Home Posts Topics Members FAQ

Join Bytes to post your question to a community of 471,571 software developers and data experts.

email parsing

This is my first Python script. I don't know whether it uses the
modules correctly, but it works fine, especially with Russian code pages
and MIME-formatted messages, including quoted-printable and base64
encoded ones.

It would be very helpful if anybody could post comments on this script
and tell me whether it is good or bad.
import email
import mailbox
from email.Header import decode_header
from email.Header import make_header
import string
import sys

# Code page used for everything the script prints (cp866 = DOS Cyrillic).
outEnc = "cp866"

# The mailbox file to process is the first command-line argument.  Fail with
# a usage message instead of a bare IndexError when it is missing.
if len(sys.argv) < 2:
    sys.exit("usage: %s MBOX_FILE" % sys.argv[0])
infile = sys.argv[1]

# Footer markers: obrez() cuts a message body at the last occurrence of the
# first marker (in this order) that is found in it.
#
# NOTE(review): the posted copy of these literals had a blank inserted inside
# each tilde run, exactly where the forum software wraps tokens longer than
# 50 characters.  The runs are restored as unbroken here (adjacent literals
# are concatenated by the parser) -- confirm against a real message footer.
subStrObrez = [
    "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
    "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
    "~~~~~~~~~~",
    "~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
    "~~~\n"
    "To UNSUBSCRIBE from this forum, send an email to:",
    "~~~~~~~~~~~~~~~~~~",
]

# Cut yahoo info at the end of message
def obrez(strMsg, markers=None):
    """Return strMsg truncated at a trailing footer marker.

    The markers are tried in order; for the first one that occurs in
    strMsg, everything from its LAST occurrence onward is dropped.  If no
    marker occurs, strMsg is returned unchanged.

    strMsg  -- the message body; None or an empty body is returned as is.
    markers -- optional sequence of marker strings; defaults to the
               module-level subStrObrez list.
    """
    if not strMsg:
        # Nothing to cut (also avoids calling .rfind on None).
        return strMsg
    if markers is None:
        markers = subStrObrez
    for marker in markers:
        # str.rfind replaces the deprecated string.rfind() module function.
        pos = strMsg.rfind(marker)
        if pos != -1:
            return strMsg[:pos]
    return strMsg

# Convert message header
def my_get_header(str):
    """Decode a raw message header into a single text string.

    The header is split with decode_header(); every byte-string part is
    decoded with its declared charset (ASCII when none is declared) and
    each part is followed by one space, so the result ends with a space.

    str -- the raw header value.  None (a header that is absent from the
           message) yields an empty string instead of an exception.

    Undecodable bytes are replaced rather than raising, and a charset
    name the codec registry does not know falls back to ASCII, so one odd
    header cannot abort the whole run.
    """
    # The parameter name shadows the builtin; it is kept for compatibility.
    if str is None:
        return ""
    pieces = []
    for val, encoding in decode_header(str):
        if isinstance(val, bytes):
            try:
                val = val.decode(encoding or "ascii", "replace")
            except LookupError:
                # Unknown charset label: salvage the ASCII part.
                val = val.decode("ascii", "replace")
        pieces.append(val + " ")
    return "".join(pieces)

# Process the message
def _render_body(part, strip_footer):
    """Return the decoded payload of one message part, ready for printing.

    part         -- a message (or sub-part) object.
    strip_footer -- when true, the text is passed through obrez().

    A payload that declares a charset is decoded, optionally stripped, and
    re-encoded to outEnc; a payload without a charset is passed through as
    raw bytes.  Returns None when the part has no decodable payload.
    """
    payload = part.get_payload(None, True)
    if payload is None:
        return None
    charset = part.get_content_charset()
    text = None
    if charset:
        try:
            text = payload.decode(charset, "replace")
        except LookupError:
            # Unknown charset label: fall back to the raw bytes.
            text = None
    if text is None:
        if strip_footer:
            return obrez(payload)
        return payload
    if strip_footer:
        text = obrez(text)
    # "replace" keeps characters that outEnc cannot represent from aborting.
    return text.encode(outEnc, "replace")


def _print_body(part, strip_footer):
    """Print the rendered body of one part; print nothing if it has none."""
    body = _render_body(part, strip_footer)
    if body is not None:
        print(body)


def proc(msg):
    """Print the From/To/Subject headers and the readable body of msg.

    For a multipart message every text/plain part is printed with the
    footer stripped.  For a single-part message a text/plain body is
    printed with the footer stripped and a text/html body is printed as
    is; any other content type prints no body.

    NOTE: the output is written as outEnc-encoded byte strings, which is
    Python 2 behaviour; the single-argument print(...) form is used only so
    the code also parses under Python 3.
    """
    # "msg[name] or ''" guards against a header that is missing entirely.
    print('From : ' + my_get_header(msg['From'] or "").encode(outEnc, "replace"))
    print('To : ' + my_get_header(msg['To'] or "").encode(outEnc, "replace"))
    print('Subject: ' + my_get_header(msg['Subject'] or "").encode(outEnc, "replace"))
    print("")

    if msg.is_multipart():
        for part in msg.walk():
            if part.get_content_type() == "text/plain":
                _print_body(part, True)
    elif msg.get_content_type() == "text/plain":
        _print_body(msg, True)
    elif msg.get_content_type() == "text/html":
        _print_body(msg, False)
###############################################################################
# The main program
#
# Reads the mailbox named by infile and, for every message whose Subject
# contains one of the known list tags, prints a bulletin: an "SB ..." line,
# the subject, the message itself (via proc) and the closing "/EX" line.
#
# NOTE: mailbox.UnixMailbox exists only in Python 2; the single-argument
# print(...) form is used so the code also parses under Python 3.

# Pairs of [subject tag, forum area name used in the "SB" line].
RubLst = [
    ["[contestru]", "FOTSTR"],
    ["[russiandx]", "FORUDX"],
]

f = open(infile, "rb")
try:
    for msg in mailbox.UnixMailbox(f, email.message_from_file):
        # Decode the subject once per message; "or ''" covers a missing header.
        subject = my_get_header(msg['Subject'] or "")
        for rub in RubLst:
            if rub[0] in subject:
                print("SB " + rub[1] + "@FORUM < INET")
                print(subject.encode(outEnc, "replace"))
                print("")
                proc(msg)
                print("")
                print("powered by Python")
                print("/EX")
finally:
    # The original never closed the mailbox file.
    f.close()
Aug 27 '08 #1
0 797

This discussion thread is closed

Replies have been disabled for this discussion.

Similar topics

8 posts views Thread by Gerrit Holl | last post: by
3 posts views Thread by dont bother | last post: by
reply views Thread by Barry Warsaw | last post: by
19 posts views Thread by 叮叮当当 | last post: by
reply views Thread by Li-fan Chen | last post: by
9 posts views Thread by Jerim79 | last post: by
1 post views Thread by mneagul | last post: by
13 posts views Thread by Chris Carlen | last post: by
1 post views Thread by Gerardo Herzig | last post: by
reply views Thread by Ahmed, Shakir | last post: by
reply views Thread by leo001 | last post: by
reply views Thread by lumer26 | last post: by
reply views Thread by lumer26 | last post: by

By using Bytes.com and its services, you agree to our Privacy Policy and Terms of Use.

To disable or enable advertisements and analytics tracking please visit the manage ads & tracking page.