On Apr 25, 2:51 pm, Larry Bates <larry.ba...@websafe.comwrote:
andrew.jeffer...@gmail.com wrote:
Hi,
I'm trying to write a simple log parsing program. I noticed that it
isn't reading my log file to the end.
My log is around 200,000 lines but it is stopping at line 26,428. I
checked that line and there aren't any special characters.
This is the file reading code segment that I'm using:
sysFile=open(sysFilename,'r')
lineCount = 0
for line in sysFile:
lineCount +=1
print str(lineCount) + " -- " + line
I also stuck this same code bit into a test script and it was able to
parse the entire log without problem. Very quirky.
This is my first foray from Perl to Python so I appreciate any help.
Thanks in advance.
--Andrew
Show us more of your surrounding code so we have some chance of figuring
out why this working code stops. There's nothing wrong with this code,
the problem is somewhere else.
Suggestion:
lineCount = 0
for line in sysFile:
lineCount +=1
print str(lineCount) + " -- " + line
can be written:
for lineCount, line in enumerate(sysFile):
print "%i--%s" % (lineCount, line)
-Larry
Hi Larry,
I've attached the whole script. Thanks again for your help.
--Andrew
import getopt, sys, re, os
def main():
try:# Get options for processing
o, a = getopt.getopt(sys.argv[1:], 'a:d:hl')
except getopt.GetoptError:
# print help information and exit:
usage()
sys.exit(2)
opts = {}
for k,v in o: #Parse parameters into
hash
opts[k] = v
#make sure that all of the needed options are included
if opts.has_key('-h'): #Return help for -
h
usage()
sys.exit(0)
if opts.has_key('-l'):
pathname = opts['-l']
if not (opts.has_key('-a')):
usage()
sys.exit()
else:
address=opts['-a']
if not (opts.has_key('-d')):
usage()
sys.exit()
if not (opts.has_key('-l')): # Use current path if not provided
pathname = os.path.abspath(os.path.dirname(sys.argv[0]))
# Get file names and open files
sysFilename = os.path.abspath(pathname) + "\sys" + opts["-d"] +
".txt"
#logFilename = opts["-l"] + "\log" + opts["-d"] + ".txt"
spamFilename = os.path.abspath(pathname) + "\spam" + opts["-d"] +
".log"
print "Loading Files:\n" + sysFilename + "\n" + spamFilename +
"\n"
try: #Open log files
sysFile=open(sysFilename,'rb')
#logFile=open(logFilename,'r')
spamFile=open(spamFilename,'rb')
except:
print "could not open file for reading" , sys.exc_info()[0]
sys.exit()
ToAddr = {} # This will hold Messages TO the address
FrAddr = {} # This will hold Messages FROM the address
numFound = 0 # For Testing
notFound = 0 # For Testing
lineCount = 0 # For Testing
#Read file and get message IDs that correspond to the searched
address
for line in sysFile:
lineCount +=1 # For Testing
# print str(lineCount) + " -- " + line
daRegex = re.compile(address)
if daRegex.search(line): #Found address in line - Continue
processing
#re.search(address,line): #If line has address
print line + "\n" # For Testing
numFound +=1 # For Testing
if re.search('MAIL FROM:',line): #Add it (message id) to
the From list if needed
MID = getMID(line)
if FrAddr.has_key(MID):
break
else:
FrAddr[MID]=""
#print "From: " + MID + "\n"
elif re.search('RCPT TO:',line): #Add it (message id) to
the To list if needed
MID = getMID(line)
if ToAddr.has_key(MID):
break
else:
ToAddr[MID]=""
else:
notFound +=1 #For Testing
# Close and re-open file for re-processes (there is probably a
better way to do this)
sysFile.close
sysFile=open(sysFilename,'r')
for line in sysFile: # Get all messages with message IDs that have
been found
MID = getMID(line)
if FrAddr.has_key(MID):
FrAddr[MID]+=line
# print line + "\n"
elif ToAddr.has_key(MID):
ToAddr[MID]+=line
sysFile.close
for line in spamFile: # Get similar messages from spam file
MID = getMID(line)
if FrAddr.has_key(MID):
FrAddr[MID]+='SPAM>>>'+ line
elif ToAddr.has_key(MID):
ToAddr[MID]+='SPAM>>>'+ line
spamFile.close
#open output files
fname = pathname + "\\" + address + ".txt"
fout = open(fname,'w')
# Output and format
for key in FrAddr.keys():
fout.write("<<<<<<< FROM "+ address+ " Message ID "+ key
+ "------------\n")
fout.write(FrAddr[key]+"\n")
for key in ToAddr.keys():
fout.write(">>>>>>To "+ address+ " Message ID "+ key
+ "------------\n")
fout.write(ToAddr[key]+"\n")
print "------------------- Done processing
---------------------"
print "Found: " + str(numFound) #Test
print "Not matching: " + str(notFound) #Test
print "Line Cound: " + str(lineCount) #test
fout.close
def getMID(daLine): #Extracts the message ID from the message
p = re.compile("\(.*?\)")
pid=p.search(daLine)
if pid:
id=pid.group()
id=id.lstrip('\(')
id=id.rstrip('\)')
#print id
return id
else:
return
def usage(): # Provides usage feedback
print """
Syntax:
-a email account to find
-l location of log files (OPTIONAL)
-d date, in file date format (####)
"""
if __name__ == "__main__": # Call mail loop
main()