By using this site, you agree to our updated Privacy Policy and our Terms of Use. Manage your Cookies Settings.
440,686 Members | 1,510 Online
Bytes IT Community
+ Ask a Question
Need help? Post your question and get tips & solutions from a community of 440,686 IT Pros & Developers. It's quick & easy.

problem with the logic of read files

P: n/a
I am new to Python and I am not in computer science. In fact, I am a biologist and I am trying to learn Python. If someone can help me, I will appreciate it.
Thanks
#!/cbi/prg/python/current/bin/python
# -*- coding: iso-8859-1 -*-
import sys
import os
from progadn import *

ab1seq = raw_input("Entrez le répertoire où sont les fichiers à analyser: ") or None
if ab1seq == None :
print "Erreur: Pas de répertoire! \n"
"\nAu revoir \n"
sys.exit()

listrep = os.listdir(ab1seq)
#print listrep

extseq=[]

for f in listrep:
if f[-4:]==".Seq":
extseq.append(f)
# print extseq

for x in extseq:
f = open(x, "r")
seq=f.read()
f.close()
s=seq

def checkDNA(seq):
"""Retourne une liste des caractères non conformes à l'IUPAC."""

junk=[]
for c in range (len(seq)):
if seq[c] not in iupac:
junk.append([seq[c],c])
#print junk
print "ATTN: Il y a le caractère %s en position %s " % (seq[c],c)
if junk == []:
indinv=range(len(seq))
indinv.reverse()
resultat=""
for i in indinv:
resultat +=comp[seq[i]]
return resultat

seq=checkDNA(seq)
print seq
#I got the following (as you can see, only one file is processed by the function even though there are more files in extseq):

['B1-11_win3F_B04_04.ab1.Seq']
['B1-11_win3F_B04_04.ab1.Seq', 'B1-11_win3R_C04_06.ab1.Seq']
['B1-11_win3F_B04_04.ab1.Seq', 'B1-11_win3R_C04_06.ab1.Seq', 'B1-18_win3F_D04_08.ab1.Seq']
['B1-11_win3F_B04_04.ab1.Seq', 'B1-11_win3R_C04_06.ab1.Seq', 'B1-18_win3F_D04_08.ab1.Seq', 'B1-18_win3R_E04_10.ab1.Seq']
['B1-11_win3F_B04_04.ab1.Seq', 'B1-11_win3R_C04_06.ab1.Seq', 'B1-18_win3F_D04_08.ab1.Seq', 'B1-18_win3R_E04_10.ab1.Seq', 'B1-19_win3F_F04_12.ab1.Seq']
...
['B1-11_win3F_B04_04.ab1.Seq', 'B1-11_win3R_C04_06.ab1.Seq', 'B1-18_win3F_D04_08.ab1.Seq', 'B1-18_win3R_E04_10.ab1.Seq', 'B1-19_win3F_F04_12.ab1.Seq', 'B1-19_win3R_G04_14.ab1.Seq', 'B90_win3F_H04_16.ab1.Seq', 'B90_win3R_A05_01.ab1.Seq', 'DL2-11_win3F_H03_15.ab1.Seq', 'DL2-11_win3R_A04_02.ab1.Seq', 'DL2-12_win3F_F03_11.ab1.Seq', 'DL2-12_win3R_G03_13.ab1.Seq', 'M7757_win3F_B05_03.ab1.Seq', 'M7757_win3R_C05_05.ab1.Seq', 'M7759_win3F_D05_07.ab1.Seq', 'M7759_win3R_E05_09.ab1.Seq', 'TCR700-114_win3F_H05_15.ab1.Seq', 'TCR700-114_win3R_A06_02.ab1.Seq', 'TRC666-100_win3F_F05_11.ab1.Seq', 'TRC666-100_win3R_G05_13.ab1.Seq']

After this listing, my program processes only the last element of the list (TRC666-100_win3R_G05_13.ab1.Seq):

NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNTCCCGAAGTGTCCCAGAGCA AATAAATGGACCAAAACGTTTTTAGAATACTTGAACGTGTAATCTCATTT TAA
Jul 18 '05 #1
Share this Question
Share on Google+
3 Replies


P: n/a
gry

<m_t...@yahoo.com> wrote:
I am new to python and I am not in computer science. In fact I am a biologist and I ma trying to learn python. So if someone can help me, I
will appreciate it. Thanks
#!/cbi/prg/python/current/bin/python
# -*- coding: iso-8859-1 -*-
import sys
import os
from progadn import *

ab1seq = raw_input("Entrez le répertoire où sont les fichiers à analyser: ") or None
if ab1seq == None :
print "Erreur: Pas de répertoire! \n"
"\nAu revoir \n"
sys.exit()

listrep = os.listdir(ab1seq)
#print listrep

extseq=[]

for f in listrep:
###### Minor -- this is better said as: if f.endswith(".Seq"):
if f[-4:]==".Seq":
extseq.append(f)
# print extseq

for x in extseq:
f = open(x, "r")
###### seq=... discards previous data and refers only to that just read.
###### It would be simplest to process each file as it is read:
@@@@@@ seq=f.read()
@@@@@@ checkDNA(seq)
seq=f.read()
f.close()
s=seq

def checkDNA(seq):
"""Retourne une liste des caractères non conformes à l'IUPAC."""
junk=[]
for c in range (len(seq)):
if seq[c] not in iupac:
junk.append([seq[c],c])
#print junk
print "ATTN: Il y a le caractère %s en position %s " % (seq[c],c)
if junk == []:
indinv=range(len(seq))
indinv.reverse()
resultat=""
for i in indinv:
resultat +=comp[seq[i]]
return resultat

seq=checkDNA(seq)
print seq
##### The program segment you posted did not define "comp" or "iupac",
##### so it's a little hard to guess how it's supposed to work. It would
##### be helpful if you gave a concise description of what you want the
##### program to do, as well as a brief sample of input data.
##### I hope this helps! -- George
#I got the following ( as you see only one file is proceed by the function even if more files is in extseq
['B1-11_win3F_B04_04.ab1.Seq']
['B1-11_win3F_B04_04.ab1.Seq', 'B1-11_win3R_C04_06.ab1.Seq']
['B1-11_win3F_B04_04.ab1.Seq', 'B1-11_win3R_C04_06.ab1.Seq', 'B1-18_win3F_D04_08.ab1.Seq'] ['B1-11_win3F_B04_04.ab1.Seq', 'B1-11_win3R_C04_06.ab1.Seq', 'B1-18_win3F_D04_08.ab1.Seq', 'B1-18_win3R_E04_10.ab1.Seq'] ['B1-11_win3F_B04_04.ab1.Seq', 'B1-11_win3R_C04_06.ab1.Seq', 'B1-18_win3F_D04_08.ab1.Seq', 'B1-18_win3R_E04_10.ab1.Seq',
'B1-19_win3F_F04_12.ab1.Seq'] ..
['B1-11_win3F_B04_04.ab1.Seq', 'B1-11_win3R_C04_06.ab1.Seq', 'B1-18_win3F_D04_08.ab1.Seq', 'B1-18_win3R_E04_10.ab1.Seq',
'B1-19_win3F_F04_12.ab1.Seq', 'B1-19_win3R_G04_14.ab1.Seq',
'B90_win3F_H04_16.ab1.Seq', 'B90_win3R_A05_01.ab1.Seq',
'DL2-11_win3F_H03_15.ab1.Seq', 'DL2-11_win3R_A04_02.ab1.Seq',
'DL2-12_win3F_F03_11.ab1.Seq', 'DL2-12_win3R_G03_13.ab1.Seq',
'M7757_win3F_B05_03.ab1.Seq', 'M7757_win3R_C05_05.ab1.Seq',
'M7759_win3F_D05_07.ab1.Seq', 'M7759_win3R_E05_09.ab1.Seq',
'TCR700-114_win3F_H05_15.ab1.Seq', 'TCR700-114_win3R_A06_02.ab1.Seq',
'TRC666-100_win3F_F05_11.ab1.Seq', 'TRC666-100_win3R_G05_13.ab1.Seq']
after this listing my programs proceed only the last element of this listing (TRC666-100_win3R_G05_13.ab1.Seq)

NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNTCCCGAAGTGTCCCAGAGCA AATAAATGGACCAAAACGTTTTTAGAATACTTGAACGTGTAATCTCATTT TAA

Jul 18 '05 #2

P: n/a
m_****@yahoo.com wrote:
#!/cbi/prg/python/current/bin/python
# -*- coding: iso-8859-1 -*-
import sys
import os
from progadn import *
...
for x in extseq:
f = open(x, "r")
seq=f.read()
f.close()
s=seq

def checkDNA(seq):
...

seq=checkDNA(seq)
print seq


You terminated the loop to define checkDNA.

What you want is:
...
from progadn import *
def checkDNA(seq):
...

...
for x in extseq:
f = open(x, "r")
seq=f.read()
f.close()
s=seq
seq = checkDNA(seq)
print seq

Even better might be:
...
from progadn import *
def checkDNA(seq):
...

def main():
...
for x in extseq:
f = open(x, "r")
try:
print checkDNA(f.read())
finally:
f.close()
if __name__ == '__main__':
main()

--Scott David Daniels
Sc***********@Acm.Org
Jul 18 '05 #3

P: n/a
You may also be interested in the biopython project:
http://www.biopython.org/

tom

Jul 18 '05 #4

This discussion thread is closed

Replies have been disabled for this discussion.