472,958 Members | 2,178 Online

50
Hi,

With a list of fixed length strings, I want to count the occurrences of each characters at each of 9 positions. I then want to return the top 2 results for each position. The result has to be a list for the function I am passing this too. The code I have so far has two rather big problems (1) it is too slow and (2) it gives the wrong results :(

Expand|Select|Wrap|Line Numbers
1. dd ={'.LEA.....':77,'R....L...':8,'.L....DA.':5,'.L.R.V..L':4,'A....S.SA':55,'QL..L....':5,'M.SC.SE..':77}
2.
3.
4. def positionalWeights(dd, topx=2):
5.     posList = [[] for i in range(9)]
6.
7.     for key in dd.keys():
8.         for i, item in enumerate(key):
9.             if item != '.':
10.                 if posList[i]==[]:
11.                     posList[i].append([item, 1])
12.                 else:
13.                     for c in posList[i]:
14.                         if c[0] != item:
15.                             posList[i].append([item,1])
16.                         else:
17.                             c[1] += 1
18.
19.     for i in posList:
20.         for j in i:
21.             j.sort()
22.
23.     y =[]
24.     for i in posList:
25.         x = topx
26.         for j, k in enumerate(i):
27.             z=[]
28.             while x > 0:
29.                 z.append(k[1])
30.                 x-=1
31.             y.append(z)
32.     return y
33.
34. pw= positionalWeights(dd)
35. print pw
36.
37.
38. >>>
39. [['A', 'A'], [], [], [], [], [], [], [], ['L', 'L'], ['S', 'S'], [], ['R', 'R'], [], [], [], ['L', 'L'], ['S', 'S'], [], [], [], [], [], [], ['D', 'D'], [], ['S', 'S'], [], ['A', 'A'], []]
40.
41.
Sep 18 '07 #1
8 1474
I think I got the first part working:
Expand|Select|Wrap|Line Numbers
1. posList = [[] for i in range(9)]
2.
3. for key in dd.keys():
4.     for i, item in enumerate(key):
5.         if item != '.':
6.             if posList[i]==[]:
7.                 posList[i].append([item, 1])
8.             else:
9.                 found = False
10.                 for c in posList[i]:
11.                     if c[0] == item:
12.                         c[1] += 1
13.                         found = True
14.                 if not found:
15.                     posList[i].append([item,1])
You wern't checking every list in a posList index before you inserted a new list.
Sep 18 '07 #2
kdt
50
I think I got the first part working:
Expand|Select|Wrap|Line Numbers
1. posList = [[] for i in range(9)]
2.
3. for key in dd.keys():
4.     for i, item in enumerate(key):
5.         if item != '.':
6.             if posList[i]==[]:
7.                 posList[i].append([item, 1])
8.             else:
9.                 found = False
10.                 for c in posList[i]:
11.                     if c[0] == item:
12.                         c[1] += 1
13.                         found = True
14.                 if not found:
15.                     posList[i].append([item,1])
You wern't checking every list in a posList index before you inserted a new list.
cheers mate, made quite a few mistakes in this one. Finally got it working now- yipee! Still if anyone can propose speed tips, I'm using psyco, and it doesn't seem to be making much difference :S

Expand|Select|Wrap|Line Numbers
1. def positionalWeights(dd, topx=2):
2.     posList = [[] for i in range(9)]
3.
4.     for key in dd.keys():
5.         for i, item in enumerate(key):
6.             if item != '.':
7.                 if posList[i]==[]:
8.                     posList[i].append([1, item])
9.                 else:
10.                     found = False
11.                     for c in posList[i]:
12.                         if c[1] == item:
13.                             c[0] += 1
14.                             found = True
15.                     if not found:
16.                         posList[i].append([1, item])
17.
18.     for i in posList:
19.         i.sort()
20.         i.reverse()
21.
22.     y =[]
23.     for i in posList:
24.         x = topx
25.         z=[]
26.         while x>0:
27.             z.append(i[x][1])
28.             x-=1
29.         print z
30.         y.append(z)
31.     return y
32.
33. >>>
34. ['Y', 'M']
35. ['V', 'M']
36. ['L', 'M']
37. ['Y', 'E']
38. ['L', 'K']
39. ['F', 'L']
40. ['L', 'Y']
41. ['L', 'Y']
42. ['I', 'L']
43. [['Y', 'M'], ['V', 'M'], ['L', 'M'], ['Y', 'E'], ['L', 'K'], ['F', 'L'], ['L', 'Y'], ['L', 'Y'], ['I', 'L']]
44.
Sep 18 '07 #3
bvdet
2,851 Expert Mod 2GB
See if this helps any:
Expand|Select|Wrap|Line Numbers
1. def positionalWeights(dd, topx=2):
2.     posDict = [{} for i in range(len(dd.keys()[0]))]
3.     for item in dd.keys():
4.         item = list(item)
5.         pos = 0
6.         while True:
7.             try:
8.                 s = item.pop(0)
9.                 if s != '.':
10.                     if posDict[pos].has_key(s):
11.                         posDict[pos][s] += 1
12.                     else:
13.                         posDict[pos][s] = 1
14.                 pos += 1
15.             except: break
16.
17.     result = []
18.
19.     for dd in posDict:
20.         a = sorted(zip(dd.values(), dd.keys()), reverse=True)
21.         print a
22.         try:
23.             result.append([[a[0][0], a[0][1]]])
24.             for i in range(topx-1):
25.                 result[-1].append([a[i+1][0], a[i+1][1]])
26.         except IndexError, e:
27.             pass
28.
29.     return result
30.
31. s = positionalWeights(dd, 2)
32.
33. for i, item in enumerate(s):
34.     for j in item:
35.         print 'Position %d: %s = %d' % (i, j[1], j[0])
36.
37.
Output:
>>> [(1, 'R'), (1, 'Q'), (1, 'M'), (1, 'A')]
[(4, 'L')]
[(1, 'S'), (1, 'E')]
[(1, 'R'), (1, 'C'), (1, 'A')]
[(1, 'L')]
[(2, 'S'), (1, 'V'), (1, 'L')]
[(1, 'E'), (1, 'D')]
[(1, 'S'), (1, 'A')]
[(1, 'L'), (1, 'A')]
Position 0: R = 1
Position 0: Q = 1
Position 1: L = 4
Position 2: S = 1
Position 2: E = 1
Position 3: R = 1
Position 3: C = 1
Position 4: L = 1
Position 5: S = 2
Position 5: V = 1
Position 6: E = 1
Position 6: D = 1
Position 7: S = 1
Position 7: A = 1
Position 8: L = 1
Position 8: A = 1
>>>
Sep 18 '07 #4
kdt
50
See if this helps any:
Expand|Select|Wrap|Line Numbers
1. def positionalWeights(dd, topx=2):
2.     posDict = [{} for i in range(len(dd.keys()[0]))]
3.     for item in dd.keys():
4.         item = list(item)
5.         pos = 0
6.         while True:
7.             try:
8.                 s = item.pop(0)
9.                 if s != '.':
10.                     if posDict[pos].has_key(s):
11.                         posDict[pos][s] += 1
12.                     else:
13.                         posDict[pos][s] = 1
14.                 pos += 1
15.             except: break
16.
17.     result = []
18.
19.     for dd in posDict:
20.         a = sorted(zip(dd.values(), dd.keys()), reverse=True)
21.         print a
22.         try:
23.             result.append([[a[0][0], a[0][1]]])
24.             for i in range(topx-1):
25.                 result[-1].append([a[i+1][0], a[i+1][1]])
26.         except IndexError, e:
27.             pass
28.
29.     return result
30.
31. s = positionalWeights(dd, 2)
32.
33. for i, item in enumerate(s):
34.     for j in item:
35.         print 'Position %d: %s = %d' % (i, j[1], j[0])
36.
37.
Output:
>>> [(1, 'R'), (1, 'Q'), (1, 'M'), (1, 'A')]
[(4, 'L')]
[(1, 'S'), (1, 'E')]
[(1, 'R'), (1, 'C'), (1, 'A')]
[(1, 'L')]
[(2, 'S'), (1, 'V'), (1, 'L')]
[(1, 'E'), (1, 'D')]
[(1, 'S'), (1, 'A')]
[(1, 'L'), (1, 'A')]
Position 0: R = 1
Position 0: Q = 1
Position 1: L = 4
Position 2: S = 1
Position 2: E = 1
Position 3: R = 1
Position 3: C = 1
Position 4: L = 1
Position 5: S = 2
Position 5: V = 1
Position 6: E = 1
Position 6: D = 1
Position 7: S = 1
Position 7: A = 1
Position 8: L = 1
Position 8: A = 1
>>>

Thanks mate, it looks like the speed issue is from another part of the program. I'll definately use parts of this (especially for learning, I need to use try: except more) :)
Sep 18 '07 #5
bvdet
2,851 Expert Mod 2GB
I eliminated one of the try/except blocks by substituting 'while True' for 'while len(item)' and modified the way 'result' is compiled:
Expand|Select|Wrap|Line Numbers
1. def positionalWeights(dd, topx=2):
2.     posDict = [{} for i in range(len(dd.keys()[0]))]
3.     for item in dd.keys():
4.         item = list(item)
5.         pos = 0
6.         while len(item):
7.             s = item.pop(0)
8.             if s != '.':
9.                 if posDict[pos].has_key(s):
10.                     posDict[pos][s] += 1
11.                 else:
12.                     posDict[pos][s] = 1
13.             pos += 1
14.     result = [[] for i in range(len(dd.keys()[0]))]
15.     for j, dd in enumerate(posDict):
16.         a = sorted(zip(dd.values(), dd.keys()), reverse=True)
17.         try:
18.             for i in range(topx):
19.                 result[j].append([a[i][0], a[i][1]])
20.         except IndexError, e:
21.             pass
22.     return result
I am sure it can be improved.
Sep 19 '07 #6
kdt
50
I eliminated one of the try/except blocks by substituting 'while True' for 'while len(item)' and modified the way 'result' is compiled:
Expand|Select|Wrap|Line Numbers
1. def positionalWeights(dd, topx=2):
2.     posDict = [{} for i in range(len(dd.keys()[0]))]
3.     for item in dd.keys():
4.         item = list(item)
5.         pos = 0
6.         while len(item):
7.             s = item.pop(0)
8.             if s != '.':
9.                 if posDict[pos].has_key(s):
10.                     posDict[pos][s] += 1
11.                 else:
12.                     posDict[pos][s] = 1
13.             pos += 1
14.     result = [[] for i in range(len(dd.keys()[0]))]
15.     for j, dd in enumerate(posDict):
16.         a = sorted(zip(dd.values(), dd.keys()), reverse=True)
17.         try:
18.             for i in range(topx):
19.                 result[j].append([a[i][0], a[i][1]])
20.         except IndexError, e:
21.             pass
22.     return result
I am sure it can be improved.
Thanks bvdet, definately more concise than my attempt. Had to make some slight changes to it to get the output I wanted. However, there is a really strange property of it, in it that it doesn't return the top topx results, instead it will only return the top 1 for each position regardless of the value of topx. You can however add y to topx where y = top number of results you want -1. Strange indeed!

Expand|Select|Wrap|Line Numbers
1. def positionalWeights(dd,topx =5):
2.     posDict = [{} for i in range(len(dd.keys()[0]))]
3.     for item in dd.keys():
4.         item = list(item)
5.         pos = 0
6.         while len(item):
7.             s = item.pop(0)
8.             if s != '.':
9.                 if posDict[pos].has_key(s):
10.                     posDict[pos][s] += 1
11.                 else:
12.                     posDict[pos][s] = 1
13.             pos += 1
14.     result = [[] for i in range(len(dd.keys()[0]))]
15.     for j, dd in enumerate(posDict):
16.         a = sorted(zip(dd.values(), dd.keys()), reverse=True)
17.         try:
18.             for i in range(topx+1): # need to add 1 to return top 2
19.                 result[j].append(a[i][1]) # changed for top topx results, no counts required
20.         except IndexError, e:
21.             pass
22.     return result
23.
24. >>>
25. [['F', 'M'], ['L', 'M'], ['A', 'M'], ['F', 'E'], ['Y', 'K'], ['I', 'L'], ['F', 'Y'], ['S', 'Y'], ['V', 'L']]
26.
27.
Sep 19 '07 #7
bvdet
2,851 Expert Mod 2GB
This code seems to work correctly without adding 1 to 'topx':
Expand|Select|Wrap|Line Numbers
1. def positionalWeights(dd, topx=2):
2.     posDict = [{} for i in range(len(dd.keys()[0]))]
3.     for item in dd.keys():
4.         item = list(item)
5.         pos = 0
6.         while len(item):
7.             s = item.pop(0)
8.             if s != '.':
9.                 if posDict[pos].has_key(s):
10.                     posDict[pos][s] += 1
11.                 else:
12.                     posDict[pos][s] = 1
13.             pos += 1
14.     print posDict
15.     result = [[] for i in range(len(dd.keys()[0]))]
16.     for j, dd in enumerate(posDict):
17.         a = sorted(zip(dd.values(), dd.keys()), reverse=True)
18.         print a
19.         try:
20.             for i in range(topx):
21.                 # with counts
22.                 # result[j].append([a[i][0], a[i][1]])
23.                 # without counts
24.                 result[j].append(a[i][1])
25.         except IndexError, e:
26.             pass
27.     return result
28.
29. dd ={'.LEA.....':77,'R....L...':8,'.L....DA.':5,'.L.R.V..L':4,\
32.
33. s = positionalWeights(dd, 3)
34. print s
Output:
Expand|Select|Wrap|Line Numbers
1. >>> [{'A': 2, 'Q': 1, 'R': 1, 'M': 1}, {'Z': 1, 'L': 6}, {'S': 1, 'R': 1, 'E': 2, 'L': 2}, {'A': 2, 'C': 1, 'R': 1, 'E': 1}, {'A': 1, 'D': 1, 'L': 1}, {'E': 1, 'D': 1, 'L': 1, 'S': 2, 'T': 2, 'V': 1}, {'F': 1, 'R': 1, 'E': 2, 'D': 1, 'T': 1}, {'A': 1, 'S': 1, 'R': 1, 'G': 1}, {'A': 1, 'L': 1, 'G': 1}]
2. [(2, 'A'), (1, 'R'), (1, 'Q'), (1, 'M')]
3. [(6, 'L'), (1, 'Z')]
4. [(2, 'L'), (2, 'E'), (1, 'S'), (1, 'R')]
5. [(2, 'A'), (1, 'R'), (1, 'E'), (1, 'C')]
6. [(1, 'L'), (1, 'D'), (1, 'A')]
7. [(2, 'T'), (2, 'S'), (1, 'V'), (1, 'L'), (1, 'E'), (1, 'D')]
8. [(2, 'E'), (1, 'T'), (1, 'R'), (1, 'F'), (1, 'D')]
9. [(1, 'S'), (1, 'R'), (1, 'G'), (1, 'A')]
10. [(1, 'L'), (1, 'G'), (1, 'A')]
11. [['A', 'R', 'Q'], ['L', 'Z'], ['L', 'E', 'S'], ['A', 'R', 'E'], ['L', 'D', 'A'], ['T', 'S', 'V'], ['E', 'T', 'R'], ['S', 'R', 'G'], ['L', 'G', 'A']]
12. >>>
Sep 19 '07 #8
kdt
50
sorry, please disregard my last post, I was being silly again - passing the wrong values to the function. All's good now

Thanks
Sep 19 '07 #9