On 2 Aug 2003 10:16:13 -0400,
aa**@pythoncraf t.com (Aahz) wrote:
In article <w1************ ********@newssv r21.news.prodig y.com>,
Andy C <ay********@cor nell.edu> wrote:
I don't see how I can do this and let me eliminate duplicates. I need
to assign the old duplicate string to the unique string that already
exists. Hence the question, how do I get a reference to the KEY value?
I know I can use keys() and do a linear search, but that is much more
inefficient . I would like to get a reference to the key value in the
same time that it takes to do a hash lookup (constant time).
Ahhhh.... Right. Hmmmmm.... <thinks hard> You're correct, you do
need to set the value to the key. I think using a dict is better than
using intern(). Here's an optimization:
node2 = node_cache.setd efault(node2, node2)
I would sure be more comfortable with a change in nomenclature, e.g.,
node2name = node_name_cache .setdefault(nod e2name, node2name)
so that it is clear that you are dealing with name strings, not typical graph
vertex/node objects.
BTW, reading from a file, you can probably not in general avoid forward references,
(i.e., names of adjacent nodes yet to be defined), but they should be resolvable at the end,
so you could make nodes that refer to adjacent nodes directly instead of by name.
Nodes would carry a ref to their own (string) name, and go by that same name string in a graph dict,
so there should be no duplication of strings, just references to them. E.g.,
(I started to just sketch something, and ... well, it's hard to stop programming Python
before something is running at least preliminarily ;-) I subclassed dict to keep nodes in by name.
====< graph.py >============== =============== =============== =============
def boxit(s):
lines = s.splitlines()
wid = max(map(len,lin es))
ret = ['+-%s-+' % ('-'*wid)]
fmt = '| %%-%ds |'%wid
for line in lines: ret.append(fmt% line)
ret.append(ret[0])
return '\n'.join(ret)
def errep(Ex, *rest):
if __debug__: print boxit('%s: %s'%(Ex.__name_ _, rest and rest[0])) # and blunder on
else: raise Ex(*rest)
class Node(object):
__slots__ = 'name', 'adj_list'
def __init__(self, name):
self.name = name
self.adj_list = None # signify undefined, vs [] for no adjacent nodes
class Graph(dict):
def load(self, infile, print_lines=Fal se):
for line in infile:
if print_lines: print line.rstrip()
sline = line.strip()
if not sline or sline.startswit h('#'): continue
node_def_names = line.split() # assume line like 'nodename adj_node1_name adj_node2_name ...'
node_name = node_def_names[0]
if node_name in self:
# don't allow re-definition
errep(ValueErro r, 'Node %r being re-defined:\n new adj: %r\n old adj: %r' %(
node_name, node_def_names[1:], self[node_name].adj_list))
else:
self[node_name] = Node(node_name)
# set adj list, making making this a defined node
self[node_name].adj_list = node_def_names[1:]
# change adj lists to direct references to adjacent nodes
for node in self.values():
adj_list = node.adj_list
assert adj_list is not None
for i in xrange(len(adj_ list)):
adj_name = adj_list[i]
try:
adj_list[i] = self[adj_name]
except KeyError:
errep( ValueError, 'node %r refers to undefined adj node %r' % (node.name, adj_name))
# continuing if debugging
adj_list[i] = Node(adj_name) # adj_list None (vs []) signifies undefined
def show(self):
node_names = self.keys()
node_names.sort ()
visited = {}
ret = []
def prtree(node, indent=0):
if node.name in visited:
if indent: ret.append('%s% s (see previous)'%(ind ent*' ',node.name))
else:
visited[node.name]=True
if node.adj_list is None:
ret.append('%s% s (undefined) '%(indent*' ',node.name))
return
ret.append('%s% s'%(indent*' ',node.name))
for adj in node.adj_list:
prtree(adj, indent+1)
for node_name in node_names:
prtree(self[node_name])
ret.append('')
return '\n'.join(ret)
def test(filepath=' '):
graph = Graph() # name-string -> Node instance
if not filepath:
import StringIO
f_graph = StringIO.String IO("""
A B C D
B E F
C G H
G A
D B
E
F
H
D E F
X E Y
""")
else:
f_graph = file(filepath)
print 'Loading graph from %s ...\n--' % (filepath or '(test)')
graph.load(f_gr aph, True) # print the lines read
print '--'
f_graph.close()
print graph.show()
if __name__ == '__main__':
import sys
args = sys.argv[1:]
test(args and args[0] or '')
=============== =============== =============== =============== =============
Result:
[16:33] C:\pywk\clp\pp> graph.py
Loading graph from (test) ...
--
A B C D
B E F
C G H
G A
D B
E
F
H
D E F
+----------------------------------------+
| ValueError: Node 'D' being re-defined: |
| new adj: ['E', 'F'] |
| old adj: ['B'] |
+----------------------------------------+
X E Y
+-------------------------------------------------------+
| ValueError: node 'X' refers to undefined adj node 'Y' |
+-------------------------------------------------------+
--
A
B
E
F
C
G
A (see previous)
H
D
B (see previous)
X
E (see previous)
Y (undefined)
(Not tested beyond what you see. NTBWYS? ;-)
Regards,
Bengt Richter