On Sep 19, 2:01 pm, bearophileH...@lycos.com wrote:

Gerard Flanagan:

# Sort first: groupby() only merges *adjacent* items that share a key.
data.sort()

# Map each "major.minor" prefix to the number of entries in data carrying it.
datadict = dict(
    (k, len(list(g)))
    for k, g in groupby(data, lambda s: '.'.join(s.split('.', 2)[:2]))
)

That code may run correctly, but it's quite unreadable, while good

Python programmers value high readability. So the right thing to do is

to split that line into parts, giving meaningful names, and maybe even

add comments.

len(list(g)) looks like a good job for my little leniter() function

(or better, just an extension to the semantics of len) that some time ago

some people here judged to be useless, while I use it often in both

Python and D ;-)

Extending len() to support iterables sounds like a good idea, except

that it could be misleading when:

len(file(path))

returns the number of lines and /not/ the length in bytes as you might

first think! :-)

Anyway, here's another possible implementation using bags (multisets):

def major_version(version_string):
    """Convert a dotted version such as '1.2.3.2' to its first two parts, '1.2'.

    A string with fewer than two dot-separated components is returned
    unchanged.
    """
    # maxsplit=2 is enough: only the first two components are kept.
    return '.'.join(version_string.split('.', 2)[:2])

# Sample dotted version strings to tally.
versions = [
    "1.1.1.1",
    "1.2.2.2",
    "1.2.2.3",
    "1.3.1.2",
    "1.3.4.5",
]

# Reduce every version to its "major.minor" prefix and collect them in a bag.
bag_of_versions = bag(map(major_version, versions))

# Snapshot the bag as a plain dict of prefix -> occurrence count.
dict_of_counts = dict(bag_of_versions.items())

Here's my implementation of the bag class in Python (sorry about the

length):

class bag(object):
    """A bag (multiset): an unordered collection that counts occurrences.

    Elements are stored as keys of an internal dict that maps each element
    to its occurrence count, so they must be hashable.
    """

    def __init__(self, iterable=None):
        """Build a bag from a dict of counts or from an iterable of elements.

        iterable -- None for an empty bag; a dict mapping element -> int
                    count; or any other iterable, whose items are added one
                    at a time.  Falsy non-dict arguments yield an empty bag.

        Raises TypeError if a dict count is not an int and ValueError if it
        is negative.  Zero counts are dropped so that an element which
        occurs zero times is not reported as a member.
        """
        self._counts = {}
        if isinstance(iterable, dict):
            for x, n in iterable.items():
                if not isinstance(n, int):
                    raise TypeError(
                        "count for %r must be an int, got %r" % (x, n))
                if n < 0:
                    raise ValueError(
                        "count for %r must not be negative, got %r" % (x, n))
                if n:
                    self._counts[x] = n
        elif iterable:
            for x in iterable:
                self.add(x)

    @staticmethod
    def _from_counts(counts):
        """Wrap an already-built element -> count dict in a new bag."""
        result = bag()
        result._counts = counts
        return result

    def _intersection_counts(self, other):
        """Return the counts of self & other: the minimum per shared element."""
        mine = self._counts
        common = {}
        for x, n in other._counts.items():
            if x in mine:
                common[x] = min(mine[x], n)
        return common

    def _union_counts(self, other):
        """Return the counts of self | other: the maximum per element."""
        merged = self._counts.copy()
        for x, n in other._counts.items():
            if x in merged:
                merged[x] = max(merged[x], n)
            else:
                merged[x] = n
        return merged

    def _sum_counts(self, other):
        """Return the counts of self + other: counts added per element."""
        total = self._counts.copy()
        for x, n in other.items():
            total[x] = total.get(x, 0) + n
        return total

    def _difference_counts(self, other):
        """Return the counts of self - other.

        Elements whose count would drop below one are removed; elements of
        other that are absent from self are ignored.
        """
        remaining = self._counts.copy()
        for x, n in other.items():
            if x in remaining:
                left = remaining[x] - n
                if left < 1:
                    del remaining[x]
                else:
                    remaining[x] = left
        return remaining

    def __and__(self, other):
        """Return a new bag holding the intersection of two bags."""
        return self._from_counts(self._intersection_counts(other))

    def __iand__(self, other):
        """Intersect this bag with another bag in place."""
        self._counts = self._intersection_counts(other)
        # Augmented assignment rebinds the target to the return value.
        return self

    def __or__(self, other):
        """Return a new bag holding the union of two bags."""
        return self._from_counts(self._union_counts(other))

    def __ior__(self, other):
        """Merge another bag into this one in place (maximum of counts)."""
        self._counts = self._union_counts(other)
        return self

    def __len__(self):
        """Return the total number of elements, counting repeats."""
        return sum(self._counts.values())

    def __list__(self):
        """Return the elements as a list, each repeated by its count.

        Kept for existing callers; list(b) itself goes through __iter__.
        """
        return list(self)

    def __repr__(self):
        """Return a string such as "bag(['a', 'a', 'b'])"."""
        return "bag([%s])" % ", ".join(repr(x) for x in self)

    def __iter__(self):
        """Yield every element, each repeated as many times as it occurs."""
        for x, n in self._counts.items():
            for _ in range(n):
                yield x

    def keys(self):
        """Return the distinct elements."""
        return self._counts.keys()

    def values(self):
        """Return the occurrence counts."""
        return self._counts.values()

    def items(self):
        """Return (element, count) pairs."""
        return self._counts.items()

    def __contains__(self, x):
        """Return True if x is stored in the bag."""
        return x in self._counts

    def add(self, x):
        """Add one occurrence of x."""
        self._counts[x] = self._counts.get(x, 0) + 1

    def __add__(self, other):
        """Return a new bag with the counts of both operands added together.

        other may be anything offering items() that yields (element, count).
        """
        return self._from_counts(self._sum_counts(other))

    def __sub__(self, other):
        """Return a new bag with the counts of other subtracted from self.

        other may be anything offering items() that yields (element, count).
        """
        return self._from_counts(self._difference_counts(other))

    def __iadd__(self, other):
        """Add the counts of other to this bag in place."""
        self._counts = self._sum_counts(other)
        return self

    def __isub__(self, other):
        """Subtract the counts of other from this bag in place."""
        # Working on a copy keeps "b -= b" from deleting keys of the dict
        # that is being iterated.
        self._counts = self._difference_counts(other)
        return self

    def clear(self):
        """Remove every element."""
        self._counts = {}

    def count(self, x):
        """Return how many times x occurs (0 if it is absent)."""
        return self._counts.get(x, 0)