simonwittber posted his test code.
I took the code from the cookbook, called it "sencode" and
added these two lines
dumps = encode
loads = decode
I then ran your test code (unchanged except that my newsreader
folded the "value = ..." line) and got
marshal enc T: 0.21
marshal dec T: 0.4
sencode enc T: 7.76
sencode dec T: 11.56
This is with Python 2.3; the stock one provided by Apple
for my Mac.
I expected the numbers to be like this because the marshal
code is used to make and read the .pyc files and is supposed
to be pretty fast.
BTW, I tried the performance approach I outlined earlier.
The numbers aren't much better
marshal enc T: 0.2
marshal dec T: 0.38
sencode2 enc T: 7.16
sencode2 dec T: 9.49
I changed the format a little bit; dicts are treated a bit
differently.
from struct import pack, unpack
from cStringIO import StringIO
class EncodeError(Exception):
    """Raised by the encoder when a value's type has no encoding."""
    pass
class DecodeError(Exception):
    """Raised by the decoder when the input cannot be decoded."""
    pass
def encode(data):
    """
    Encode a Python value into its binary string form.

    The pieces produced by _encode are collected in an in-memory
    buffer whose accumulated contents are returned.
    """
    out = StringIO()
    emit = out.write
    _encode(data, emit)
    return out.getvalue()
def _encode(data, write, pack = pack):
    """
    Emit the binary encoding of *data* by calling write() with each piece.

    Every value starts with a one-character type code.  Sized values
    follow it with a 4-byte big-endian unsigned length:

        I  int    4-byte signed integer
        L  list   item count, then each item
        T  tuple  item count, then each item
        S  str    byte count, then the bytes
        B  long   character count, then the value as hexadecimal text
                  (a leading "-" marks a negative value)
        N  None   no payload
        F  float  4-byte single-precision float
        D  dict   pair count, then key, value, key, value, ...

    write -- callable taking one string, e.g. the write method of a
             StringIO.
    pack  -- struct.pack by default; presumably bound as a default
             argument so the lookup is a fast local one.

    Raises EncodeError((data, type(data))) for any other type.
    """
    # The original code used the equivalent of "type(data) is list".
    # That behavior is preserved, so subclasses of these types are
    # rejected rather than encoded as their base type.
    T = type(data)
    if T is int:
        write("I")
        write(pack("!i", data))
    elif T is list:
        write("L")
        write(pack("!L", len(data)))
        # Assumes len and 'for ... in' aren't lying
        for item in data:
            _encode(item, write, pack)
    elif T is tuple:
        write("T")
        write(pack("!L", len(data)))
        # Assumes len and 'for ... in' aren't lying
        for item in data:
            _encode(item, write, pack)
    elif T is str:
        write("S")
        write(pack("!L", len(data)))
        write(data)
    elif T is long:
        # "%x" yields bare hex digits with the sign in front, which
        # long(text, 16) reads back.  Slicing hex(data) instead assumed a
        # leading "0x" and so corrupted negative values ("-0x5L" -> "x5").
        s = "%x" % data
        write("B")
        # Unsigned length, matching every other length field.
        write(pack("!L", len(s)))
        write(s)
    elif T is type(None):
        write("N")
    elif T is float:
        # NOTE: "!f" is single precision, so a float loses precision on a
        # round trip.  Widening it would change the wire format.
        write("F")
        write(pack("!f", data))
    elif T is dict:
        write("D")
        write(pack("!L", len(data)))
        for k, v in data.items():
            _encode(k, write, pack)
            _encode(v, write, pack)
    else:
        raise EncodeError((data, T))
def decode(s):
    """
    Rebuild the original Python value from its binary string form.

    The string is wrapped in an in-memory file object and its read
    method is handed to _decode, whose result is returned.
    """
    source = StringIO(s)
    reader = source.read
    return _decode(reader)
def _read_exact(read, size):
    """Return exactly *size* bytes from read(), or raise DecodeError."""
    payload = read(size)
    if len(payload) != size:
        raise DecodeError("truncated input: wanted %d bytes, got %d"
                          % (size, len(payload)))
    return payload

def _decode(read, unpack = unpack):
    """
    Decode and return one value, pulling bytes through read(n).

    A one-character type code is read first and selects how the rest
    is interpreted:

        I  4-byte signed integer
        D  pair count, then alternating keys and values -> dict
        T  item count, then the items -> tuple
        L  item count, then the items -> list
        N  None
        S  byte count, then the bytes -> str
        F  4-byte float
        B  character count, then hexadecimal text -> long

    read   -- callable taking a byte count, e.g. the read method of a
              StringIO.
    unpack -- struct.unpack by default.

    Raises DecodeError for an unknown type code (including the empty
    string read() returns at end of input) and for a string or long
    payload that ends early.  A short fixed-size field still surfaces
    as struct.error from unpack.
    """
    code = read(1)
    if code == "I":
        return unpack("!i", read(4))[0]
    if code == "D":
        size = unpack("!L", read(4))[0]
        result = {}
        for i in range(size):
            # Key first, then its value, matching the encoder's order.
            key = _decode(read, unpack)
            result[key] = _decode(read, unpack)
        return result
    if code == "T":
        size = unpack("!L", read(4))[0]
        return tuple([_decode(read, unpack) for i in range(size)])
    if code == "L":
        size = unpack("!L", read(4))[0]
        return [_decode(read, unpack) for i in range(size)]
    if code == "N":
        return None
    if code == "S":
        size = unpack("!L", read(4))[0]
        # A short read here used to return a silently truncated string.
        return _read_exact(read, size)
    if code == "F":
        return unpack("!f", read(4))[0]
    if code == "B":
        size = unpack("!L", read(4))[0]
        # A short read here used to parse as a different, wrong number.
        return long(_read_exact(read, size), 16)
    raise DecodeError(code)
# Aliases under the dumps/loads names; presumably so the benchmark can
# call this module the same way it calls marshal.
dumps = encode
loads = decode
I wonder if this could be improved by a "struct2" module
which could compile a pack/unpack format once. Eg,
float_struct = struct2.struct("!f")
float_struct.pack(f)
return float_struct.unpack('?\x80\x00\x00')[0]
which might be the same as
return float_struct.unpack1('?\x80\x00\x00')
Andrew
da***@dalkescientific.com