It's Official: for LONG1/LONG4, a "byte count" of 0 is taken as a
shortcut meaning 0L.  This allows LONG1 to encode 0L in two bytes
total.
This commit is contained in:
Tim Peters 2003-01-31 16:43:39 +00:00
parent 757246c189
commit 4b23f2b44b
2 changed files with 25 additions and 9 deletions

View File

@ -1291,6 +1291,11 @@ import binascii as _binascii
def encode_long(x): def encode_long(x):
r"""Encode a long to a two's complement little-endian binary string. r"""Encode a long to a two's complement little-endian binary string.
Note that 0L is a special case, returning an empty string, to save a
byte in the LONG1 pickling context.
>>> encode_long(0L)
''
>>> encode_long(255L) >>> encode_long(255L)
'\xff\x00' '\xff\x00'
>>> encode_long(32767L) >>> encode_long(32767L)
@ -1307,7 +1312,7 @@ def encode_long(x):
""" """
if x == 0: if x == 0:
return '\x00' return ''
if x > 0: if x > 0:
ashex = hex(x) ashex = hex(x)
assert ashex.startswith("0x") assert ashex.startswith("0x")
@ -1316,7 +1321,7 @@ def encode_long(x):
if nibbles & 1: if nibbles & 1:
# need an even # of nibbles for unhexlify # need an even # of nibbles for unhexlify
ashex = "0x0" + ashex[2:] ashex = "0x0" + ashex[2:]
elif ashex[2] >= '8': elif int(ashex[2], 16) >= 8:
# "looks negative", so need a byte of sign bits # "looks negative", so need a byte of sign bits
ashex = "0x00" + ashex[2:] ashex = "0x00" + ashex[2:]
else: else:
@ -1330,11 +1335,11 @@ def encode_long(x):
if nibbles & 1: if nibbles & 1:
# need an even # of nibbles for unhexlify # need an even # of nibbles for unhexlify
nibbles += 1 nibbles += 1
nbytes = nibbles >> 1 nbits = nibbles * 4
x += 1L << (nbytes * 8) x += 1L << nbits
assert x > 0 assert x > 0
ashex = hex(x) ashex = hex(x)
if x >> (nbytes * 8 - 1) == 0: if x >> (nbits - 1) == 0:
# "looks positive", so need a byte of sign bits # "looks positive", so need a byte of sign bits
ashex = "0xff" + x[2:] ashex = "0xff" + x[2:]
@ -1348,6 +1353,9 @@ def encode_long(x):
def decode_long(data): def decode_long(data):
r"""Decode a long from a two's complement little-endian binary string. r"""Decode a long from a two's complement little-endian binary string.
>>> decode_long('')
0L
>>> decode_long("\xff\x00") >>> decode_long("\xff\x00")
255L 255L
>>> decode_long("\xff\x7f") >>> decode_long("\xff\x7f")
@ -1362,10 +1370,13 @@ def decode_long(data):
127L 127L
""" """
nbytes = len(data)
if nbytes == 0:
return 0L
ashex = _binascii.hexlify(data[::-1]) ashex = _binascii.hexlify(data[::-1])
n = long(ashex, 16) n = long(ashex, 16)
if data[-1] >= '\x80': if data[-1] >= '\x80':
n -= 1L << (len(data) * 8) n -= 1L << (nbytes * 8)
return n return n
# Shorthands # Shorthands

View File

@ -620,6 +620,8 @@ from pickle import decode_long
def read_long1(f): def read_long1(f):
r""" r"""
>>> import StringIO >>> import StringIO
>>> read_long1(StringIO.StringIO("\x00"))
0L
>>> read_long1(StringIO.StringIO("\x02\xff\x00")) >>> read_long1(StringIO.StringIO("\x02\xff\x00"))
255L 255L
>>> read_long1(StringIO.StringIO("\x02\xff\x7f")) >>> read_long1(StringIO.StringIO("\x02\xff\x7f"))
@ -628,7 +630,6 @@ def read_long1(f):
-256L -256L
>>> read_long1(StringIO.StringIO("\x02\x00\x80")) >>> read_long1(StringIO.StringIO("\x02\x00\x80"))
-32768L -32768L
>>>
""" """
n = read_uint1(f) n = read_uint1(f)
@ -645,6 +646,7 @@ long1 = ArgumentDescriptor(
This first reads one byte as an unsigned size, then reads that This first reads one byte as an unsigned size, then reads that
many bytes and interprets them as a little-endian 2's-complement long. many bytes and interprets them as a little-endian 2's-complement long.
If the size is 0, that's taken as a shortcut for the long 0L.
""") """)
def read_long4(f): def read_long4(f):
@ -658,7 +660,8 @@ def read_long4(f):
-256L -256L
>>> read_long4(StringIO.StringIO("\x02\x00\x00\x00\x00\x80")) >>> read_long4(StringIO.StringIO("\x02\x00\x00\x00\x00\x80"))
-32768L -32768L
>>> >>> read_long4(StringIO.StringIO("\x00\x00\x00\x00"))
0L
""" """
n = read_int4(f) n = read_int4(f)
@ -677,7 +680,9 @@ long4 = ArgumentDescriptor(
This first reads four bytes as a signed size (but requires the This first reads four bytes as a signed size (but requires the
size to be >= 0), then reads that many bytes and interprets them size to be >= 0), then reads that many bytes and interprets them
as a little-endian 2's-complement long. as a little-endian 2's-complement long. If the size is 0, that's taken
as a shortcut for the long 0L, although LONG1 should really be used
then instead (and in any case where # of bytes < 256).
""") """)