Issue #17516: use comment syntax for comments, instead of multiline string
This commit is contained in:
parent
1f8898a591
commit
765531d2d0
|
@ -34,17 +34,15 @@ from _ctypes import FUNCFLAG_CDECL as _FUNCFLAG_CDECL, \
|
|||
FUNCFLAG_USE_ERRNO as _FUNCFLAG_USE_ERRNO, \
|
||||
FUNCFLAG_USE_LASTERROR as _FUNCFLAG_USE_LASTERROR
|
||||
|
||||
"""
|
||||
WINOLEAPI -> HRESULT
|
||||
WINOLEAPI_(type)
|
||||
|
||||
STDMETHODCALLTYPE
|
||||
|
||||
STDMETHOD(name)
|
||||
STDMETHOD_(type, name)
|
||||
|
||||
STDAPICALLTYPE
|
||||
"""
|
||||
# WINOLEAPI -> HRESULT
|
||||
# WINOLEAPI_(type)
|
||||
#
|
||||
# STDMETHODCALLTYPE
|
||||
#
|
||||
# STDMETHOD(name)
|
||||
# STDMETHOD_(type, name)
|
||||
#
|
||||
# STDAPICALLTYPE
|
||||
|
||||
def create_string_buffer(init, size=None):
|
||||
"""create_string_buffer(aBytes) -> character array
|
||||
|
|
|
@ -5,17 +5,14 @@ from sys import getrefcount as grc
|
|||
|
||||
# XXX This test must be reviewed for correctness!!!
|
||||
|
||||
"""
|
||||
ctypes' types are container types.
|
||||
|
||||
They have an internal memory block, which only consists of some bytes,
|
||||
but it has to keep references to other objects as well. This is not
|
||||
really needed for trivial C types like int or char, but it is important
|
||||
for aggregate types like strings or pointers in particular.
|
||||
|
||||
What about pointers?
|
||||
|
||||
"""
|
||||
# ctypes' types are container types.
|
||||
#
|
||||
# They have an internal memory block, which only consists of some bytes,
|
||||
# but it has to keep references to other objects as well. This is not
|
||||
# really needed for trivial C types like int or char, but it is important
|
||||
# for aggregate types like strings or pointers in particular.
|
||||
#
|
||||
# What about pointers?
|
||||
|
||||
class ObjectsTestCase(unittest.TestCase):
|
||||
def assertSame(self, a, b):
|
||||
|
|
|
@ -3,35 +3,33 @@ import sys
|
|||
import unittest
|
||||
|
||||
# Bob Ippolito:
|
||||
"""
|
||||
Ok.. the code to find the filename for __getattr__ should look
|
||||
something like:
|
||||
|
||||
import os
|
||||
from macholib.dyld import dyld_find
|
||||
|
||||
def find_lib(name):
|
||||
possible = ['lib'+name+'.dylib', name+'.dylib',
|
||||
name+'.framework/'+name]
|
||||
for dylib in possible:
|
||||
try:
|
||||
return os.path.realpath(dyld_find(dylib))
|
||||
except ValueError:
|
||||
pass
|
||||
raise ValueError, "%s not found" % (name,)
|
||||
|
||||
It'll have output like this:
|
||||
|
||||
>>> find_lib('pthread')
|
||||
'/usr/lib/libSystem.B.dylib'
|
||||
>>> find_lib('z')
|
||||
'/usr/lib/libz.1.dylib'
|
||||
>>> find_lib('IOKit')
|
||||
'/System/Library/Frameworks/IOKit.framework/Versions/A/IOKit'
|
||||
|
||||
-bob
|
||||
|
||||
"""
|
||||
#
|
||||
# Ok.. the code to find the filename for __getattr__ should look
|
||||
# something like:
|
||||
#
|
||||
# import os
|
||||
# from macholib.dyld import dyld_find
|
||||
#
|
||||
# def find_lib(name):
|
||||
# possible = ['lib'+name+'.dylib', name+'.dylib',
|
||||
# name+'.framework/'+name]
|
||||
# for dylib in possible:
|
||||
# try:
|
||||
# return os.path.realpath(dyld_find(dylib))
|
||||
# except ValueError:
|
||||
# pass
|
||||
# raise ValueError, "%s not found" % (name,)
|
||||
#
|
||||
# It'll have output like this:
|
||||
#
|
||||
# >>> find_lib('pthread')
|
||||
# '/usr/lib/libSystem.B.dylib'
|
||||
# >>> find_lib('z')
|
||||
# '/usr/lib/libz.1.dylib'
|
||||
# >>> find_lib('IOKit')
|
||||
# '/System/Library/Frameworks/IOKit.framework/Versions/A/IOKit'
|
||||
#
|
||||
# -bob
|
||||
|
||||
from ctypes.macholib.dyld import dyld_find
|
||||
|
||||
|
|
390
Lib/datetime.py
390
Lib/datetime.py
|
@ -1929,203 +1929,203 @@ timezone.utc = timezone._create(timedelta(0))
|
|||
timezone.min = timezone._create(timezone._minoffset)
|
||||
timezone.max = timezone._create(timezone._maxoffset)
|
||||
_EPOCH = datetime(1970, 1, 1, tzinfo=timezone.utc)
|
||||
"""
|
||||
Some time zone algebra. For a datetime x, let
|
||||
x.n = x stripped of its timezone -- its naive time.
|
||||
x.o = x.utcoffset(), and assuming that doesn't raise an exception or
|
||||
return None
|
||||
x.d = x.dst(), and assuming that doesn't raise an exception or
|
||||
return None
|
||||
x.s = x's standard offset, x.o - x.d
|
||||
|
||||
Now some derived rules, where k is a duration (timedelta).
|
||||
# Some time zone algebra. For a datetime x, let
|
||||
# x.n = x stripped of its timezone -- its naive time.
|
||||
# x.o = x.utcoffset(), and assuming that doesn't raise an exception or
|
||||
# return None
|
||||
# x.d = x.dst(), and assuming that doesn't raise an exception or
|
||||
# return None
|
||||
# x.s = x's standard offset, x.o - x.d
|
||||
#
|
||||
# Now some derived rules, where k is a duration (timedelta).
|
||||
#
|
||||
# 1. x.o = x.s + x.d
|
||||
# This follows from the definition of x.s.
|
||||
#
|
||||
# 2. If x and y have the same tzinfo member, x.s = y.s.
|
||||
# This is actually a requirement, an assumption we need to make about
|
||||
# sane tzinfo classes.
|
||||
#
|
||||
# 3. The naive UTC time corresponding to x is x.n - x.o.
|
||||
# This is again a requirement for a sane tzinfo class.
|
||||
#
|
||||
# 4. (x+k).s = x.s
|
||||
# This follows from #2, and that datetime+timedelta preserves tzinfo.
|
||||
#
|
||||
# 5. (x+k).n = x.n + k
|
||||
# Again follows from how arithmetic is defined.
|
||||
#
|
||||
# Now we can explain tz.fromutc(x). Let's assume it's an interesting case
|
||||
# (meaning that the various tzinfo methods exist, and don't blow up or return
|
||||
# None when called).
|
||||
#
|
||||
# The function wants to return a datetime y with timezone tz, equivalent to x.
|
||||
# x is already in UTC.
|
||||
#
|
||||
# By #3, we want
|
||||
#
|
||||
# y.n - y.o = x.n [1]
|
||||
#
|
||||
# The algorithm starts by attaching tz to x.n, and calling that y. So
|
||||
# x.n = y.n at the start. Then it wants to add a duration k to y, so that [1]
|
||||
# becomes true; in effect, we want to solve [2] for k:
|
||||
#
|
||||
# (y+k).n - (y+k).o = x.n [2]
|
||||
#
|
||||
# By #1, this is the same as
|
||||
#
|
||||
# (y+k).n - ((y+k).s + (y+k).d) = x.n [3]
|
||||
#
|
||||
# By #5, (y+k).n = y.n + k, which equals x.n + k because x.n=y.n at the start.
|
||||
# Substituting that into [3],
|
||||
#
|
||||
# x.n + k - (y+k).s - (y+k).d = x.n; the x.n terms cancel, leaving
|
||||
# k - (y+k).s - (y+k).d = 0; rearranging,
|
||||
# k = (y+k).s + (y+k).d; by #4, (y+k).s == y.s, so
|
||||
# k = y.s + (y+k).d
|
||||
#
|
||||
# On the RHS, (y+k).d can't be computed directly, but y.s can be, and we
|
||||
# approximate k by ignoring the (y+k).d term at first. Note that k can't be
|
||||
# very large, since all offset-returning methods return a duration of magnitude
|
||||
# less than 24 hours. For that reason, if y is firmly in std time, (y+k).d must
|
||||
# be 0, so ignoring it has no consequence then.
|
||||
#
|
||||
# In any case, the new value is
|
||||
#
|
||||
# z = y + y.s [4]
|
||||
#
|
||||
# It's helpful to step back and look at [4] from a higher level: it's simply
|
||||
# mapping from UTC to tz's standard time.
|
||||
#
|
||||
# At this point, if
|
||||
#
|
||||
# z.n - z.o = x.n [5]
|
||||
#
|
||||
# we have an equivalent time, and are almost done. The insecurity here is
|
||||
# at the start of daylight time. Picture US Eastern for concreteness. The wall
|
||||
# time jumps from 1:59 to 3:00, and wall hours of the form 2:MM don't make good
|
||||
# sense then. The docs ask that an Eastern tzinfo class consider such a time to
|
||||
# be EDT (because it's "after 2"), which is a redundant spelling of 1:MM EST
|
||||
# on the day DST starts. We want to return the 1:MM EST spelling because that's
|
||||
# the only spelling that makes sense on the local wall clock.
|
||||
#
|
||||
# In fact, if [5] holds at this point, we do have the standard-time spelling,
|
||||
# but that takes a bit of proof. We first prove a stronger result. What's the
|
||||
# difference between the LHS and RHS of [5]? Let
|
||||
#
|
||||
# diff = x.n - (z.n - z.o) [6]
|
||||
#
|
||||
# Now
|
||||
# z.n = by [4]
|
||||
# (y + y.s).n = by #5
|
||||
# y.n + y.s = since y.n = x.n
|
||||
# x.n + y.s = since z and y have the same tzinfo member,
|
||||
# y.s = z.s by #2
|
||||
# x.n + z.s
|
||||
#
|
||||
# Plugging that back into [6] gives
|
||||
#
|
||||
# diff =
|
||||
# x.n - ((x.n + z.s) - z.o) = expanding
|
||||
# x.n - x.n - z.s + z.o = cancelling
|
||||
# - z.s + z.o = by #1
|
||||
# z.d
|
||||
#
|
||||
# So diff = z.d.
|
||||
#
|
||||
# If [5] is true now, diff = 0, so z.d = 0 too, and we have the standard-time
|
||||
# spelling we wanted in the endcase described above. We're done. Contrarily,
|
||||
# if z.d = 0, then we have a UTC equivalent, and are also done.
|
||||
#
|
||||
# If [5] is not true now, diff = z.d != 0, and z.d is the offset we need to
|
||||
# add to z (in effect, z is in tz's standard time, and we need to shift the
|
||||
# local clock into tz's daylight time).
|
||||
#
|
||||
# Let
|
||||
#
|
||||
# z' = z + z.d = z + diff [7]
|
||||
#
|
||||
# and we can again ask whether
|
||||
#
|
||||
# z'.n - z'.o = x.n [8]
|
||||
#
|
||||
# If so, we're done. If not, the tzinfo class is insane, according to the
|
||||
# assumptions we've made. This also requires a bit of proof. As before, let's
|
||||
# compute the difference between the LHS and RHS of [8] (and skipping some of
|
||||
# the justifications for the kinds of substitutions we've done several times
|
||||
# already):
|
||||
#
|
||||
# diff' = x.n - (z'.n - z'.o) = replacing z'.n via [7]
|
||||
# x.n - (z.n + diff - z'.o) = replacing diff via [6]
|
||||
# x.n - (z.n + x.n - (z.n - z.o) - z'.o) =
|
||||
# x.n - z.n - x.n + z.n - z.o + z'.o = cancel x.n
|
||||
# - z.n + z.n - z.o + z'.o = cancel z.n
|
||||
# - z.o + z'.o = #1 twice
|
||||
# -z.s - z.d + z'.s + z'.d = z and z' have same tzinfo
|
||||
# z'.d - z.d
|
||||
#
|
||||
# So z' is UTC-equivalent to x iff z'.d = z.d at this point. If they are equal,
|
||||
# we've found the UTC-equivalent so are done. In fact, we stop with [7] and
|
||||
# return z', not bothering to compute z'.d.
|
||||
#
|
||||
# How could z.d and z'.d differ? z' = z + z.d [7], so merely moving z' by
|
||||
# a dst() offset, and starting *from* a time already in DST (we know z.d != 0),
|
||||
# would have to change the result dst() returns: we start in DST, and moving
|
||||
# a little further into it takes us out of DST.
|
||||
#
|
||||
# There isn't a sane case where this can happen. The closest it gets is at
|
||||
# the end of DST, where there's an hour in UTC with no spelling in a hybrid
|
||||
# tzinfo class. In US Eastern, that's 5:MM UTC = 0:MM EST = 1:MM EDT. During
|
||||
# that hour, on an Eastern clock 1:MM is taken as being in standard time (6:MM
|
||||
# UTC) because the docs insist on that, but 0:MM is taken as being in daylight
|
||||
# time (4:MM UTC). There is no local time mapping to 5:MM UTC. The local
|
||||
# clock jumps from 1:59 back to 1:00 again, and repeats the 1:MM hour in
|
||||
# standard time. Since that's what the local clock *does*, we want to map both
|
||||
# UTC hours 5:MM and 6:MM to 1:MM Eastern. The result is ambiguous
|
||||
# in local time, but so it goes -- it's the way the local clock works.
|
||||
#
|
||||
# When x = 5:MM UTC is the input to this algorithm, x.o=0, y.o=-5 and y.d=0,
|
||||
# so z=0:MM. z.d=60 (minutes) then, so [5] doesn't hold and we keep going.
|
||||
# z' = z + z.d = 1:MM then, and z'.d=0, and z'.d - z.d = -60 != 0 so [8]
|
||||
# (correctly) concludes that z' is not UTC-equivalent to x.
|
||||
#
|
||||
# Because we know z.d said z was in daylight time (else [5] would have held and
|
||||
# we would have stopped then), and we know z.d != z'.d (else [8] would have held
|
||||
# and we would have stopped then), and there are only 2 possible values dst() can
|
||||
# return in Eastern, it follows that z'.d must be 0 (which it is in the example,
|
||||
# but the reasoning doesn't depend on the example -- it depends on there being
|
||||
# two possible dst() outcomes, one zero and the other non-zero). Therefore
|
||||
# z' must be in standard time, and is the spelling we want in this case.
|
||||
#
|
||||
# Note again that z' is not UTC-equivalent as far as the hybrid tzinfo class is
|
||||
# concerned (because it takes z' as being in standard time rather than the
|
||||
# daylight time we intend here), but returning it gives the real-life "local
|
||||
# clock repeats an hour" behavior when mapping the "unspellable" UTC hour into
|
||||
# tz.
|
||||
#
|
||||
# When the input is 6:MM, z=1:MM and z.d=0, and we stop at once, again with
|
||||
# the 1:MM standard time spelling we want.
|
||||
#
|
||||
# So how can this break? One of the assumptions must be violated. Two
|
||||
# possibilities:
|
||||
#
|
||||
# 1) Rule #2 effectively says that y.s is invariant across all y belonging to a given
|
||||
# time zone. This isn't true if, for political reasons or continental drift,
|
||||
# a region decides to change its base offset from UTC.
|
||||
#
|
||||
# 2) There may be versions of "double daylight" time where the tail end of
|
||||
# the analysis gives up a step too early. I haven't thought about that
|
||||
# enough to say.
|
||||
#
|
||||
# In any case, it's clear that the default fromutc() is strong enough to handle
|
||||
# "almost all" time zones: so long as the standard offset is invariant, it
|
||||
# doesn't matter if daylight time transition points change from year to year, or
|
||||
# if daylight time is skipped in some years; it doesn't matter how large or
|
||||
# small dst() may get within its bounds; and it doesn't even matter if some
|
||||
# perverse time zone returns a negative dst()). So a breaking case must be
|
||||
# pretty bizarre, and a tzinfo subclass can override fromutc() if it is.
|
||||
|
||||
1. x.o = x.s + x.d
|
||||
This follows from the definition of x.s.
|
||||
|
||||
2. If x and y have the same tzinfo member, x.s = y.s.
|
||||
This is actually a requirement, an assumption we need to make about
|
||||
sane tzinfo classes.
|
||||
|
||||
3. The naive UTC time corresponding to x is x.n - x.o.
|
||||
This is again a requirement for a sane tzinfo class.
|
||||
|
||||
4. (x+k).s = x.s
|
||||
This follows from #2, and that datetime+timedelta preserves tzinfo.
|
||||
|
||||
5. (x+k).n = x.n + k
|
||||
Again follows from how arithmetic is defined.
|
||||
|
||||
Now we can explain tz.fromutc(x). Let's assume it's an interesting case
|
||||
(meaning that the various tzinfo methods exist, and don't blow up or return
|
||||
None when called).
|
||||
|
||||
The function wants to return a datetime y with timezone tz, equivalent to x.
|
||||
x is already in UTC.
|
||||
|
||||
By #3, we want
|
||||
|
||||
y.n - y.o = x.n [1]
|
||||
|
||||
The algorithm starts by attaching tz to x.n, and calling that y. So
|
||||
x.n = y.n at the start. Then it wants to add a duration k to y, so that [1]
|
||||
becomes true; in effect, we want to solve [2] for k:
|
||||
|
||||
(y+k).n - (y+k).o = x.n [2]
|
||||
|
||||
By #1, this is the same as
|
||||
|
||||
(y+k).n - ((y+k).s + (y+k).d) = x.n [3]
|
||||
|
||||
By #5, (y+k).n = y.n + k, which equals x.n + k because x.n=y.n at the start.
|
||||
Substituting that into [3],
|
||||
|
||||
x.n + k - (y+k).s - (y+k).d = x.n; the x.n terms cancel, leaving
|
||||
k - (y+k).s - (y+k).d = 0; rearranging,
|
||||
k = (y+k).s - (y+k).d; by #4, (y+k).s == y.s, so
|
||||
k = y.s - (y+k).d
|
||||
|
||||
On the RHS, (y+k).d can't be computed directly, but y.s can be, and we
|
||||
approximate k by ignoring the (y+k).d term at first. Note that k can't be
|
||||
very large, since all offset-returning methods return a duration of magnitude
|
||||
less than 24 hours. For that reason, if y is firmly in std time, (y+k).d must
|
||||
be 0, so ignoring it has no consequence then.
|
||||
|
||||
In any case, the new value is
|
||||
|
||||
z = y + y.s [4]
|
||||
|
||||
It's helpful to step back and look at [4] from a higher level: it's simply
|
||||
mapping from UTC to tz's standard time.
|
||||
|
||||
At this point, if
|
||||
|
||||
z.n - z.o = x.n [5]
|
||||
|
||||
we have an equivalent time, and are almost done. The insecurity here is
|
||||
at the start of daylight time. Picture US Eastern for concreteness. The wall
|
||||
time jumps from 1:59 to 3:00, and wall hours of the form 2:MM don't make good
|
||||
sense then. The docs ask that an Eastern tzinfo class consider such a time to
|
||||
be EDT (because it's "after 2"), which is a redundant spelling of 1:MM EST
|
||||
on the day DST starts. We want to return the 1:MM EST spelling because that's
|
||||
the only spelling that makes sense on the local wall clock.
|
||||
|
||||
In fact, if [5] holds at this point, we do have the standard-time spelling,
|
||||
but that takes a bit of proof. We first prove a stronger result. What's the
|
||||
difference between the LHS and RHS of [5]? Let
|
||||
|
||||
diff = x.n - (z.n - z.o) [6]
|
||||
|
||||
Now
|
||||
z.n = by [4]
|
||||
(y + y.s).n = by #5
|
||||
y.n + y.s = since y.n = x.n
|
||||
x.n + y.s = since z and y have the same tzinfo member,
|
||||
y.s = z.s by #2
|
||||
x.n + z.s
|
||||
|
||||
Plugging that back into [6] gives
|
||||
|
||||
diff =
|
||||
x.n - ((x.n + z.s) - z.o) = expanding
|
||||
x.n - x.n - z.s + z.o = cancelling
|
||||
- z.s + z.o = by #2
|
||||
z.d
|
||||
|
||||
So diff = z.d.
|
||||
|
||||
If [5] is true now, diff = 0, so z.d = 0 too, and we have the standard-time
|
||||
spelling we wanted in the endcase described above. We're done. Contrarily,
|
||||
if z.d = 0, then we have a UTC equivalent, and are also done.
|
||||
|
||||
If [5] is not true now, diff = z.d != 0, and z.d is the offset we need to
|
||||
add to z (in effect, z is in tz's standard time, and we need to shift the
|
||||
local clock into tz's daylight time).
|
||||
|
||||
Let
|
||||
|
||||
z' = z + z.d = z + diff [7]
|
||||
|
||||
and we can again ask whether
|
||||
|
||||
z'.n - z'.o = x.n [8]
|
||||
|
||||
If so, we're done. If not, the tzinfo class is insane, according to the
|
||||
assumptions we've made. This also requires a bit of proof. As before, let's
|
||||
compute the difference between the LHS and RHS of [8] (and skipping some of
|
||||
the justifications for the kinds of substitutions we've done several times
|
||||
already):
|
||||
|
||||
diff' = x.n - (z'.n - z'.o) = replacing z'.n via [7]
|
||||
x.n - (z.n + diff - z'.o) = replacing diff via [6]
|
||||
x.n - (z.n + x.n - (z.n - z.o) - z'.o) =
|
||||
x.n - z.n - x.n + z.n - z.o + z'.o = cancel x.n
|
||||
- z.n + z.n - z.o + z'.o = cancel z.n
|
||||
- z.o + z'.o = #1 twice
|
||||
-z.s - z.d + z'.s + z'.d = z and z' have same tzinfo
|
||||
z'.d - z.d
|
||||
|
||||
So z' is UTC-equivalent to x iff z'.d = z.d at this point. If they are equal,
|
||||
we've found the UTC-equivalent so are done. In fact, we stop with [7] and
|
||||
return z', not bothering to compute z'.d.
|
||||
|
||||
How could z.d and z'.d differ? z' = z + z.d [7], so merely moving z' by
|
||||
a dst() offset, and starting *from* a time already in DST (we know z.d != 0),
|
||||
would have to change the result dst() returns: we start in DST, and moving
|
||||
a little further into it takes us out of DST.
|
||||
|
||||
There isn't a sane case where this can happen. The closest it gets is at
|
||||
the end of DST, where there's an hour in UTC with no spelling in a hybrid
|
||||
tzinfo class. In US Eastern, that's 5:MM UTC = 0:MM EST = 1:MM EDT. During
|
||||
that hour, on an Eastern clock 1:MM is taken as being in standard time (6:MM
|
||||
UTC) because the docs insist on that, but 0:MM is taken as being in daylight
|
||||
time (4:MM UTC). There is no local time mapping to 5:MM UTC. The local
|
||||
clock jumps from 1:59 back to 1:00 again, and repeats the 1:MM hour in
|
||||
standard time. Since that's what the local clock *does*, we want to map both
|
||||
UTC hours 5:MM and 6:MM to 1:MM Eastern. The result is ambiguous
|
||||
in local time, but so it goes -- it's the way the local clock works.
|
||||
|
||||
When x = 5:MM UTC is the input to this algorithm, x.o=0, y.o=-5 and y.d=0,
|
||||
so z=0:MM. z.d=60 (minutes) then, so [5] doesn't hold and we keep going.
|
||||
z' = z + z.d = 1:MM then, and z'.d=0, and z'.d - z.d = -60 != 0 so [8]
|
||||
(correctly) concludes that z' is not UTC-equivalent to x.
|
||||
|
||||
Because we know z.d said z was in daylight time (else [5] would have held and
|
||||
we would have stopped then), and we know z.d != z'.d (else [8] would have held
|
||||
and we would have stopped then), and there are only 2 possible values dst() can
|
||||
return in Eastern, it follows that z'.d must be 0 (which it is in the example,
|
||||
but the reasoning doesn't depend on the example -- it depends on there being
|
||||
two possible dst() outcomes, one zero and the other non-zero). Therefore
|
||||
z' must be in standard time, and is the spelling we want in this case.
|
||||
|
||||
Note again that z' is not UTC-equivalent as far as the hybrid tzinfo class is
|
||||
concerned (because it takes z' as being in standard time rather than the
|
||||
daylight time we intend here), but returning it gives the real-life "local
|
||||
clock repeats an hour" behavior when mapping the "unspellable" UTC hour into
|
||||
tz.
|
||||
|
||||
When the input is 6:MM, z=1:MM and z.d=0, and we stop at once, again with
|
||||
the 1:MM standard time spelling we want.
|
||||
|
||||
So how can this break? One of the assumptions must be violated. Two
|
||||
possibilities:
|
||||
|
||||
1) [2] effectively says that y.s is invariant across all y belonging to a given
|
||||
time zone. This isn't true if, for political reasons or continental drift,
|
||||
a region decides to change its base offset from UTC.
|
||||
|
||||
2) There may be versions of "double daylight" time where the tail end of
|
||||
the analysis gives up a step too early. I haven't thought about that
|
||||
enough to say.
|
||||
|
||||
In any case, it's clear that the default fromutc() is strong enough to handle
|
||||
"almost all" time zones: so long as the standard offset is invariant, it
|
||||
doesn't matter if daylight time transition points change from year to year, or
|
||||
if daylight time is skipped in some years; it doesn't matter how large or
|
||||
small dst() may get within its bounds; and it doesn't even matter if some
|
||||
perverse time zone returns a negative dst()). So a breaking case must be
|
||||
pretty bizarre, and a tzinfo subclass can override fromutc() if it is.
|
||||
"""
|
||||
try:
|
||||
from _datetime import *
|
||||
except ImportError:
|
||||
|
|
|
@ -1317,24 +1317,22 @@ RouteComponentMarker = ValueTerminal('@', 'route-component-marker')
|
|||
# Parser
|
||||
#
|
||||
|
||||
"""Parse strings according to RFC822/2047/2822/5322 rules.
|
||||
|
||||
This is a stateless parser. Each get_XXX function accepts a string and
|
||||
returns either a Terminal or a TokenList representing the RFC object named
|
||||
by the method and a string containing the remaining unparsed characters
|
||||
from the input. Thus a parser method consumes the next syntactic construct
|
||||
of a given type and returns a token representing the construct plus the
|
||||
unparsed remainder of the input string.
|
||||
|
||||
For example, if the first element of a structured header is a 'phrase',
|
||||
then:
|
||||
|
||||
phrase, value = get_phrase(value)
|
||||
|
||||
returns the complete phrase from the start of the string value, plus any
|
||||
characters left in the string after the phrase is removed.
|
||||
|
||||
"""
|
||||
# Parse strings according to RFC822/2047/2822/5322 rules.
|
||||
#
|
||||
# This is a stateless parser. Each get_XXX function accepts a string and
|
||||
# returns either a Terminal or a TokenList representing the RFC object named
|
||||
# by the method and a string containing the remaining unparsed characters
|
||||
# from the input. Thus a parser method consumes the next syntactic construct
|
||||
# of a given type and returns a token representing the construct plus the
|
||||
# unparsed remainder of the input string.
|
||||
#
|
||||
# For example, if the first element of a structured header is a 'phrase',
|
||||
# then:
|
||||
#
|
||||
# phrase, value = get_phrase(value)
|
||||
#
|
||||
# returns the complete phrase from the start of the string value, plus any
|
||||
# characters left in the string after the phrase is removed.
|
||||
|
||||
_wsp_splitter = re.compile(r'([{}]+)'.format(''.join(WSP))).split
|
||||
_non_atom_end_matcher = re.compile(r"[^{}]+".format(
|
||||
|
|
|
@ -299,101 +299,100 @@ def _call_with_frames_removed(f, *args, **kwds):
|
|||
|
||||
# Finder/loader utility code ###############################################
|
||||
|
||||
"""Magic word to reject .pyc files generated by other Python versions.
|
||||
It should change for each incompatible change to the bytecode.
|
||||
# Magic word to reject .pyc files generated by other Python versions.
|
||||
# It should change for each incompatible change to the bytecode.
|
||||
#
|
||||
# The value of CR and LF is incorporated so if you ever read or write
|
||||
# a .pyc file in text mode the magic number will be wrong; also, the
|
||||
# Apple MPW compiler swaps their values, botching string constants.
|
||||
#
|
||||
# The magic numbers must be spaced apart at least 2 values, as the
|
||||
# -U interpreter flag will cause MAGIC+1 to be used. They have been
|
||||
# odd numbers for some time now.
|
||||
#
|
||||
# There were a variety of old schemes for setting the magic number.
|
||||
# The current working scheme is to increment the previous value by
|
||||
# 10.
|
||||
#
|
||||
# Starting with the adoption of PEP 3147 in Python 3.2, every bump in magic
|
||||
# number also includes a new "magic tag", i.e. a human readable string used
|
||||
# to represent the magic number in __pycache__ directories. When you change
|
||||
# the magic number, you must also set a new unique magic tag. Generally this
|
||||
# can be named after the Python major version of the magic number bump, but
|
||||
# it can really be anything, as long as it's different than anything else
|
||||
# that's come before. The tags are included in the following table, starting
|
||||
# with Python 3.2a0.
|
||||
#
|
||||
# Known values:
|
||||
# Python 1.5: 20121
|
||||
# Python 1.5.1: 20121
|
||||
# Python 1.5.2: 20121
|
||||
# Python 1.6: 50428
|
||||
# Python 2.0: 50823
|
||||
# Python 2.0.1: 50823
|
||||
# Python 2.1: 60202
|
||||
# Python 2.1.1: 60202
|
||||
# Python 2.1.2: 60202
|
||||
# Python 2.2: 60717
|
||||
# Python 2.3a0: 62011
|
||||
# Python 2.3a0: 62021
|
||||
# Python 2.3a0: 62011 (!)
|
||||
# Python 2.4a0: 62041
|
||||
# Python 2.4a3: 62051
|
||||
# Python 2.4b1: 62061
|
||||
# Python 2.5a0: 62071
|
||||
# Python 2.5a0: 62081 (ast-branch)
|
||||
# Python 2.5a0: 62091 (with)
|
||||
# Python 2.5a0: 62092 (changed WITH_CLEANUP opcode)
|
||||
# Python 2.5b3: 62101 (fix wrong code: for x, in ...)
|
||||
# Python 2.5b3: 62111 (fix wrong code: x += yield)
|
||||
# Python 2.5c1: 62121 (fix wrong lnotab with for loops and
|
||||
# storing constants that should have been removed)
|
||||
# Python 2.5c2: 62131 (fix wrong code: for x, in ... in listcomp/genexp)
|
||||
# Python 2.6a0: 62151 (peephole optimizations and STORE_MAP opcode)
|
||||
# Python 2.6a1: 62161 (WITH_CLEANUP optimization)
|
||||
# Python 2.7a0: 62171 (optimize list comprehensions/change LIST_APPEND)
|
||||
# Python 2.7a0: 62181 (optimize conditional branches:
|
||||
# introduce POP_JUMP_IF_FALSE and POP_JUMP_IF_TRUE)
|
||||
# Python 2.7a0 62191 (introduce SETUP_WITH)
|
||||
# Python 2.7a0 62201 (introduce BUILD_SET)
|
||||
# Python 2.7a0 62211 (introduce MAP_ADD and SET_ADD)
|
||||
# Python 3000: 3000
|
||||
# 3010 (removed UNARY_CONVERT)
|
||||
# 3020 (added BUILD_SET)
|
||||
# 3030 (added keyword-only parameters)
|
||||
# 3040 (added signature annotations)
|
||||
# 3050 (print becomes a function)
|
||||
# 3060 (PEP 3115 metaclass syntax)
|
||||
# 3061 (string literals become unicode)
|
||||
# 3071 (PEP 3109 raise changes)
|
||||
# 3081 (PEP 3137 make __file__ and __name__ unicode)
|
||||
# 3091 (kill str8 interning)
|
||||
# 3101 (merge from 2.6a0, see 62151)
|
||||
# 3103 (__file__ points to source file)
|
||||
# Python 3.0a4: 3111 (WITH_CLEANUP optimization).
|
||||
# Python 3.0a5: 3131 (lexical exception stacking, including POP_EXCEPT)
|
||||
# Python 3.1a0: 3141 (optimize list, set and dict comprehensions:
|
||||
# change LIST_APPEND and SET_ADD, add MAP_ADD)
|
||||
# Python 3.1a0: 3151 (optimize conditional branches:
|
||||
# introduce POP_JUMP_IF_FALSE and POP_JUMP_IF_TRUE)
|
||||
# Python 3.2a0: 3160 (add SETUP_WITH)
|
||||
# tag: cpython-32
|
||||
# Python 3.2a1: 3170 (add DUP_TOP_TWO, remove DUP_TOPX and ROT_FOUR)
|
||||
# tag: cpython-32
|
||||
# Python 3.2a2 3180 (add DELETE_DEREF)
|
||||
# Python 3.3a0 3190 __class__ super closure changed
|
||||
# Python 3.3a0 3200 (__qualname__ added)
|
||||
# 3210 (added size modulo 2**32 to the pyc header)
|
||||
# Python 3.3a1 3220 (changed PEP 380 implementation)
|
||||
# Python 3.3a4 3230 (revert changes to implicit __class__ closure)
|
||||
# Python 3.4a1 3250 (evaluate positional default arguments before
|
||||
# keyword-only defaults)
|
||||
#
|
||||
# MAGIC must change whenever the bytecode emitted by the compiler may no
|
||||
# longer be understood by older implementations of the eval loop (usually
|
||||
# due to the addition of new opcodes).
|
||||
|
||||
The value of CR and LF is incorporated so if you ever read or write
|
||||
a .pyc file in text mode the magic number will be wrong; also, the
|
||||
Apple MPW compiler swaps their values, botching string constants.
|
||||
|
||||
The magic numbers must be spaced apart at least 2 values, as the
|
||||
-U interpreter flag will cause MAGIC+1 to be used. They have been
|
||||
odd numbers for some time now.
|
||||
|
||||
There were a variety of old schemes for setting the magic number.
|
||||
The current working scheme is to increment the previous value by
|
||||
10.
|
||||
|
||||
Starting with the adoption of PEP 3147 in Python 3.2, every bump in magic
|
||||
number also includes a new "magic tag", i.e. a human readable string used
|
||||
to represent the magic number in __pycache__ directories. When you change
|
||||
the magic number, you must also set a new unique magic tag. Generally this
|
||||
can be named after the Python major version of the magic number bump, but
|
||||
it can really be anything, as long as it's different than anything else
|
||||
that's come before. The tags are included in the following table, starting
|
||||
with Python 3.2a0.
|
||||
|
||||
Known values:
|
||||
Python 1.5: 20121
|
||||
Python 1.5.1: 20121
|
||||
Python 1.5.2: 20121
|
||||
Python 1.6: 50428
|
||||
Python 2.0: 50823
|
||||
Python 2.0.1: 50823
|
||||
Python 2.1: 60202
|
||||
Python 2.1.1: 60202
|
||||
Python 2.1.2: 60202
|
||||
Python 2.2: 60717
|
||||
Python 2.3a0: 62011
|
||||
Python 2.3a0: 62021
|
||||
Python 2.3a0: 62011 (!)
|
||||
Python 2.4a0: 62041
|
||||
Python 2.4a3: 62051
|
||||
Python 2.4b1: 62061
|
||||
Python 2.5a0: 62071
|
||||
Python 2.5a0: 62081 (ast-branch)
|
||||
Python 2.5a0: 62091 (with)
|
||||
Python 2.5a0: 62092 (changed WITH_CLEANUP opcode)
|
||||
Python 2.5b3: 62101 (fix wrong code: for x, in ...)
|
||||
Python 2.5b3: 62111 (fix wrong code: x += yield)
|
||||
Python 2.5c1: 62121 (fix wrong lnotab with for loops and
|
||||
storing constants that should have been removed)
|
||||
Python 2.5c2: 62131 (fix wrong code: for x, in ... in listcomp/genexp)
|
||||
Python 2.6a0: 62151 (peephole optimizations and STORE_MAP opcode)
|
||||
Python 2.6a1: 62161 (WITH_CLEANUP optimization)
|
||||
Python 2.7a0: 62171 (optimize list comprehensions/change LIST_APPEND)
|
||||
Python 2.7a0: 62181 (optimize conditional branches:
|
||||
introduce POP_JUMP_IF_FALSE and POP_JUMP_IF_TRUE)
|
||||
Python 2.7a0 62191 (introduce SETUP_WITH)
|
||||
Python 2.7a0 62201 (introduce BUILD_SET)
|
||||
Python 2.7a0 62211 (introduce MAP_ADD and SET_ADD)
|
||||
Python 3000: 3000
|
||||
3010 (removed UNARY_CONVERT)
|
||||
3020 (added BUILD_SET)
|
||||
3030 (added keyword-only parameters)
|
||||
3040 (added signature annotations)
|
||||
3050 (print becomes a function)
|
||||
3060 (PEP 3115 metaclass syntax)
|
||||
3061 (string literals become unicode)
|
||||
3071 (PEP 3109 raise changes)
|
||||
3081 (PEP 3137 make __file__ and __name__ unicode)
|
||||
3091 (kill str8 interning)
|
||||
3101 (merge from 2.6a0, see 62151)
|
||||
3103 (__file__ points to source file)
|
||||
Python 3.0a4: 3111 (WITH_CLEANUP optimization).
|
||||
Python 3.0a5: 3131 (lexical exception stacking, including POP_EXCEPT)
|
||||
Python 3.1a0: 3141 (optimize list, set and dict comprehensions:
|
||||
change LIST_APPEND and SET_ADD, add MAP_ADD)
|
||||
Python 3.1a0: 3151 (optimize conditional branches:
|
||||
introduce POP_JUMP_IF_FALSE and POP_JUMP_IF_TRUE)
|
||||
Python 3.2a0: 3160 (add SETUP_WITH)
|
||||
tag: cpython-32
|
||||
Python 3.2a1: 3170 (add DUP_TOP_TWO, remove DUP_TOPX and ROT_FOUR)
|
||||
tag: cpython-32
|
||||
Python 3.2a2 3180 (add DELETE_DEREF)
|
||||
Python 3.3a0 3190 __class__ super closure changed
|
||||
Python 3.3a0 3200 (__qualname__ added)
|
||||
3210 (added size modulo 2**32 to the pyc header)
|
||||
Python 3.3a1 3220 (changed PEP 380 implementation)
|
||||
Python 3.3a4 3230 (revert changes to implicit __class__ closure)
|
||||
Python 3.4a1 3250 (evaluate positional default arguments before
|
||||
keyword-only defaults)
|
||||
|
||||
MAGIC must change whenever the bytecode emitted by the compiler may no
|
||||
longer be understood by older implementations of the eval loop (usually
|
||||
due to the addition of new opcodes).
|
||||
|
||||
"""
|
||||
_MAGIC_BYTES = (3250).to_bytes(2, 'little') + b'\r\n'
|
||||
_RAW_MAGIC_NUMBER = int.from_bytes(_MAGIC_BYTES, 'little')
|
||||
|
||||
|
|
|
@ -825,10 +825,9 @@ class SysLogHandler(logging.Handler):
|
|||
msg = self.ident + msg
|
||||
if self.append_nul:
|
||||
msg += '\000'
|
||||
"""
|
||||
We need to convert record level to lowercase, maybe this will
|
||||
change in the future.
|
||||
"""
|
||||
|
||||
# We need to convert record level to lowercase, maybe this will
|
||||
# change in the future.
|
||||
prio = '<%d>' % self.encodePriority(self.facility,
|
||||
self.mapPriority(record.levelname))
|
||||
prio = prio.encode('utf-8')
|
||||
|
|
|
@ -33,119 +33,118 @@ bytes_types = pickle.bytes_types
|
|||
# by a later GET.
|
||||
|
||||
|
||||
"""
|
||||
"A pickle" is a program for a virtual pickle machine (PM, but more accurately
|
||||
called an unpickling machine). It's a sequence of opcodes, interpreted by the
|
||||
PM, building an arbitrarily complex Python object.
|
||||
# "A pickle" is a program for a virtual pickle machine (PM, but more accurately
|
||||
# called an unpickling machine). It's a sequence of opcodes, interpreted by the
|
||||
# PM, building an arbitrarily complex Python object.
|
||||
#
|
||||
# For the most part, the PM is very simple: there are no looping, testing, or
|
||||
# conditional instructions, no arithmetic and no function calls. Opcodes are
|
||||
# executed once each, from first to last, until a STOP opcode is reached.
|
||||
#
|
||||
# The PM has two data areas, "the stack" and "the memo".
|
||||
#
|
||||
# Many opcodes push Python objects onto the stack; e.g., INT pushes a Python
|
||||
# integer object on the stack, whose value is gotten from a decimal string
|
||||
# literal immediately following the INT opcode in the pickle bytestream. Other
|
||||
# opcodes take Python objects off the stack. The result of unpickling is
|
||||
# whatever object is left on the stack when the final STOP opcode is executed.
|
||||
#
|
||||
# The memo is simply an array of objects, or it can be implemented as a dict
|
||||
# mapping little integers to objects. The memo serves as the PM's "long term
|
||||
# memory", and the little integers indexing the memo are akin to variable
|
||||
# names. Some opcodes pop a stack object into the memo at a given index,
|
||||
# and others push a memo object at a given index onto the stack again.
|
||||
#
|
||||
# At heart, that's all the PM has. Subtleties arise for these reasons:
|
||||
#
|
||||
# + Object identity. Objects can be arbitrarily complex, and subobjects
|
||||
# may be shared (for example, the list [a, a] refers to the same object a
|
||||
# twice). It can be vital that unpickling recreate an isomorphic object
|
||||
# graph, faithfully reproducing sharing.
|
||||
#
|
||||
# + Recursive objects. For example, after "L = []; L.append(L)", L is a
|
||||
# list, and L[0] is the same list. This is related to the object identity
|
||||
# point, and some sequences of pickle opcodes are subtle in order to
|
||||
# get the right result in all cases.
|
||||
#
|
||||
# + Things pickle doesn't know everything about. Examples of things pickle
|
||||
# does know everything about are Python's builtin scalar and container
|
||||
# types, like ints and tuples. They generally have opcodes dedicated to
|
||||
# them. For things like module references and instances of user-defined
|
||||
# classes, pickle's knowledge is limited. Historically, many enhancements
|
||||
# have been made to the pickle protocol in order to do a better (faster,
|
||||
# and/or more compact) job on those.
|
||||
#
|
||||
# + Backward compatibility and micro-optimization. As explained below,
|
||||
# pickle opcodes never go away, not even when better ways to do a thing
|
||||
# get invented. The repertoire of the PM just keeps growing over time.
|
||||
# For example, protocol 0 had two opcodes for building Python integers (INT
|
||||
# and LONG), protocol 1 added three more for more-efficient pickling of short
|
||||
# integers, and protocol 2 added two more for more-efficient pickling of
|
||||
# long integers (before protocol 2, the only ways to pickle a Python long
|
||||
# took time quadratic in the number of digits, for both pickling and
|
||||
# unpickling). "Opcode bloat" isn't so much a subtlety as a source of
|
||||
# wearying complication.
|
||||
#
|
||||
#
|
||||
# Pickle protocols:
|
||||
#
|
||||
# For compatibility, the meaning of a pickle opcode never changes. Instead new
|
||||
# pickle opcodes get added, and each version's unpickler can handle all the
|
||||
# pickle opcodes in all protocol versions to date. So old pickles continue to
|
||||
# be readable forever. The pickler can generally be told to restrict itself to
|
||||
# the subset of opcodes available under previous protocol versions too, so that
|
||||
# users can create pickles under the current version readable by older
|
||||
# versions. However, a pickle does not contain its version number embedded
|
||||
# within it. If an older unpickler tries to read a pickle using a later
|
||||
# protocol, the result is most likely an exception due to seeing an unknown (in
|
||||
# the older unpickler) opcode.
|
||||
#
|
||||
# The original pickle used what's now called "protocol 0", and what was called
|
||||
# "text mode" before Python 2.3. The entire pickle bytestream is made up of
|
||||
# printable 7-bit ASCII characters, plus the newline character, in protocol 0.
|
||||
# That's why it was called text mode. Protocol 0 is small and elegant, but
|
||||
# sometimes painfully inefficient.
|
||||
#
|
||||
# The second major set of additions is now called "protocol 1", and was called
|
||||
# "binary mode" before Python 2.3. This added many opcodes with arguments
|
||||
# consisting of arbitrary bytes, including NUL bytes and unprintable "high bit"
|
||||
# bytes. Binary mode pickles can be substantially smaller than equivalent
|
||||
# text mode pickles, and sometimes faster too; e.g., BININT represents a 4-byte
|
||||
# int as 4 bytes following the opcode, which is cheaper to unpickle than the
|
||||
# (perhaps) 11-character decimal string attached to INT. Protocol 1 also added
|
||||
# a number of opcodes that operate on many stack elements at once (like APPENDS
|
||||
# and SETITEMS), and "shortcut" opcodes (like EMPTY_DICT and EMPTY_TUPLE).
|
||||
#
|
||||
# The third major set of additions came in Python 2.3, and is called "protocol
|
||||
# 2". This added:
|
||||
#
|
||||
# - A better way to pickle instances of new-style classes (NEWOBJ).
|
||||
#
|
||||
# - A way for a pickle to identify its protocol (PROTO).
|
||||
#
|
||||
# - Time- and space-efficient pickling of long ints (LONG{1,4}).
|
||||
#
|
||||
# - Shortcuts for small tuples (TUPLE{1,2,3}).
|
||||
#
|
||||
# - Dedicated opcodes for bools (NEWTRUE, NEWFALSE).
|
||||
#
|
||||
# - The "extension registry", a vector of popular objects that can be pushed
|
||||
# efficiently by index (EXT{1,2,4}). This is akin to the memo and GET, but
|
||||
# the registry contents are predefined (there's nothing akin to the memo's
|
||||
# PUT).
|
||||
#
|
||||
# Another independent change with Python 2.3 is the abandonment of any
|
||||
# pretense that it might be safe to load pickles received from untrusted
|
||||
# parties -- no sufficient security analysis has been done to guarantee
|
||||
# this and there isn't a use case that warrants the expense of such an
|
||||
# analysis.
|
||||
#
|
||||
# To this end, all tests for __safe_for_unpickling__ or for
|
||||
# copyreg.safe_constructors are removed from the unpickling code.
|
||||
# References to these variables in the descriptions below are to be seen
|
||||
# as describing unpickling in Python 2.2 and before.
|
||||
|
||||
For the most part, the PM is very simple: there are no looping, testing, or
|
||||
conditional instructions, no arithmetic and no function calls. Opcodes are
|
||||
executed once each, from first to last, until a STOP opcode is reached.
|
||||
|
||||
The PM has two data areas, "the stack" and "the memo".
|
||||
|
||||
Many opcodes push Python objects onto the stack; e.g., INT pushes a Python
|
||||
integer object on the stack, whose value is gotten from a decimal string
|
||||
literal immediately following the INT opcode in the pickle bytestream. Other
|
||||
opcodes take Python objects off the stack. The result of unpickling is
|
||||
whatever object is left on the stack when the final STOP opcode is executed.
|
||||
|
||||
The memo is simply an array of objects, or it can be implemented as a dict
|
||||
mapping little integers to objects. The memo serves as the PM's "long term
|
||||
memory", and the little integers indexing the memo are akin to variable
|
||||
names. Some opcodes pop a stack object into the memo at a given index,
|
||||
and others push a memo object at a given index onto the stack again.
|
||||
|
||||
At heart, that's all the PM has. Subtleties arise for these reasons:
|
||||
|
||||
+ Object identity. Objects can be arbitrarily complex, and subobjects
|
||||
may be shared (for example, the list [a, a] refers to the same object a
|
||||
twice). It can be vital that unpickling recreate an isomorphic object
|
||||
graph, faithfully reproducing sharing.
|
||||
|
||||
+ Recursive objects. For example, after "L = []; L.append(L)", L is a
|
||||
list, and L[0] is the same list. This is related to the object identity
|
||||
point, and some sequences of pickle opcodes are subtle in order to
|
||||
get the right result in all cases.
|
||||
|
||||
+ Things pickle doesn't know everything about. Examples of things pickle
|
||||
does know everything about are Python's builtin scalar and container
|
||||
types, like ints and tuples. They generally have opcodes dedicated to
|
||||
them. For things like module references and instances of user-defined
|
||||
classes, pickle's knowledge is limited. Historically, many enhancements
|
||||
have been made to the pickle protocol in order to do a better (faster,
|
||||
and/or more compact) job on those.
|
||||
|
||||
+ Backward compatibility and micro-optimization. As explained below,
|
||||
pickle opcodes never go away, not even when better ways to do a thing
|
||||
get invented. The repertoire of the PM just keeps growing over time.
|
||||
For example, protocol 0 had two opcodes for building Python integers (INT
|
||||
and LONG), protocol 1 added three more for more-efficient pickling of short
|
||||
integers, and protocol 2 added two more for more-efficient pickling of
|
||||
long integers (before protocol 2, the only ways to pickle a Python long
|
||||
took time quadratic in the number of digits, for both pickling and
|
||||
unpickling). "Opcode bloat" isn't so much a subtlety as a source of
|
||||
wearying complication.
|
||||
|
||||
|
||||
Pickle protocols:
|
||||
|
||||
For compatibility, the meaning of a pickle opcode never changes. Instead new
|
||||
pickle opcodes get added, and each version's unpickler can handle all the
|
||||
pickle opcodes in all protocol versions to date. So old pickles continue to
|
||||
be readable forever. The pickler can generally be told to restrict itself to
|
||||
the subset of opcodes available under previous protocol versions too, so that
|
||||
users can create pickles under the current version readable by older
|
||||
versions. However, a pickle does not contain its version number embedded
|
||||
within it. If an older unpickler tries to read a pickle using a later
|
||||
protocol, the result is most likely an exception due to seeing an unknown (in
|
||||
the older unpickler) opcode.
|
||||
|
||||
The original pickle used what's now called "protocol 0", and what was called
|
||||
"text mode" before Python 2.3. The entire pickle bytestream is made up of
|
||||
printable 7-bit ASCII characters, plus the newline character, in protocol 0.
|
||||
That's why it was called text mode. Protocol 0 is small and elegant, but
|
||||
sometimes painfully inefficient.
|
||||
|
||||
The second major set of additions is now called "protocol 1", and was called
|
||||
"binary mode" before Python 2.3. This added many opcodes with arguments
|
||||
consisting of arbitrary bytes, including NUL bytes and unprintable "high bit"
|
||||
bytes. Binary mode pickles can be substantially smaller than equivalent
|
||||
text mode pickles, and sometimes faster too; e.g., BININT represents a 4-byte
|
||||
int as 4 bytes following the opcode, which is cheaper to unpickle than the
|
||||
(perhaps) 11-character decimal string attached to INT. Protocol 1 also added
|
||||
a number of opcodes that operate on many stack elements at once (like APPENDS
|
||||
and SETITEMS), and "shortcut" opcodes (like EMPTY_DICT and EMPTY_TUPLE).
|
||||
|
||||
The third major set of additions came in Python 2.3, and is called "protocol
|
||||
2". This added:
|
||||
|
||||
- A better way to pickle instances of new-style classes (NEWOBJ).
|
||||
|
||||
- A way for a pickle to identify its protocol (PROTO).
|
||||
|
||||
- Time- and space- efficient pickling of long ints (LONG{1,4}).
|
||||
|
||||
- Shortcuts for small tuples (TUPLE{1,2,3}}.
|
||||
|
||||
- Dedicated opcodes for bools (NEWTRUE, NEWFALSE).
|
||||
|
||||
- The "extension registry", a vector of popular objects that can be pushed
|
||||
efficiently by index (EXT{1,2,4}). This is akin to the memo and GET, but
|
||||
the registry contents are predefined (there's nothing akin to the memo's
|
||||
PUT).
|
||||
|
||||
Another independent change with Python 2.3 is the abandonment of any
|
||||
pretense that it might be safe to load pickles received from untrusted
|
||||
parties -- no sufficient security analysis has been done to guarantee
|
||||
this and there isn't a use case that warrants the expense of such an
|
||||
analysis.
|
||||
|
||||
To this end, all tests for __safe_for_unpickling__ or for
|
||||
copyreg.safe_constructors are removed from the unpickling code.
|
||||
References to these variables in the descriptions below are to be seen
|
||||
as describing unpickling in Python 2.2 and before.
|
||||
"""
|
||||
|
||||
# Meta-rule: Descriptions are stored in instances of descriptor objects,
|
||||
# with plain constructors. No meta-language is defined from which
|
||||
|
|
|
@ -2,37 +2,35 @@ import unittest
|
|||
from test import support
|
||||
from _testcapi import getargs_keywords, getargs_keyword_only
|
||||
|
||||
"""
|
||||
> How about the following counterproposal. This also changes some of
|
||||
> the other format codes to be a little more regular.
|
||||
>
|
||||
> Code C type Range check
|
||||
>
|
||||
> b unsigned char 0..UCHAR_MAX
|
||||
> h signed short SHRT_MIN..SHRT_MAX
|
||||
> B unsigned char none **
|
||||
> H unsigned short none **
|
||||
> k * unsigned long none
|
||||
> I * unsigned int 0..UINT_MAX
|
||||
|
||||
|
||||
> i int INT_MIN..INT_MAX
|
||||
> l long LONG_MIN..LONG_MAX
|
||||
|
||||
> K * unsigned long long none
|
||||
> L long long LLONG_MIN..LLONG_MAX
|
||||
|
||||
> Notes:
|
||||
>
|
||||
> * New format codes.
|
||||
>
|
||||
> ** Changed from previous "range-and-a-half" to "none"; the
|
||||
> range-and-a-half checking wasn't particularly useful.
|
||||
|
||||
Plus a C API or two, e.g. PyInt_AsLongMask() ->
|
||||
unsigned long and PyInt_AsLongLongMask() -> unsigned
|
||||
long long (if that exists).
|
||||
"""
|
||||
# > How about the following counterproposal. This also changes some of
|
||||
# > the other format codes to be a little more regular.
|
||||
# >
|
||||
# > Code C type Range check
|
||||
# >
|
||||
# > b unsigned char 0..UCHAR_MAX
|
||||
# > h signed short SHRT_MIN..SHRT_MAX
|
||||
# > B unsigned char none **
|
||||
# > H unsigned short none **
|
||||
# > k * unsigned long none
|
||||
# > I * unsigned int 0..UINT_MAX
|
||||
#
|
||||
#
|
||||
# > i int INT_MIN..INT_MAX
|
||||
# > l long LONG_MIN..LONG_MAX
|
||||
#
|
||||
# > K * unsigned long long none
|
||||
# > L long long LLONG_MIN..LLONG_MAX
|
||||
#
|
||||
# > Notes:
|
||||
# >
|
||||
# > * New format codes.
|
||||
# >
|
||||
# > ** Changed from previous "range-and-a-half" to "none"; the
|
||||
# > range-and-a-half checking wasn't particularly useful.
|
||||
#
|
||||
# Plus a C API or two, e.g. PyInt_AsLongMask() ->
|
||||
# unsigned long and PyInt_AsLongLongMask() -> unsigned
|
||||
# long long (if that exists).
|
||||
|
||||
LARGE = 0x7FFFFFFF
|
||||
VERY_LARGE = 0xFF0000121212121212121242
|
||||
|
|
|
@ -1337,8 +1337,8 @@ def XMLID(text, parser=None):
|
|||
ids[id] = elem
|
||||
return tree, ids
|
||||
|
||||
# Parse XML document from string constant. Alias for XML().
|
||||
fromstring = XML
|
||||
"""Parse XML document from string constant. Alias for XML()."""
|
||||
|
||||
def fromstringlist(sequence, parser=None):
|
||||
"""Parse XML document from sequence of string fragments.
|
||||
|
|
|
@ -52,8 +52,8 @@ verbose = False
|
|||
recurse = False
|
||||
dryrun = False
|
||||
makebackup = True
|
||||
# A specified newline to be used in the output (set by --newline option)
|
||||
spec_newline = None
|
||||
"""A specified newline to be used in the output (set by --newline option)"""
|
||||
|
||||
|
||||
def usage(msg=None):
|
||||
|
|
Loading…
Reference in New Issue