Issue #17516: use comment syntax for comments, instead of multiline string
This commit is contained in:
parent
1f8898a591
commit
765531d2d0
|
@ -34,17 +34,15 @@ from _ctypes import FUNCFLAG_CDECL as _FUNCFLAG_CDECL, \
|
||||||
FUNCFLAG_USE_ERRNO as _FUNCFLAG_USE_ERRNO, \
|
FUNCFLAG_USE_ERRNO as _FUNCFLAG_USE_ERRNO, \
|
||||||
FUNCFLAG_USE_LASTERROR as _FUNCFLAG_USE_LASTERROR
|
FUNCFLAG_USE_LASTERROR as _FUNCFLAG_USE_LASTERROR
|
||||||
|
|
||||||
"""
|
# WINOLEAPI -> HRESULT
|
||||||
WINOLEAPI -> HRESULT
|
# WINOLEAPI_(type)
|
||||||
WINOLEAPI_(type)
|
#
|
||||||
|
# STDMETHODCALLTYPE
|
||||||
STDMETHODCALLTYPE
|
#
|
||||||
|
# STDMETHOD(name)
|
||||||
STDMETHOD(name)
|
# STDMETHOD_(type, name)
|
||||||
STDMETHOD_(type, name)
|
#
|
||||||
|
# STDAPICALLTYPE
|
||||||
STDAPICALLTYPE
|
|
||||||
"""
|
|
||||||
|
|
||||||
def create_string_buffer(init, size=None):
|
def create_string_buffer(init, size=None):
|
||||||
"""create_string_buffer(aBytes) -> character array
|
"""create_string_buffer(aBytes) -> character array
|
||||||
|
|
|
@ -5,17 +5,14 @@ from sys import getrefcount as grc
|
||||||
|
|
||||||
# XXX This test must be reviewed for correctness!!!
|
# XXX This test must be reviewed for correctness!!!
|
||||||
|
|
||||||
"""
|
# ctypes' types are container types.
|
||||||
ctypes' types are container types.
|
#
|
||||||
|
# They have an internal memory block, which only consists of some bytes,
|
||||||
They have an internal memory block, which only consists of some bytes,
|
# but it has to keep references to other objects as well. This is not
|
||||||
but it has to keep references to other objects as well. This is not
|
# really needed for trivial C types like int or char, but it is important
|
||||||
really needed for trivial C types like int or char, but it is important
|
# for aggregate types like strings or pointers in particular.
|
||||||
for aggregate types like strings or pointers in particular.
|
#
|
||||||
|
# What about pointers?
|
||||||
What about pointers?
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
class ObjectsTestCase(unittest.TestCase):
|
class ObjectsTestCase(unittest.TestCase):
|
||||||
def assertSame(self, a, b):
|
def assertSame(self, a, b):
|
||||||
|
|
|
@ -3,35 +3,33 @@ import sys
|
||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
# Bob Ippolito:
|
# Bob Ippolito:
|
||||||
"""
|
#
|
||||||
Ok.. the code to find the filename for __getattr__ should look
|
# Ok.. the code to find the filename for __getattr__ should look
|
||||||
something like:
|
# something like:
|
||||||
|
#
|
||||||
import os
|
# import os
|
||||||
from macholib.dyld import dyld_find
|
# from macholib.dyld import dyld_find
|
||||||
|
#
|
||||||
def find_lib(name):
|
# def find_lib(name):
|
||||||
possible = ['lib'+name+'.dylib', name+'.dylib',
|
# possible = ['lib'+name+'.dylib', name+'.dylib',
|
||||||
name+'.framework/'+name]
|
# name+'.framework/'+name]
|
||||||
for dylib in possible:
|
# for dylib in possible:
|
||||||
try:
|
# try:
|
||||||
return os.path.realpath(dyld_find(dylib))
|
# return os.path.realpath(dyld_find(dylib))
|
||||||
except ValueError:
|
# except ValueError:
|
||||||
pass
|
# pass
|
||||||
raise ValueError, "%s not found" % (name,)
|
# raise ValueError, "%s not found" % (name,)
|
||||||
|
#
|
||||||
It'll have output like this:
|
# It'll have output like this:
|
||||||
|
#
|
||||||
>>> find_lib('pthread')
|
# >>> find_lib('pthread')
|
||||||
'/usr/lib/libSystem.B.dylib'
|
# '/usr/lib/libSystem.B.dylib'
|
||||||
>>> find_lib('z')
|
# >>> find_lib('z')
|
||||||
'/usr/lib/libz.1.dylib'
|
# '/usr/lib/libz.1.dylib'
|
||||||
>>> find_lib('IOKit')
|
# >>> find_lib('IOKit')
|
||||||
'/System/Library/Frameworks/IOKit.framework/Versions/A/IOKit'
|
# '/System/Library/Frameworks/IOKit.framework/Versions/A/IOKit'
|
||||||
|
#
|
||||||
-bob
|
# -bob
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
from ctypes.macholib.dyld import dyld_find
|
from ctypes.macholib.dyld import dyld_find
|
||||||
|
|
||||||
|
|
390
Lib/datetime.py
390
Lib/datetime.py
|
@ -1929,203 +1929,203 @@ timezone.utc = timezone._create(timedelta(0))
|
||||||
timezone.min = timezone._create(timezone._minoffset)
|
timezone.min = timezone._create(timezone._minoffset)
|
||||||
timezone.max = timezone._create(timezone._maxoffset)
|
timezone.max = timezone._create(timezone._maxoffset)
|
||||||
_EPOCH = datetime(1970, 1, 1, tzinfo=timezone.utc)
|
_EPOCH = datetime(1970, 1, 1, tzinfo=timezone.utc)
|
||||||
"""
|
|
||||||
Some time zone algebra. For a datetime x, let
|
|
||||||
x.n = x stripped of its timezone -- its naive time.
|
|
||||||
x.o = x.utcoffset(), and assuming that doesn't raise an exception or
|
|
||||||
return None
|
|
||||||
x.d = x.dst(), and assuming that doesn't raise an exception or
|
|
||||||
return None
|
|
||||||
x.s = x's standard offset, x.o - x.d
|
|
||||||
|
|
||||||
Now some derived rules, where k is a duration (timedelta).
|
# Some time zone algebra. For a datetime x, let
|
||||||
|
# x.n = x stripped of its timezone -- its naive time.
|
||||||
|
# x.o = x.utcoffset(), and assuming that doesn't raise an exception or
|
||||||
|
# return None
|
||||||
|
# x.d = x.dst(), and assuming that doesn't raise an exception or
|
||||||
|
# return None
|
||||||
|
# x.s = x's standard offset, x.o - x.d
|
||||||
|
#
|
||||||
|
# Now some derived rules, where k is a duration (timedelta).
|
||||||
|
#
|
||||||
|
# 1. x.o = x.s + x.d
|
||||||
|
# This follows from the definition of x.s.
|
||||||
|
#
|
||||||
|
# 2. If x and y have the same tzinfo member, x.s = y.s.
|
||||||
|
# This is actually a requirement, an assumption we need to make about
|
||||||
|
# sane tzinfo classes.
|
||||||
|
#
|
||||||
|
# 3. The naive UTC time corresponding to x is x.n - x.o.
|
||||||
|
# This is again a requirement for a sane tzinfo class.
|
||||||
|
#
|
||||||
|
# 4. (x+k).s = x.s
|
||||||
|
# This follows from #2, and that datetime+timedelta preserves tzinfo.
|
||||||
|
#
|
||||||
|
# 5. (x+k).n = x.n + k
|
||||||
|
# Again follows from how arithmetic is defined.
|
||||||
|
#
|
||||||
|
# Now we can explain tz.fromutc(x). Let's assume it's an interesting case
|
||||||
|
# (meaning that the various tzinfo methods exist, and don't blow up or return
|
||||||
|
# None when called).
|
||||||
|
#
|
||||||
|
# The function wants to return a datetime y with timezone tz, equivalent to x.
|
||||||
|
# x is already in UTC.
|
||||||
|
#
|
||||||
|
# By #3, we want
|
||||||
|
#
|
||||||
|
# y.n - y.o = x.n [1]
|
||||||
|
#
|
||||||
|
# The algorithm starts by attaching tz to x.n, and calling that y. So
|
||||||
|
# x.n = y.n at the start. Then it wants to add a duration k to y, so that [1]
|
||||||
|
# becomes true; in effect, we want to solve [2] for k:
|
||||||
|
#
|
||||||
|
# (y+k).n - (y+k).o = x.n [2]
|
||||||
|
#
|
||||||
|
# By #1, this is the same as
|
||||||
|
#
|
||||||
|
# (y+k).n - ((y+k).s + (y+k).d) = x.n [3]
|
||||||
|
#
|
||||||
|
# By #5, (y+k).n = y.n + k, which equals x.n + k because x.n=y.n at the start.
|
||||||
|
# Substituting that into [3],
|
||||||
|
#
|
||||||
|
# x.n + k - (y+k).s - (y+k).d = x.n; the x.n terms cancel, leaving
|
||||||
|
# k - (y+k).s - (y+k).d = 0; rearranging,
|
||||||
|
# k = (y+k).s + (y+k).d; by #4, (y+k).s == y.s, so
|
||||||
|
# k = y.s + (y+k).d
|
||||||
|
#
|
||||||
|
# On the RHS, (y+k).d can't be computed directly, but y.s can be, and we
|
||||||
|
# approximate k by ignoring the (y+k).d term at first. Note that k can't be
|
||||||
|
# very large, since all offset-returning methods return a duration of magnitude
|
||||||
|
# less than 24 hours. For that reason, if y is firmly in std time, (y+k).d must
|
||||||
|
# be 0, so ignoring it has no consequence then.
|
||||||
|
#
|
||||||
|
# In any case, the new value is
|
||||||
|
#
|
||||||
|
# z = y + y.s [4]
|
||||||
|
#
|
||||||
|
# It's helpful to step back and look at [4] from a higher level: it's simply
|
||||||
|
# mapping from UTC to tz's standard time.
|
||||||
|
#
|
||||||
|
# At this point, if
|
||||||
|
#
|
||||||
|
# z.n - z.o = x.n [5]
|
||||||
|
#
|
||||||
|
# we have an equivalent time, and are almost done. The insecurity here is
|
||||||
|
# at the start of daylight time. Picture US Eastern for concreteness. The wall
|
||||||
|
# time jumps from 1:59 to 3:00, and wall hours of the form 2:MM don't make good
|
||||||
|
# sense then. The docs ask that an Eastern tzinfo class consider such a time to
|
||||||
|
# be EDT (because it's "after 2"), which is a redundant spelling of 1:MM EST
|
||||||
|
# on the day DST starts. We want to return the 1:MM EST spelling because that's
|
||||||
|
# the only spelling that makes sense on the local wall clock.
|
||||||
|
#
|
||||||
|
# In fact, if [5] holds at this point, we do have the standard-time spelling,
|
||||||
|
# but that takes a bit of proof. We first prove a stronger result. What's the
|
||||||
|
# difference between the LHS and RHS of [5]? Let
|
||||||
|
#
|
||||||
|
# diff = x.n - (z.n - z.o) [6]
|
||||||
|
#
|
||||||
|
# Now
|
||||||
|
# z.n = by [4]
|
||||||
|
# (y + y.s).n = by #5
|
||||||
|
# y.n + y.s = since y.n = x.n
|
||||||
|
# x.n + y.s = since z and y have the same tzinfo member,
|
||||||
|
# y.s = z.s by #2
|
||||||
|
# x.n + z.s
|
||||||
|
#
|
||||||
|
# Plugging that back into [6] gives
|
||||||
|
#
|
||||||
|
# diff =
|
||||||
|
# x.n - ((x.n + z.s) - z.o) = expanding
|
||||||
|
# x.n - x.n - z.s + z.o = cancelling
|
||||||
|
# - z.s + z.o = by #1
|
||||||
|
# z.d
|
||||||
|
#
|
||||||
|
# So diff = z.d.
|
||||||
|
#
|
||||||
|
# If [5] is true now, diff = 0, so z.d = 0 too, and we have the standard-time
|
||||||
|
# spelling we wanted in the endcase described above. We're done. Conversely,
|
||||||
|
# if z.d = 0, then we have a UTC equivalent, and are also done.
|
||||||
|
#
|
||||||
|
# If [5] is not true now, diff = z.d != 0, and z.d is the offset we need to
|
||||||
|
# add to z (in effect, z is in tz's standard time, and we need to shift the
|
||||||
|
# local clock into tz's daylight time).
|
||||||
|
#
|
||||||
|
# Let
|
||||||
|
#
|
||||||
|
# z' = z + z.d = z + diff [7]
|
||||||
|
#
|
||||||
|
# and we can again ask whether
|
||||||
|
#
|
||||||
|
# z'.n - z'.o = x.n [8]
|
||||||
|
#
|
||||||
|
# If so, we're done. If not, the tzinfo class is insane, according to the
|
||||||
|
# assumptions we've made. This also requires a bit of proof. As before, let's
|
||||||
|
# compute the difference between the LHS and RHS of [8] (and skipping some of
|
||||||
|
# the justifications for the kinds of substitutions we've done several times
|
||||||
|
# already):
|
||||||
|
#
|
||||||
|
# diff' = x.n - (z'.n - z'.o) = replacing z'.n via [7]
|
||||||
|
# x.n - (z.n + diff - z'.o) = replacing diff via [6]
|
||||||
|
# x.n - (z.n + x.n - (z.n - z.o) - z'.o) =
|
||||||
|
# x.n - z.n - x.n + z.n - z.o + z'.o = cancel x.n
|
||||||
|
# - z.n + z.n - z.o + z'.o = cancel z.n
|
||||||
|
# - z.o + z'.o = #1 twice
|
||||||
|
# -z.s - z.d + z'.s + z'.d = z and z' have same tzinfo
|
||||||
|
# z'.d - z.d
|
||||||
|
#
|
||||||
|
# So z' is UTC-equivalent to x iff z'.d = z.d at this point. If they are equal,
|
||||||
|
# we've found the UTC-equivalent so are done. In fact, we stop with [7] and
|
||||||
|
# return z', not bothering to compute z'.d.
|
||||||
|
#
|
||||||
|
# How could z.d and z'.d differ? z' = z + z.d [7], so merely moving z' by
|
||||||
|
# a dst() offset, and starting *from* a time already in DST (we know z.d != 0),
|
||||||
|
# would have to change the result dst() returns: we start in DST, and moving
|
||||||
|
# a little further into it takes us out of DST.
|
||||||
|
#
|
||||||
|
# There isn't a sane case where this can happen. The closest it gets is at
|
||||||
|
# the end of DST, where there's an hour in UTC with no spelling in a hybrid
|
||||||
|
# tzinfo class. In US Eastern, that's 5:MM UTC = 0:MM EST = 1:MM EDT. During
|
||||||
|
# that hour, on an Eastern clock 1:MM is taken as being in standard time (6:MM
|
||||||
|
# UTC) because the docs insist on that, but 0:MM is taken as being in daylight
|
||||||
|
# time (4:MM UTC). There is no local time mapping to 5:MM UTC. The local
|
||||||
|
# clock jumps from 1:59 back to 1:00 again, and repeats the 1:MM hour in
|
||||||
|
# standard time. Since that's what the local clock *does*, we want to map both
|
||||||
|
# UTC hours 5:MM and 6:MM to 1:MM Eastern. The result is ambiguous
|
||||||
|
# in local time, but so it goes -- it's the way the local clock works.
|
||||||
|
#
|
||||||
|
# When x = 5:MM UTC is the input to this algorithm, x.o=0, y.o=-5 and y.d=0,
|
||||||
|
# so z=0:MM. z.d=60 (minutes) then, so [5] doesn't hold and we keep going.
|
||||||
|
# z' = z + z.d = 1:MM then, and z'.d=0, and z'.d - z.d = -60 != 0 so [8]
|
||||||
|
# (correctly) concludes that z' is not UTC-equivalent to x.
|
||||||
|
#
|
||||||
|
# Because we know z.d said z was in daylight time (else [5] would have held and
|
||||||
|
# we would have stopped then), and we know z.d != z'.d (else [8] would have held
|
||||||
|
# and we would have stopped then), and there are only 2 possible values dst() can
|
||||||
|
# return in Eastern, it follows that z'.d must be 0 (which it is in the example,
|
||||||
|
# but the reasoning doesn't depend on the example -- it depends on there being
|
||||||
|
# two possible dst() outcomes, one zero and the other non-zero). Therefore
|
||||||
|
# z' must be in standard time, and is the spelling we want in this case.
|
||||||
|
#
|
||||||
|
# Note again that z' is not UTC-equivalent as far as the hybrid tzinfo class is
|
||||||
|
# concerned (because it takes z' as being in standard time rather than the
|
||||||
|
# daylight time we intend here), but returning it gives the real-life "local
|
||||||
|
# clock repeats an hour" behavior when mapping the "unspellable" UTC hour into
|
||||||
|
# tz.
|
||||||
|
#
|
||||||
|
# When the input is 6:MM, z=1:MM and z.d=0, and we stop at once, again with
|
||||||
|
# the 1:MM standard time spelling we want.
|
||||||
|
#
|
||||||
|
# So how can this break? One of the assumptions must be violated. Two
|
||||||
|
# possibilities:
|
||||||
|
#
|
||||||
|
# 1) #2 effectively says that y.s is invariant across all y belonging to a given
|
||||||
|
# time zone. This isn't true if, for political reasons or continental drift,
|
||||||
|
# a region decides to change its base offset from UTC.
|
||||||
|
#
|
||||||
|
# 2) There may be versions of "double daylight" time where the tail end of
|
||||||
|
# the analysis gives up a step too early. I haven't thought about that
|
||||||
|
# enough to say.
|
||||||
|
#
|
||||||
|
# In any case, it's clear that the default fromutc() is strong enough to handle
|
||||||
|
# "almost all" time zones: so long as the standard offset is invariant, it
|
||||||
|
# doesn't matter if daylight time transition points change from year to year, or
|
||||||
|
# if daylight time is skipped in some years; it doesn't matter how large or
|
||||||
|
# small dst() may get within its bounds; and it doesn't even matter if some
|
||||||
|
# perverse time zone returns a negative dst()). So a breaking case must be
|
||||||
|
# pretty bizarre, and a tzinfo subclass can override fromutc() if it is.
|
||||||
|
|
||||||
1. x.o = x.s + x.d
|
|
||||||
This follows from the definition of x.s.
|
|
||||||
|
|
||||||
2. If x and y have the same tzinfo member, x.s = y.s.
|
|
||||||
This is actually a requirement, an assumption we need to make about
|
|
||||||
sane tzinfo classes.
|
|
||||||
|
|
||||||
3. The naive UTC time corresponding to x is x.n - x.o.
|
|
||||||
This is again a requirement for a sane tzinfo class.
|
|
||||||
|
|
||||||
4. (x+k).s = x.s
|
|
||||||
This follows from #2, and that datetime+timedelta preserves tzinfo.
|
|
||||||
|
|
||||||
5. (x+k).n = x.n + k
|
|
||||||
Again follows from how arithmetic is defined.
|
|
||||||
|
|
||||||
Now we can explain tz.fromutc(x). Let's assume it's an interesting case
|
|
||||||
(meaning that the various tzinfo methods exist, and don't blow up or return
|
|
||||||
None when called).
|
|
||||||
|
|
||||||
The function wants to return a datetime y with timezone tz, equivalent to x.
|
|
||||||
x is already in UTC.
|
|
||||||
|
|
||||||
By #3, we want
|
|
||||||
|
|
||||||
y.n - y.o = x.n [1]
|
|
||||||
|
|
||||||
The algorithm starts by attaching tz to x.n, and calling that y. So
|
|
||||||
x.n = y.n at the start. Then it wants to add a duration k to y, so that [1]
|
|
||||||
becomes true; in effect, we want to solve [2] for k:
|
|
||||||
|
|
||||||
(y+k).n - (y+k).o = x.n [2]
|
|
||||||
|
|
||||||
By #1, this is the same as
|
|
||||||
|
|
||||||
(y+k).n - ((y+k).s + (y+k).d) = x.n [3]
|
|
||||||
|
|
||||||
By #5, (y+k).n = y.n + k, which equals x.n + k because x.n=y.n at the start.
|
|
||||||
Substituting that into [3],
|
|
||||||
|
|
||||||
x.n + k - (y+k).s - (y+k).d = x.n; the x.n terms cancel, leaving
|
|
||||||
k - (y+k).s - (y+k).d = 0; rearranging,
|
|
||||||
k = (y+k).s + (y+k).d; by #4, (y+k).s == y.s, so
|
|
||||||
k = y.s + (y+k).d
|
|
||||||
|
|
||||||
On the RHS, (y+k).d can't be computed directly, but y.s can be, and we
|
|
||||||
approximate k by ignoring the (y+k).d term at first. Note that k can't be
|
|
||||||
very large, since all offset-returning methods return a duration of magnitude
|
|
||||||
less than 24 hours. For that reason, if y is firmly in std time, (y+k).d must
|
|
||||||
be 0, so ignoring it has no consequence then.
|
|
||||||
|
|
||||||
In any case, the new value is
|
|
||||||
|
|
||||||
z = y + y.s [4]
|
|
||||||
|
|
||||||
It's helpful to step back and look at [4] from a higher level: it's simply
|
|
||||||
mapping from UTC to tz's standard time.
|
|
||||||
|
|
||||||
At this point, if
|
|
||||||
|
|
||||||
z.n - z.o = x.n [5]
|
|
||||||
|
|
||||||
we have an equivalent time, and are almost done. The insecurity here is
|
|
||||||
at the start of daylight time. Picture US Eastern for concreteness. The wall
|
|
||||||
time jumps from 1:59 to 3:00, and wall hours of the form 2:MM don't make good
|
|
||||||
sense then. The docs ask that an Eastern tzinfo class consider such a time to
|
|
||||||
be EDT (because it's "after 2"), which is a redundant spelling of 1:MM EST
|
|
||||||
on the day DST starts. We want to return the 1:MM EST spelling because that's
|
|
||||||
the only spelling that makes sense on the local wall clock.
|
|
||||||
|
|
||||||
In fact, if [5] holds at this point, we do have the standard-time spelling,
|
|
||||||
but that takes a bit of proof. We first prove a stronger result. What's the
|
|
||||||
difference between the LHS and RHS of [5]? Let
|
|
||||||
|
|
||||||
diff = x.n - (z.n - z.o) [6]
|
|
||||||
|
|
||||||
Now
|
|
||||||
z.n = by [4]
|
|
||||||
(y + y.s).n = by #5
|
|
||||||
y.n + y.s = since y.n = x.n
|
|
||||||
x.n + y.s = since z and y have the same tzinfo member,
|
|
||||||
y.s = z.s by #2
|
|
||||||
x.n + z.s
|
|
||||||
|
|
||||||
Plugging that back into [6] gives
|
|
||||||
|
|
||||||
diff =
|
|
||||||
x.n - ((x.n + z.s) - z.o) = expanding
|
|
||||||
x.n - x.n - z.s + z.o = cancelling
|
|
||||||
- z.s + z.o = by #1
|
|
||||||
z.d
|
|
||||||
|
|
||||||
So diff = z.d.
|
|
||||||
|
|
||||||
If [5] is true now, diff = 0, so z.d = 0 too, and we have the standard-time
|
|
||||||
spelling we wanted in the endcase described above. We're done. Conversely,
|
|
||||||
if z.d = 0, then we have a UTC equivalent, and are also done.
|
|
||||||
|
|
||||||
If [5] is not true now, diff = z.d != 0, and z.d is the offset we need to
|
|
||||||
add to z (in effect, z is in tz's standard time, and we need to shift the
|
|
||||||
local clock into tz's daylight time).
|
|
||||||
|
|
||||||
Let
|
|
||||||
|
|
||||||
z' = z + z.d = z + diff [7]
|
|
||||||
|
|
||||||
and we can again ask whether
|
|
||||||
|
|
||||||
z'.n - z'.o = x.n [8]
|
|
||||||
|
|
||||||
If so, we're done. If not, the tzinfo class is insane, according to the
|
|
||||||
assumptions we've made. This also requires a bit of proof. As before, let's
|
|
||||||
compute the difference between the LHS and RHS of [8] (and skipping some of
|
|
||||||
the justifications for the kinds of substitutions we've done several times
|
|
||||||
already):
|
|
||||||
|
|
||||||
diff' = x.n - (z'.n - z'.o) = replacing z'.n via [7]
|
|
||||||
x.n - (z.n + diff - z'.o) = replacing diff via [6]
|
|
||||||
x.n - (z.n + x.n - (z.n - z.o) - z'.o) =
|
|
||||||
x.n - z.n - x.n + z.n - z.o + z'.o = cancel x.n
|
|
||||||
- z.n + z.n - z.o + z'.o = cancel z.n
|
|
||||||
- z.o + z'.o = #1 twice
|
|
||||||
-z.s - z.d + z'.s + z'.d = z and z' have same tzinfo
|
|
||||||
z'.d - z.d
|
|
||||||
|
|
||||||
So z' is UTC-equivalent to x iff z'.d = z.d at this point. If they are equal,
|
|
||||||
we've found the UTC-equivalent so are done. In fact, we stop with [7] and
|
|
||||||
return z', not bothering to compute z'.d.
|
|
||||||
|
|
||||||
How could z.d and z'.d differ? z' = z + z.d [7], so merely moving z' by
|
|
||||||
a dst() offset, and starting *from* a time already in DST (we know z.d != 0),
|
|
||||||
would have to change the result dst() returns: we start in DST, and moving
|
|
||||||
a little further into it takes us out of DST.
|
|
||||||
|
|
||||||
There isn't a sane case where this can happen. The closest it gets is at
|
|
||||||
the end of DST, where there's an hour in UTC with no spelling in a hybrid
|
|
||||||
tzinfo class. In US Eastern, that's 5:MM UTC = 0:MM EST = 1:MM EDT. During
|
|
||||||
that hour, on an Eastern clock 1:MM is taken as being in standard time (6:MM
|
|
||||||
UTC) because the docs insist on that, but 0:MM is taken as being in daylight
|
|
||||||
time (4:MM UTC). There is no local time mapping to 5:MM UTC. The local
|
|
||||||
clock jumps from 1:59 back to 1:00 again, and repeats the 1:MM hour in
|
|
||||||
standard time. Since that's what the local clock *does*, we want to map both
|
|
||||||
UTC hours 5:MM and 6:MM to 1:MM Eastern. The result is ambiguous
|
|
||||||
in local time, but so it goes -- it's the way the local clock works.
|
|
||||||
|
|
||||||
When x = 5:MM UTC is the input to this algorithm, x.o=0, y.o=-5 and y.d=0,
|
|
||||||
so z=0:MM. z.d=60 (minutes) then, so [5] doesn't hold and we keep going.
|
|
||||||
z' = z + z.d = 1:MM then, and z'.d=0, and z'.d - z.d = -60 != 0 so [8]
|
|
||||||
(correctly) concludes that z' is not UTC-equivalent to x.
|
|
||||||
|
|
||||||
Because we know z.d said z was in daylight time (else [5] would have held and
|
|
||||||
we would have stopped then), and we know z.d != z'.d (else [8] would have held
|
|
||||||
and we would have stopped then), and there are only 2 possible values dst() can
|
|
||||||
return in Eastern, it follows that z'.d must be 0 (which it is in the example,
|
|
||||||
but the reasoning doesn't depend on the example -- it depends on there being
|
|
||||||
two possible dst() outcomes, one zero and the other non-zero). Therefore
|
|
||||||
z' must be in standard time, and is the spelling we want in this case.
|
|
||||||
|
|
||||||
Note again that z' is not UTC-equivalent as far as the hybrid tzinfo class is
|
|
||||||
concerned (because it takes z' as being in standard time rather than the
|
|
||||||
daylight time we intend here), but returning it gives the real-life "local
|
|
||||||
clock repeats an hour" behavior when mapping the "unspellable" UTC hour into
|
|
||||||
tz.
|
|
||||||
|
|
||||||
When the input is 6:MM, z=1:MM and z.d=0, and we stop at once, again with
|
|
||||||
the 1:MM standard time spelling we want.
|
|
||||||
|
|
||||||
So how can this break? One of the assumptions must be violated. Two
|
|
||||||
possibilities:
|
|
||||||
|
|
||||||
1) #2 effectively says that y.s is invariant across all y belonging to a given
|
|
||||||
time zone. This isn't true if, for political reasons or continental drift,
|
|
||||||
a region decides to change its base offset from UTC.
|
|
||||||
|
|
||||||
2) There may be versions of "double daylight" time where the tail end of
|
|
||||||
the analysis gives up a step too early. I haven't thought about that
|
|
||||||
enough to say.
|
|
||||||
|
|
||||||
In any case, it's clear that the default fromutc() is strong enough to handle
|
|
||||||
"almost all" time zones: so long as the standard offset is invariant, it
|
|
||||||
doesn't matter if daylight time transition points change from year to year, or
|
|
||||||
if daylight time is skipped in some years; it doesn't matter how large or
|
|
||||||
small dst() may get within its bounds; and it doesn't even matter if some
|
|
||||||
perverse time zone returns a negative dst()). So a breaking case must be
|
|
||||||
pretty bizarre, and a tzinfo subclass can override fromutc() if it is.
|
|
||||||
"""
|
|
||||||
try:
|
try:
|
||||||
from _datetime import *
|
from _datetime import *
|
||||||
except ImportError:
|
except ImportError:
|
||||||
|
|
|
@ -1317,24 +1317,22 @@ RouteComponentMarker = ValueTerminal('@', 'route-component-marker')
|
||||||
# Parser
|
# Parser
|
||||||
#
|
#
|
||||||
|
|
||||||
"""Parse strings according to RFC822/2047/2822/5322 rules.
|
# Parse strings according to RFC822/2047/2822/5322 rules.
|
||||||
|
#
|
||||||
This is a stateless parser. Each get_XXX function accepts a string and
|
# This is a stateless parser. Each get_XXX function accepts a string and
|
||||||
returns either a Terminal or a TokenList representing the RFC object named
|
# returns either a Terminal or a TokenList representing the RFC object named
|
||||||
by the method and a string containing the remaining unparsed characters
|
# by the method and a string containing the remaining unparsed characters
|
||||||
from the input. Thus a parser method consumes the next syntactic construct
|
# from the input. Thus a parser method consumes the next syntactic construct
|
||||||
of a given type and returns a token representing the construct plus the
|
# of a given type and returns a token representing the construct plus the
|
||||||
unparsed remainder of the input string.
|
# unparsed remainder of the input string.
|
||||||
|
#
|
||||||
For example, if the first element of a structured header is a 'phrase',
|
# For example, if the first element of a structured header is a 'phrase',
|
||||||
then:
|
# then:
|
||||||
|
#
|
||||||
phrase, value = get_phrase(value)
|
# phrase, value = get_phrase(value)
|
||||||
|
#
|
||||||
returns the complete phrase from the start of the string value, plus any
|
# returns the complete phrase from the start of the string value, plus any
|
||||||
characters left in the string after the phrase is removed.
|
# characters left in the string after the phrase is removed.
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
_wsp_splitter = re.compile(r'([{}]+)'.format(''.join(WSP))).split
|
_wsp_splitter = re.compile(r'([{}]+)'.format(''.join(WSP))).split
|
||||||
_non_atom_end_matcher = re.compile(r"[^{}]+".format(
|
_non_atom_end_matcher = re.compile(r"[^{}]+".format(
|
||||||
|
|
|
@ -299,101 +299,100 @@ def _call_with_frames_removed(f, *args, **kwds):
|
||||||
|
|
||||||
# Finder/loader utility code ###############################################
|
# Finder/loader utility code ###############################################
|
||||||
|
|
||||||
"""Magic word to reject .pyc files generated by other Python versions.
|
# Magic word to reject .pyc files generated by other Python versions.
|
||||||
It should change for each incompatible change to the bytecode.
|
# It should change for each incompatible change to the bytecode.
|
||||||
|
#
|
||||||
|
# The value of CR and LF is incorporated so if you ever read or write
|
||||||
|
# a .pyc file in text mode the magic number will be wrong; also, the
|
||||||
|
# Apple MPW compiler swaps their values, botching string constants.
|
||||||
|
#
|
||||||
|
# The magic numbers must be spaced apart at least 2 values, as the
|
||||||
|
# -U interpreter flag will cause MAGIC+1 being used. They have been
|
||||||
|
# odd numbers for some time now.
|
||||||
|
#
|
||||||
|
# There were a variety of old schemes for setting the magic number.
|
||||||
|
# The current working scheme is to increment the previous value by
|
||||||
|
# 10.
|
||||||
|
#
|
||||||
|
# Starting with the adoption of PEP 3147 in Python 3.2, every bump in magic
|
||||||
|
# number also includes a new "magic tag", i.e. a human readable string used
|
||||||
|
# to represent the magic number in __pycache__ directories. When you change
|
||||||
|
# the magic number, you must also set a new unique magic tag. Generally this
|
||||||
|
# can be named after the Python major version of the magic number bump, but
|
||||||
|
# it can really be anything, as long as it's different than anything else
|
||||||
|
# that's come before. The tags are included in the following table, starting
|
||||||
|
# with Python 3.2a0.
|
||||||
|
#
|
||||||
|
# Known values:
|
||||||
|
# Python 1.5: 20121
|
||||||
|
# Python 1.5.1: 20121
|
||||||
|
# Python 1.5.2: 20121
|
||||||
|
# Python 1.6: 50428
|
||||||
|
# Python 2.0: 50823
|
||||||
|
# Python 2.0.1: 50823
|
||||||
|
# Python 2.1: 60202
|
||||||
|
# Python 2.1.1: 60202
|
||||||
|
# Python 2.1.2: 60202
|
||||||
|
# Python 2.2: 60717
|
||||||
|
# Python 2.3a0: 62011
|
||||||
|
# Python 2.3a0: 62021
|
||||||
|
# Python 2.3a0: 62011 (!)
|
||||||
|
# Python 2.4a0: 62041
|
||||||
|
# Python 2.4a3: 62051
|
||||||
|
# Python 2.4b1: 62061
|
||||||
|
# Python 2.5a0: 62071
|
||||||
|
# Python 2.5a0: 62081 (ast-branch)
|
||||||
|
# Python 2.5a0: 62091 (with)
|
||||||
|
# Python 2.5a0: 62092 (changed WITH_CLEANUP opcode)
|
||||||
|
# Python 2.5b3: 62101 (fix wrong code: for x, in ...)
|
||||||
|
# Python 2.5b3: 62111 (fix wrong code: x += yield)
|
||||||
|
# Python 2.5c1: 62121 (fix wrong lnotab with for loops and
|
||||||
|
# storing constants that should have been removed)
|
||||||
|
# Python 2.5c2: 62131 (fix wrong code: for x, in ... in listcomp/genexp)
|
||||||
|
# Python 2.6a0: 62151 (peephole optimizations and STORE_MAP opcode)
|
||||||
|
# Python 2.6a1: 62161 (WITH_CLEANUP optimization)
|
||||||
|
# Python 2.7a0: 62171 (optimize list comprehensions/change LIST_APPEND)
|
||||||
|
# Python 2.7a0: 62181 (optimize conditional branches:
|
||||||
|
# introduce POP_JUMP_IF_FALSE and POP_JUMP_IF_TRUE)
|
||||||
|
# Python 2.7a0 62191 (introduce SETUP_WITH)
|
||||||
|
# Python 2.7a0 62201 (introduce BUILD_SET)
|
||||||
|
# Python 2.7a0 62211 (introduce MAP_ADD and SET_ADD)
|
||||||
|
# Python 3000: 3000
|
||||||
|
# 3010 (removed UNARY_CONVERT)
|
||||||
|
# 3020 (added BUILD_SET)
|
||||||
|
# 3030 (added keyword-only parameters)
|
||||||
|
# 3040 (added signature annotations)
|
||||||
|
# 3050 (print becomes a function)
|
||||||
|
# 3060 (PEP 3115 metaclass syntax)
|
||||||
|
# 3061 (string literals become unicode)
|
||||||
|
# 3071 (PEP 3109 raise changes)
|
||||||
|
# 3081 (PEP 3137 make __file__ and __name__ unicode)
|
||||||
|
# 3091 (kill str8 interning)
|
||||||
|
# 3101 (merge from 2.6a0, see 62151)
|
||||||
|
# 3103 (__file__ points to source file)
|
||||||
|
# Python 3.0a4: 3111 (WITH_CLEANUP optimization).
|
||||||
|
# Python 3.0a5: 3131 (lexical exception stacking, including POP_EXCEPT)
|
||||||
|
# Python 3.1a0: 3141 (optimize list, set and dict comprehensions:
|
||||||
|
# change LIST_APPEND and SET_ADD, add MAP_ADD)
|
||||||
|
# Python 3.1a0: 3151 (optimize conditional branches:
|
||||||
|
# introduce POP_JUMP_IF_FALSE and POP_JUMP_IF_TRUE)
|
||||||
|
# Python 3.2a0: 3160 (add SETUP_WITH)
|
||||||
|
# tag: cpython-32
|
||||||
|
# Python 3.2a1: 3170 (add DUP_TOP_TWO, remove DUP_TOPX and ROT_FOUR)
|
||||||
|
# tag: cpython-32
|
||||||
|
# Python 3.2a2 3180 (add DELETE_DEREF)
|
||||||
|
# Python 3.3a0 3190 __class__ super closure changed
|
||||||
|
# Python 3.3a0 3200 (__qualname__ added)
|
||||||
|
# 3210 (added size modulo 2**32 to the pyc header)
|
||||||
|
# Python 3.3a1 3220 (changed PEP 380 implementation)
|
||||||
|
# Python 3.3a4 3230 (revert changes to implicit __class__ closure)
|
||||||
|
# Python 3.4a1 3250 (evaluate positional default arguments before
|
||||||
|
# keyword-only defaults)
|
||||||
|
#
|
||||||
|
# MAGIC must change whenever the bytecode emitted by the compiler may no
|
||||||
|
# longer be understood by older implementations of the eval loop (usually
|
||||||
|
# due to the addition of new opcodes).
|
||||||
|
|
||||||
The value of CR and LF is incorporated so if you ever read or write
|
|
||||||
a .pyc file in text mode the magic number will be wrong; also, the
|
|
||||||
Apple MPW compiler swaps their values, botching string constants.
|
|
||||||
|
|
||||||
The magic numbers must be spaced apart at least 2 values, as the
|
|
||||||
-U interpreter flag will cause MAGIC+1 being used. They have been
|
|
||||||
odd numbers for some time now.
|
|
||||||
|
|
||||||
There were a variety of old schemes for setting the magic number.
|
|
||||||
The current working scheme is to increment the previous value by
|
|
||||||
10.
|
|
||||||
|
|
||||||
Starting with the adoption of PEP 3147 in Python 3.2, every bump in magic
|
|
||||||
number also includes a new "magic tag", i.e. a human readable string used
|
|
||||||
to represent the magic number in __pycache__ directories. When you change
|
|
||||||
the magic number, you must also set a new unique magic tag. Generally this
|
|
||||||
can be named after the Python major version of the magic number bump, but
|
|
||||||
it can really be anything, as long as it's different than anything else
|
|
||||||
that's come before. The tags are included in the following table, starting
|
|
||||||
with Python 3.2a0.
|
|
||||||
|
|
||||||
Known values:
|
|
||||||
Python 1.5: 20121
|
|
||||||
Python 1.5.1: 20121
|
|
||||||
Python 1.5.2: 20121
|
|
||||||
Python 1.6: 50428
|
|
||||||
Python 2.0: 50823
|
|
||||||
Python 2.0.1: 50823
|
|
||||||
Python 2.1: 60202
|
|
||||||
Python 2.1.1: 60202
|
|
||||||
Python 2.1.2: 60202
|
|
||||||
Python 2.2: 60717
|
|
||||||
Python 2.3a0: 62011
|
|
||||||
Python 2.3a0: 62021
|
|
||||||
Python 2.3a0: 62011 (!)
|
|
||||||
Python 2.4a0: 62041
|
|
||||||
Python 2.4a3: 62051
|
|
||||||
Python 2.4b1: 62061
|
|
||||||
Python 2.5a0: 62071
|
|
||||||
Python 2.5a0: 62081 (ast-branch)
|
|
||||||
Python 2.5a0: 62091 (with)
|
|
||||||
Python 2.5a0: 62092 (changed WITH_CLEANUP opcode)
|
|
||||||
Python 2.5b3: 62101 (fix wrong code: for x, in ...)
|
|
||||||
Python 2.5b3: 62111 (fix wrong code: x += yield)
|
|
||||||
Python 2.5c1: 62121 (fix wrong lnotab with for loops and
|
|
||||||
storing constants that should have been removed)
|
|
||||||
Python 2.5c2: 62131 (fix wrong code: for x, in ... in listcomp/genexp)
|
|
||||||
Python 2.6a0: 62151 (peephole optimizations and STORE_MAP opcode)
|
|
||||||
Python 2.6a1: 62161 (WITH_CLEANUP optimization)
|
|
||||||
Python 2.7a0: 62171 (optimize list comprehensions/change LIST_APPEND)
|
|
||||||
Python 2.7a0: 62181 (optimize conditional branches:
|
|
||||||
introduce POP_JUMP_IF_FALSE and POP_JUMP_IF_TRUE)
|
|
||||||
Python 2.7a0 62191 (introduce SETUP_WITH)
|
|
||||||
Python 2.7a0 62201 (introduce BUILD_SET)
|
|
||||||
Python 2.7a0 62211 (introduce MAP_ADD and SET_ADD)
|
|
||||||
Python 3000: 3000
|
|
||||||
3010 (removed UNARY_CONVERT)
|
|
||||||
3020 (added BUILD_SET)
|
|
||||||
3030 (added keyword-only parameters)
|
|
||||||
3040 (added signature annotations)
|
|
||||||
3050 (print becomes a function)
|
|
||||||
3060 (PEP 3115 metaclass syntax)
|
|
||||||
3061 (string literals become unicode)
|
|
||||||
3071 (PEP 3109 raise changes)
|
|
||||||
3081 (PEP 3137 make __file__ and __name__ unicode)
|
|
||||||
3091 (kill str8 interning)
|
|
||||||
3101 (merge from 2.6a0, see 62151)
|
|
||||||
3103 (__file__ points to source file)
|
|
||||||
Python 3.0a4: 3111 (WITH_CLEANUP optimization).
|
|
||||||
Python 3.0a5: 3131 (lexical exception stacking, including POP_EXCEPT)
|
|
||||||
Python 3.1a0: 3141 (optimize list, set and dict comprehensions:
|
|
||||||
change LIST_APPEND and SET_ADD, add MAP_ADD)
|
|
||||||
Python 3.1a0: 3151 (optimize conditional branches:
|
|
||||||
introduce POP_JUMP_IF_FALSE and POP_JUMP_IF_TRUE)
|
|
||||||
Python 3.2a0: 3160 (add SETUP_WITH)
|
|
||||||
tag: cpython-32
|
|
||||||
Python 3.2a1: 3170 (add DUP_TOP_TWO, remove DUP_TOPX and ROT_FOUR)
|
|
||||||
tag: cpython-32
|
|
||||||
Python 3.2a2 3180 (add DELETE_DEREF)
|
|
||||||
Python 3.3a0 3190 __class__ super closure changed
|
|
||||||
Python 3.3a0 3200 (__qualname__ added)
|
|
||||||
3210 (added size modulo 2**32 to the pyc header)
|
|
||||||
Python 3.3a1 3220 (changed PEP 380 implementation)
|
|
||||||
Python 3.3a4 3230 (revert changes to implicit __class__ closure)
|
|
||||||
Python 3.4a1 3250 (evaluate positional default arguments before
|
|
||||||
keyword-only defaults)
|
|
||||||
|
|
||||||
MAGIC must change whenever the bytecode emitted by the compiler may no
|
|
||||||
longer be understood by older implementations of the eval loop (usually
|
|
||||||
due to the addition of new opcodes).
|
|
||||||
|
|
||||||
"""
|
|
||||||
_MAGIC_BYTES = (3250).to_bytes(2, 'little') + b'\r\n'
|
_MAGIC_BYTES = (3250).to_bytes(2, 'little') + b'\r\n'
|
||||||
_RAW_MAGIC_NUMBER = int.from_bytes(_MAGIC_BYTES, 'little')
|
_RAW_MAGIC_NUMBER = int.from_bytes(_MAGIC_BYTES, 'little')
|
||||||
|
|
||||||
|
|
|
@ -825,10 +825,9 @@ class SysLogHandler(logging.Handler):
|
||||||
msg = self.ident + msg
|
msg = self.ident + msg
|
||||||
if self.append_nul:
|
if self.append_nul:
|
||||||
msg += '\000'
|
msg += '\000'
|
||||||
"""
|
|
||||||
We need to convert record level to lowercase, maybe this will
|
# We need to convert record level to lowercase, maybe this will
|
||||||
change in the future.
|
# change in the future.
|
||||||
"""
|
|
||||||
prio = '<%d>' % self.encodePriority(self.facility,
|
prio = '<%d>' % self.encodePriority(self.facility,
|
||||||
self.mapPriority(record.levelname))
|
self.mapPriority(record.levelname))
|
||||||
prio = prio.encode('utf-8')
|
prio = prio.encode('utf-8')
|
||||||
|
|
|
@ -33,119 +33,118 @@ bytes_types = pickle.bytes_types
|
||||||
# by a later GET.
|
# by a later GET.
|
||||||
|
|
||||||
|
|
||||||
"""
|
# "A pickle" is a program for a virtual pickle machine (PM, but more accurately
|
||||||
"A pickle" is a program for a virtual pickle machine (PM, but more accurately
|
# called an unpickling machine). It's a sequence of opcodes, interpreted by the
|
||||||
called an unpickling machine). It's a sequence of opcodes, interpreted by the
|
# PM, building an arbitrarily complex Python object.
|
||||||
PM, building an arbitrarily complex Python object.
|
#
|
||||||
|
# For the most part, the PM is very simple: there are no looping, testing, or
|
||||||
|
# conditional instructions, no arithmetic and no function calls. Opcodes are
|
||||||
|
# executed once each, from first to last, until a STOP opcode is reached.
|
||||||
|
#
|
||||||
|
# The PM has two data areas, "the stack" and "the memo".
|
||||||
|
#
|
||||||
|
# Many opcodes push Python objects onto the stack; e.g., INT pushes a Python
|
||||||
|
# integer object on the stack, whose value is gotten from a decimal string
|
||||||
|
# literal immediately following the INT opcode in the pickle bytestream. Other
|
||||||
|
# opcodes take Python objects off the stack. The result of unpickling is
|
||||||
|
# whatever object is left on the stack when the final STOP opcode is executed.
|
||||||
|
#
|
||||||
|
# The memo is simply an array of objects, or it can be implemented as a dict
|
||||||
|
# mapping little integers to objects. The memo serves as the PM's "long term
|
||||||
|
# memory", and the little integers indexing the memo are akin to variable
|
||||||
|
# names. Some opcodes pop a stack object into the memo at a given index,
|
||||||
|
# and others push a memo object at a given index onto the stack again.
|
||||||
|
#
|
||||||
|
# At heart, that's all the PM has. Subtleties arise for these reasons:
|
||||||
|
#
|
||||||
|
# + Object identity. Objects can be arbitrarily complex, and subobjects
|
||||||
|
# may be shared (for example, the list [a, a] refers to the same object a
|
||||||
|
# twice). It can be vital that unpickling recreate an isomorphic object
|
||||||
|
# graph, faithfully reproducing sharing.
|
||||||
|
#
|
||||||
|
# + Recursive objects. For example, after "L = []; L.append(L)", L is a
|
||||||
|
# list, and L[0] is the same list. This is related to the object identity
|
||||||
|
# point, and some sequences of pickle opcodes are subtle in order to
|
||||||
|
# get the right result in all cases.
|
||||||
|
#
|
||||||
|
# + Things pickle doesn't know everything about. Examples of things pickle
|
||||||
|
# does know everything about are Python's builtin scalar and container
|
||||||
|
# types, like ints and tuples. They generally have opcodes dedicated to
|
||||||
|
# them. For things like module references and instances of user-defined
|
||||||
|
# classes, pickle's knowledge is limited. Historically, many enhancements
|
||||||
|
# have been made to the pickle protocol in order to do a better (faster,
|
||||||
|
# and/or more compact) job on those.
|
||||||
|
#
|
||||||
|
# + Backward compatibility and micro-optimization. As explained below,
|
||||||
|
# pickle opcodes never go away, not even when better ways to do a thing
|
||||||
|
# get invented. The repertoire of the PM just keeps growing over time.
|
||||||
|
# For example, protocol 0 had two opcodes for building Python integers (INT
|
||||||
|
# and LONG), protocol 1 added three more for more-efficient pickling of short
|
||||||
|
# integers, and protocol 2 added two more for more-efficient pickling of
|
||||||
|
# long integers (before protocol 2, the only ways to pickle a Python long
|
||||||
|
# took time quadratic in the number of digits, for both pickling and
|
||||||
|
# unpickling). "Opcode bloat" isn't so much a subtlety as a source of
|
||||||
|
# wearying complication.
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# Pickle protocols:
|
||||||
|
#
|
||||||
|
# For compatibility, the meaning of a pickle opcode never changes. Instead new
|
||||||
|
# pickle opcodes get added, and each version's unpickler can handle all the
|
||||||
|
# pickle opcodes in all protocol versions to date. So old pickles continue to
|
||||||
|
# be readable forever. The pickler can generally be told to restrict itself to
|
||||||
|
# the subset of opcodes available under previous protocol versions too, so that
|
||||||
|
# users can create pickles under the current version readable by older
|
||||||
|
# versions. However, a pickle does not contain its version number embedded
|
||||||
|
# within it. If an older unpickler tries to read a pickle using a later
|
||||||
|
# protocol, the result is most likely an exception due to seeing an unknown (in
|
||||||
|
# the older unpickler) opcode.
|
||||||
|
#
|
||||||
|
# The original pickle used what's now called "protocol 0", and what was called
|
||||||
|
# "text mode" before Python 2.3. The entire pickle bytestream is made up of
|
||||||
|
# printable 7-bit ASCII characters, plus the newline character, in protocol 0.
|
||||||
|
# That's why it was called text mode. Protocol 0 is small and elegant, but
|
||||||
|
# sometimes painfully inefficient.
|
||||||
|
#
|
||||||
|
# The second major set of additions is now called "protocol 1", and was called
|
||||||
|
# "binary mode" before Python 2.3. This added many opcodes with arguments
|
||||||
|
# consisting of arbitrary bytes, including NUL bytes and unprintable "high bit"
|
||||||
|
# bytes. Binary mode pickles can be substantially smaller than equivalent
|
||||||
|
# text mode pickles, and sometimes faster too; e.g., BININT represents a 4-byte
|
||||||
|
# int as 4 bytes following the opcode, which is cheaper to unpickle than the
|
||||||
|
# (perhaps) 11-character decimal string attached to INT. Protocol 1 also added
|
||||||
|
# a number of opcodes that operate on many stack elements at once (like APPENDS
|
||||||
|
# and SETITEMS), and "shortcut" opcodes (like EMPTY_DICT and EMPTY_TUPLE).
|
||||||
|
#
|
||||||
|
# The third major set of additions came in Python 2.3, and is called "protocol
|
||||||
|
# 2". This added:
|
||||||
|
#
|
||||||
|
# - A better way to pickle instances of new-style classes (NEWOBJ).
|
||||||
|
#
|
||||||
|
# - A way for a pickle to identify its protocol (PROTO).
|
||||||
|
#
|
||||||
|
# - Time- and space- efficient pickling of long ints (LONG{1,4}).
|
||||||
|
#
|
||||||
|
# - Shortcuts for small tuples (TUPLE{1,2,3}).
|
||||||
|
#
|
||||||
|
# - Dedicated opcodes for bools (NEWTRUE, NEWFALSE).
|
||||||
|
#
|
||||||
|
# - The "extension registry", a vector of popular objects that can be pushed
|
||||||
|
# efficiently by index (EXT{1,2,4}). This is akin to the memo and GET, but
|
||||||
|
# the registry contents are predefined (there's nothing akin to the memo's
|
||||||
|
# PUT).
|
||||||
|
#
|
||||||
|
# Another independent change with Python 2.3 is the abandonment of any
|
||||||
|
# pretense that it might be safe to load pickles received from untrusted
|
||||||
|
# parties -- no sufficient security analysis has been done to guarantee
|
||||||
|
# this and there isn't a use case that warrants the expense of such an
|
||||||
|
# analysis.
|
||||||
|
#
|
||||||
|
# To this end, all tests for __safe_for_unpickling__ or for
|
||||||
|
# copyreg.safe_constructors are removed from the unpickling code.
|
||||||
|
# References to these variables in the descriptions below are to be seen
|
||||||
|
# as describing unpickling in Python 2.2 and before.
|
||||||
|
|
||||||
For the most part, the PM is very simple: there are no looping, testing, or
|
|
||||||
conditional instructions, no arithmetic and no function calls. Opcodes are
|
|
||||||
executed once each, from first to last, until a STOP opcode is reached.
|
|
||||||
|
|
||||||
The PM has two data areas, "the stack" and "the memo".
|
|
||||||
|
|
||||||
Many opcodes push Python objects onto the stack; e.g., INT pushes a Python
|
|
||||||
integer object on the stack, whose value is gotten from a decimal string
|
|
||||||
literal immediately following the INT opcode in the pickle bytestream. Other
|
|
||||||
opcodes take Python objects off the stack. The result of unpickling is
|
|
||||||
whatever object is left on the stack when the final STOP opcode is executed.
|
|
||||||
|
|
||||||
The memo is simply an array of objects, or it can be implemented as a dict
|
|
||||||
mapping little integers to objects. The memo serves as the PM's "long term
|
|
||||||
memory", and the little integers indexing the memo are akin to variable
|
|
||||||
names. Some opcodes pop a stack object into the memo at a given index,
|
|
||||||
and others push a memo object at a given index onto the stack again.
|
|
||||||
|
|
||||||
At heart, that's all the PM has. Subtleties arise for these reasons:
|
|
||||||
|
|
||||||
+ Object identity. Objects can be arbitrarily complex, and subobjects
|
|
||||||
may be shared (for example, the list [a, a] refers to the same object a
|
|
||||||
twice). It can be vital that unpickling recreate an isomorphic object
|
|
||||||
graph, faithfully reproducing sharing.
|
|
||||||
|
|
||||||
+ Recursive objects. For example, after "L = []; L.append(L)", L is a
|
|
||||||
list, and L[0] is the same list. This is related to the object identity
|
|
||||||
point, and some sequences of pickle opcodes are subtle in order to
|
|
||||||
get the right result in all cases.
|
|
||||||
|
|
||||||
+ Things pickle doesn't know everything about. Examples of things pickle
|
|
||||||
does know everything about are Python's builtin scalar and container
|
|
||||||
types, like ints and tuples. They generally have opcodes dedicated to
|
|
||||||
them. For things like module references and instances of user-defined
|
|
||||||
classes, pickle's knowledge is limited. Historically, many enhancements
|
|
||||||
have been made to the pickle protocol in order to do a better (faster,
|
|
||||||
and/or more compact) job on those.
|
|
||||||
|
|
||||||
+ Backward compatibility and micro-optimization. As explained below,
|
|
||||||
pickle opcodes never go away, not even when better ways to do a thing
|
|
||||||
get invented. The repertoire of the PM just keeps growing over time.
|
|
||||||
For example, protocol 0 had two opcodes for building Python integers (INT
|
|
||||||
and LONG), protocol 1 added three more for more-efficient pickling of short
|
|
||||||
integers, and protocol 2 added two more for more-efficient pickling of
|
|
||||||
long integers (before protocol 2, the only ways to pickle a Python long
|
|
||||||
took time quadratic in the number of digits, for both pickling and
|
|
||||||
unpickling). "Opcode bloat" isn't so much a subtlety as a source of
|
|
||||||
wearying complication.
|
|
||||||
|
|
||||||
|
|
||||||
Pickle protocols:
|
|
||||||
|
|
||||||
For compatibility, the meaning of a pickle opcode never changes. Instead new
|
|
||||||
pickle opcodes get added, and each version's unpickler can handle all the
|
|
||||||
pickle opcodes in all protocol versions to date. So old pickles continue to
|
|
||||||
be readable forever. The pickler can generally be told to restrict itself to
|
|
||||||
the subset of opcodes available under previous protocol versions too, so that
|
|
||||||
users can create pickles under the current version readable by older
|
|
||||||
versions. However, a pickle does not contain its version number embedded
|
|
||||||
within it. If an older unpickler tries to read a pickle using a later
|
|
||||||
protocol, the result is most likely an exception due to seeing an unknown (in
|
|
||||||
the older unpickler) opcode.
|
|
||||||
|
|
||||||
The original pickle used what's now called "protocol 0", and what was called
|
|
||||||
"text mode" before Python 2.3. The entire pickle bytestream is made up of
|
|
||||||
printable 7-bit ASCII characters, plus the newline character, in protocol 0.
|
|
||||||
That's why it was called text mode. Protocol 0 is small and elegant, but
|
|
||||||
sometimes painfully inefficient.
|
|
||||||
|
|
||||||
The second major set of additions is now called "protocol 1", and was called
|
|
||||||
"binary mode" before Python 2.3. This added many opcodes with arguments
|
|
||||||
consisting of arbitrary bytes, including NUL bytes and unprintable "high bit"
|
|
||||||
bytes. Binary mode pickles can be substantially smaller than equivalent
|
|
||||||
text mode pickles, and sometimes faster too; e.g., BININT represents a 4-byte
|
|
||||||
int as 4 bytes following the opcode, which is cheaper to unpickle than the
|
|
||||||
(perhaps) 11-character decimal string attached to INT. Protocol 1 also added
|
|
||||||
a number of opcodes that operate on many stack elements at once (like APPENDS
|
|
||||||
and SETITEMS), and "shortcut" opcodes (like EMPTY_DICT and EMPTY_TUPLE).
|
|
||||||
|
|
||||||
The third major set of additions came in Python 2.3, and is called "protocol
|
|
||||||
2". This added:
|
|
||||||
|
|
||||||
- A better way to pickle instances of new-style classes (NEWOBJ).
|
|
||||||
|
|
||||||
- A way for a pickle to identify its protocol (PROTO).
|
|
||||||
|
|
||||||
- Time- and space- efficient pickling of long ints (LONG{1,4}).
|
|
||||||
|
|
||||||
- Shortcuts for small tuples (TUPLE{1,2,3}).
|
|
||||||
|
|
||||||
- Dedicated opcodes for bools (NEWTRUE, NEWFALSE).
|
|
||||||
|
|
||||||
- The "extension registry", a vector of popular objects that can be pushed
|
|
||||||
efficiently by index (EXT{1,2,4}). This is akin to the memo and GET, but
|
|
||||||
the registry contents are predefined (there's nothing akin to the memo's
|
|
||||||
PUT).
|
|
||||||
|
|
||||||
Another independent change with Python 2.3 is the abandonment of any
|
|
||||||
pretense that it might be safe to load pickles received from untrusted
|
|
||||||
parties -- no sufficient security analysis has been done to guarantee
|
|
||||||
this and there isn't a use case that warrants the expense of such an
|
|
||||||
analysis.
|
|
||||||
|
|
||||||
To this end, all tests for __safe_for_unpickling__ or for
|
|
||||||
copyreg.safe_constructors are removed from the unpickling code.
|
|
||||||
References to these variables in the descriptions below are to be seen
|
|
||||||
as describing unpickling in Python 2.2 and before.
|
|
||||||
"""
|
|
||||||
|
|
||||||
# Meta-rule: Descriptions are stored in instances of descriptor objects,
|
# Meta-rule: Descriptions are stored in instances of descriptor objects,
|
||||||
# with plain constructors. No meta-language is defined from which
|
# with plain constructors. No meta-language is defined from which
|
||||||
|
|
|
@ -2,37 +2,35 @@ import unittest
|
||||||
from test import support
|
from test import support
|
||||||
from _testcapi import getargs_keywords, getargs_keyword_only
|
from _testcapi import getargs_keywords, getargs_keyword_only
|
||||||
|
|
||||||
"""
|
# > How about the following counterproposal. This also changes some of
|
||||||
> How about the following counterproposal. This also changes some of
|
# > the other format codes to be a little more regular.
|
||||||
> the other format codes to be a little more regular.
|
# >
|
||||||
>
|
# > Code C type Range check
|
||||||
> Code C type Range check
|
# >
|
||||||
>
|
# > b unsigned char 0..UCHAR_MAX
|
||||||
> b unsigned char 0..UCHAR_MAX
|
# > h signed short SHRT_MIN..SHRT_MAX
|
||||||
> h signed short SHRT_MIN..SHRT_MAX
|
# > B unsigned char none **
|
||||||
> B unsigned char none **
|
# > H unsigned short none **
|
||||||
> H unsigned short none **
|
# > k * unsigned long none
|
||||||
> k * unsigned long none
|
# > I * unsigned int 0..UINT_MAX
|
||||||
> I * unsigned int 0..UINT_MAX
|
#
|
||||||
|
#
|
||||||
|
# > i int INT_MIN..INT_MAX
|
||||||
> i int INT_MIN..INT_MAX
|
# > l long LONG_MIN..LONG_MAX
|
||||||
> l long LONG_MIN..LONG_MAX
|
#
|
||||||
|
# > K * unsigned long long none
|
||||||
> K * unsigned long long none
|
# > L long long LLONG_MIN..LLONG_MAX
|
||||||
> L long long LLONG_MIN..LLONG_MAX
|
#
|
||||||
|
# > Notes:
|
||||||
> Notes:
|
# >
|
||||||
>
|
# > * New format codes.
|
||||||
> * New format codes.
|
# >
|
||||||
>
|
# > ** Changed from previous "range-and-a-half" to "none"; the
|
||||||
> ** Changed from previous "range-and-a-half" to "none"; the
|
# > range-and-a-half checking wasn't particularly useful.
|
||||||
> range-and-a-half checking wasn't particularly useful.
|
#
|
||||||
|
# Plus a C API or two, e.g. PyInt_AsLongMask() ->
|
||||||
Plus a C API or two, e.g. PyInt_AsLongMask() ->
|
# unsigned long and PyInt_AsLongLongMask() -> unsigned
|
||||||
unsigned long and PyInt_AsLongLongMask() -> unsigned
|
# long long (if that exists).
|
||||||
long long (if that exists).
|
|
||||||
"""
|
|
||||||
|
|
||||||
LARGE = 0x7FFFFFFF
|
LARGE = 0x7FFFFFFF
|
||||||
VERY_LARGE = 0xFF0000121212121212121242
|
VERY_LARGE = 0xFF0000121212121212121242
|
||||||
|
|
|
@ -1337,8 +1337,8 @@ def XMLID(text, parser=None):
|
||||||
ids[id] = elem
|
ids[id] = elem
|
||||||
return tree, ids
|
return tree, ids
|
||||||
|
|
||||||
|
# Parse XML document from string constant. Alias for XML().
|
||||||
fromstring = XML
|
fromstring = XML
|
||||||
"""Parse XML document from string constant. Alias for XML()."""
|
|
||||||
|
|
||||||
def fromstringlist(sequence, parser=None):
|
def fromstringlist(sequence, parser=None):
|
||||||
"""Parse XML document from sequence of string fragments.
|
"""Parse XML document from sequence of string fragments.
|
||||||
|
|
|
@ -52,8 +52,8 @@ verbose = False
|
||||||
recurse = False
|
recurse = False
|
||||||
dryrun = False
|
dryrun = False
|
||||||
makebackup = True
|
makebackup = True
|
||||||
|
# A specified newline to be used in the output (set by --newline option)
|
||||||
spec_newline = None
|
spec_newline = None
|
||||||
"""A specified newline to be used in the output (set by --newline option)"""
|
|
||||||
|
|
||||||
|
|
||||||
def usage(msg=None):
|
def usage(msg=None):
|
||||||
|
|
Loading…
Reference in New Issue