Issue #17516: use comment syntax for comments, instead of multiline string

Victor Stinner 2013-03-26 01:11:54 +01:00
parent 1f8898a591
commit 765531d2d0
11 changed files with 493 additions and 507 deletions


@@ -34,17 +34,15 @@ from _ctypes import FUNCFLAG_CDECL as _FUNCFLAG_CDECL, \
      FUNCFLAG_USE_ERRNO as _FUNCFLAG_USE_ERRNO, \
      FUNCFLAG_USE_LASTERROR as _FUNCFLAG_USE_LASTERROR
-"""
-WINOLEAPI -> HRESULT
-WINOLEAPI_(type)
-
-STDMETHODCALLTYPE
-
-STDMETHOD(name)
-STDMETHOD_(type, name)
-
-STDAPICALLTYPE
-"""
+# WINOLEAPI -> HRESULT
+# WINOLEAPI_(type)
+#
+# STDMETHODCALLTYPE
+#
+# STDMETHOD(name)
+# STDMETHOD_(type, name)
+#
+# STDAPICALLTYPE
 
 def create_string_buffer(init, size=None):
     """create_string_buffer(aBytes) -> character array


@@ -5,17 +5,14 @@ from sys import getrefcount as grc
 
 # XXX This test must be reviewed for correctness!!!
-"""
-ctypes' types are container types.
-
-They have an internal memory block, which only consists of some bytes,
-but it has to keep references to other objects as well. This is not
-really needed for trivial C types like int or char, but it is important
-for aggregate types like strings or pointers in particular.
-
-What about pointers?
-"""
+# ctypes' types are container types.
+#
+# They have an internal memory block, which only consists of some bytes,
+# but it has to keep references to other objects as well. This is not
+# really needed for trivial C types like int or char, but it is important
+# for aggregate types like strings or pointers in particular.
+#
+# What about pointers?
 
 class ObjectsTestCase(unittest.TestCase):
     def assertSame(self, a, b):
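The "container types" point the comment makes can be observed directly. A hedged illustration (the _objects attribute is a CPython implementation detail, which is exactly what this test file pokes at):

    import ctypes

    class Record(ctypes.Structure):
        _fields_ = [("name", ctypes.c_char_p)]

    rec = Record()
    rec.name = b"hello"
    # The structure's memory block stores only a raw pointer; the Python
    # bytes object it points into is kept alive via the private _objects map.
    print(rec._objects)   # e.g. {'0': b'hello'}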


@@ -3,35 +3,33 @@ import sys
 import unittest
 
 # Bob Ippolito:
-"""
-Ok.. the code to find the filename for __getattr__ should look
-something like:
-
-import os
-from macholib.dyld import dyld_find
-
-def find_lib(name):
-    possible = ['lib'+name+'.dylib', name+'.dylib',
-                name+'.framework/'+name]
-    for dylib in possible:
-        try:
-            return os.path.realpath(dyld_find(dylib))
-        except ValueError:
-            pass
-    raise ValueError, "%s not found" % (name,)
-
-It'll have output like this:
-
->>> find_lib('pthread')
-'/usr/lib/libSystem.B.dylib'
->>> find_lib('z')
-'/usr/lib/libz.1.dylib'
->>> find_lib('IOKit')
-'/System/Library/Frameworks/IOKit.framework/Versions/A/IOKit'
-
--bob
-"""
+#
+# Ok.. the code to find the filename for __getattr__ should look
+# something like:
+#
+# import os
+# from macholib.dyld import dyld_find
+#
+# def find_lib(name):
+#     possible = ['lib'+name+'.dylib', name+'.dylib',
+#                 name+'.framework/'+name]
+#     for dylib in possible:
+#         try:
+#             return os.path.realpath(dyld_find(dylib))
+#         except ValueError:
+#             pass
+#     raise ValueError, "%s not found" % (name,)
+#
+# It'll have output like this:
+#
+# >>> find_lib('pthread')
+# '/usr/lib/libSystem.B.dylib'
+# >>> find_lib('z')
+# '/usr/lib/libz.1.dylib'
+# >>> find_lib('IOKit')
+# '/System/Library/Frameworks/IOKit.framework/Versions/A/IOKit'
+#
+# -bob
 
 from ctypes.macholib.dyld import dyld_find
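The quoted sketch is Python 2 (note the old raise statement syntax). A Python 3 rendering, assuming a macOS system where dyld_find can resolve these names:

    import os
    from ctypes.macholib.dyld import dyld_find

    def find_lib(name):
        # Try the common dylib spellings first, then the framework layout.
        possible = ['lib' + name + '.dylib', name + '.dylib',
                    name + '.framework/' + name]
        for dylib in possible:
            try:
                return os.path.realpath(dyld_find(dylib))
            except ValueError:
                pass
        raise ValueError("%s not found" % (name,))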


@@ -1929,203 +1929,203 @@ timezone.utc = timezone._create(timedelta(0))
 timezone.min = timezone._create(timezone._minoffset)
 timezone.max = timezone._create(timezone._maxoffset)
 _EPOCH = datetime(1970, 1, 1, tzinfo=timezone.utc)
-"""
-Some time zone algebra. For a datetime x, let
-x.n = x stripped of its timezone -- its naive time.
-x.o = x.utcoffset(), and assuming that doesn't raise an exception or
-return None
-x.d = x.dst(), and assuming that doesn't raise an exception or
-return None
-x.s = x's standard offset, x.o - x.d
-
-Now some derived rules, where k is a duration (timedelta).
-
-1. x.o = x.s + x.d
-This follows from the definition of x.s.
-
-2. If x and y have the same tzinfo member, x.s = y.s.
-This is actually a requirement, an assumption we need to make about
-sane tzinfo classes.
-
-3. The naive UTC time corresponding to x is x.n - x.o.
-This is again a requirement for a sane tzinfo class.
-
-4. (x+k).s = x.s
-This follows from #2, and that datimetimetz+timedelta preserves tzinfo.
-
-5. (x+k).n = x.n + k
-Again follows from how arithmetic is defined.
-
-Now we can explain tz.fromutc(x). Let's assume it's an interesting case
-(meaning that the various tzinfo methods exist, and don't blow up or return
-None when called).
-
-The function wants to return a datetime y with timezone tz, equivalent to x.
-x is already in UTC.
-
-By #3, we want
-
-y.n - y.o = x.n [1]
-
-The algorithm starts by attaching tz to x.n, and calling that y. So
-x.n = y.n at the start. Then it wants to add a duration k to y, so that [1]
-becomes true; in effect, we want to solve [2] for k:
-
-(y+k).n - (y+k).o = x.n [2]
-
-By #1, this is the same as
-
-(y+k).n - ((y+k).s + (y+k).d) = x.n [3]
-
-By #5, (y+k).n = y.n + k, which equals x.n + k because x.n=y.n at the start.
-Substituting that into [3],
-
-x.n + k - (y+k).s - (y+k).d = x.n; the x.n terms cancel, leaving
-k - (y+k).s - (y+k).d = 0; rearranging,
-k = (y+k).s - (y+k).d; by #4, (y+k).s == y.s, so
-k = y.s - (y+k).d
-
-On the RHS, (y+k).d can't be computed directly, but y.s can be, and we
-approximate k by ignoring the (y+k).d term at first. Note that k can't be
-very large, since all offset-returning methods return a duration of magnitude
-less than 24 hours. For that reason, if y is firmly in std time, (y+k).d must
-be 0, so ignoring it has no consequence then.
-
-In any case, the new value is
-
-z = y + y.s [4]
-
-It's helpful to step back at look at [4] from a higher level: it's simply
-mapping from UTC to tz's standard time.
-
-At this point, if
-
-z.n - z.o = x.n [5]
-
-we have an equivalent time, and are almost done. The insecurity here is
-at the start of daylight time. Picture US Eastern for concreteness. The wall
-time jumps from 1:59 to 3:00, and wall hours of the form 2:MM don't make good
-sense then. The docs ask that an Eastern tzinfo class consider such a time to
-be EDT (because it's "after 2"), which is a redundant spelling of 1:MM EST
-on the day DST starts. We want to return the 1:MM EST spelling because that's
-the only spelling that makes sense on the local wall clock.
-
-In fact, if [5] holds at this point, we do have the standard-time spelling,
-but that takes a bit of proof. We first prove a stronger result. What's the
-difference between the LHS and RHS of [5]? Let
-
-diff = x.n - (z.n - z.o) [6]
-
-Now
-z.n = by [4]
-(y + y.s).n = by #5
-y.n + y.s = since y.n = x.n
-x.n + y.s = since z and y are have the same tzinfo member,
-y.s = z.s by #2
-x.n + z.s
-
-Plugging that back into [6] gives
-
-diff =
-x.n - ((x.n + z.s) - z.o) = expanding
-x.n - x.n - z.s + z.o = cancelling
-- z.s + z.o = by #2
-z.d
-
-So diff = z.d.
-
-If [5] is true now, diff = 0, so z.d = 0 too, and we have the standard-time
-spelling we wanted in the endcase described above. We're done. Contrarily,
-if z.d = 0, then we have a UTC equivalent, and are also done.
-
-If [5] is not true now, diff = z.d != 0, and z.d is the offset we need to
-add to z (in effect, z is in tz's standard time, and we need to shift the
-local clock into tz's daylight time).
-
-Let
-
-z' = z + z.d = z + diff [7]
-
-and we can again ask whether
-
-z'.n - z'.o = x.n [8]
-
-If so, we're done. If not, the tzinfo class is insane, according to the
-assumptions we've made. This also requires a bit of proof. As before, let's
-compute the difference between the LHS and RHS of [8] (and skipping some of
-the justifications for the kinds of substitutions we've done several times
-already):
-
-diff' = x.n - (z'.n - z'.o) = replacing z'.n via [7]
-x.n - (z.n + diff - z'.o) = replacing diff via [6]
-x.n - (z.n + x.n - (z.n - z.o) - z'.o) =
-x.n - z.n - x.n + z.n - z.o + z'.o = cancel x.n
-- z.n + z.n - z.o + z'.o = cancel z.n
-- z.o + z'.o = #1 twice
--z.s - z.d + z'.s + z'.d = z and z' have same tzinfo
-z'.d - z.d
-
-So z' is UTC-equivalent to x iff z'.d = z.d at this point. If they are equal,
-we've found the UTC-equivalent so are done. In fact, we stop with [7] and
-return z', not bothering to compute z'.d.
-
-How could z.d and z'd differ? z' = z + z.d [7], so merely moving z' by
-a dst() offset, and starting *from* a time already in DST (we know z.d != 0),
-would have to change the result dst() returns: we start in DST, and moving
-a little further into it takes us out of DST.
-
-There isn't a sane case where this can happen. The closest it gets is at
-the end of DST, where there's an hour in UTC with no spelling in a hybrid
-tzinfo class. In US Eastern, that's 5:MM UTC = 0:MM EST = 1:MM EDT. During
-that hour, on an Eastern clock 1:MM is taken as being in standard time (6:MM
-UTC) because the docs insist on that, but 0:MM is taken as being in daylight
-time (4:MM UTC). There is no local time mapping to 5:MM UTC. The local
-clock jumps from 1:59 back to 1:00 again, and repeats the 1:MM hour in
-standard time. Since that's what the local clock *does*, we want to map both
-UTC hours 5:MM and 6:MM to 1:MM Eastern. The result is ambiguous
-in local time, but so it goes -- it's the way the local clock works.
-
-When x = 5:MM UTC is the input to this algorithm, x.o=0, y.o=-5 and y.d=0,
-so z=0:MM. z.d=60 (minutes) then, so [5] doesn't hold and we keep going.
-z' = z + z.d = 1:MM then, and z'.d=0, and z'.d - z.d = -60 != 0 so [8]
-(correctly) concludes that z' is not UTC-equivalent to x.
-
-Because we know z.d said z was in daylight time (else [5] would have held and
-we would have stopped then), and we know z.d != z'.d (else [8] would have held
-and we have stopped then), and there are only 2 possible values dst() can
-return in Eastern, it follows that z'.d must be 0 (which it is in the example,
-but the reasoning doesn't depend on the example -- it depends on there being
-two possible dst() outcomes, one zero and the other non-zero). Therefore
-z' must be in standard time, and is the spelling we want in this case.
-
-Note again that z' is not UTC-equivalent as far as the hybrid tzinfo class is
-concerned (because it takes z' as being in standard time rather than the
-daylight time we intend here), but returning it gives the real-life "local
-clock repeats an hour" behavior when mapping the "unspellable" UTC hour into
-tz.
-
-When the input is 6:MM, z=1:MM and z.d=0, and we stop at once, again with
-the 1:MM standard time spelling we want.
-
-So how can this break? One of the assumptions must be violated. Two
-possibilities:
-
-1) [2] effectively says that y.s is invariant across all y belong to a given
-time zone. This isn't true if, for political reasons or continental drift,
-a region decides to change its base offset from UTC.
-
-2) There may be versions of "double daylight" time where the tail end of
-the analysis gives up a step too early. I haven't thought about that
-enough to say.
-
-In any case, it's clear that the default fromutc() is strong enough to handle
-"almost all" time zones: so long as the standard offset is invariant, it
-doesn't matter if daylight time transition points change from year to year, or
-if daylight time is skipped in some years; it doesn't matter how large or
-small dst() may get within its bounds; and it doesn't even matter if some
-perverse time zone returns a negative dst()). So a breaking case must be
-pretty bizarre, and a tzinfo subclass can override fromutc() if it is.
-"""
+# Some time zone algebra. For a datetime x, let
+# x.n = x stripped of its timezone -- its naive time.
+# x.o = x.utcoffset(), and assuming that doesn't raise an exception or
+# return None
+# x.d = x.dst(), and assuming that doesn't raise an exception or
+# return None
+# x.s = x's standard offset, x.o - x.d
+#
+# Now some derived rules, where k is a duration (timedelta).
+#
+# 1. x.o = x.s + x.d
+# This follows from the definition of x.s.
+#
+# 2. If x and y have the same tzinfo member, x.s = y.s.
+# This is actually a requirement, an assumption we need to make about
+# sane tzinfo classes.
+#
+# 3. The naive UTC time corresponding to x is x.n - x.o.
+# This is again a requirement for a sane tzinfo class.
+#
+# 4. (x+k).s = x.s
+# This follows from #2, and that datimetimetz+timedelta preserves tzinfo.
+#
+# 5. (x+k).n = x.n + k
+# Again follows from how arithmetic is defined.
+#
+# Now we can explain tz.fromutc(x). Let's assume it's an interesting case
+# (meaning that the various tzinfo methods exist, and don't blow up or return
+# None when called).
+#
+# The function wants to return a datetime y with timezone tz, equivalent to x.
+# x is already in UTC.
+#
+# By #3, we want
+#
+# y.n - y.o = x.n [1]
+#
+# The algorithm starts by attaching tz to x.n, and calling that y. So
+# x.n = y.n at the start. Then it wants to add a duration k to y, so that [1]
+# becomes true; in effect, we want to solve [2] for k:
+#
+# (y+k).n - (y+k).o = x.n [2]
+#
+# By #1, this is the same as
+#
+# (y+k).n - ((y+k).s + (y+k).d) = x.n [3]
+#
+# By #5, (y+k).n = y.n + k, which equals x.n + k because x.n=y.n at the start.
+# Substituting that into [3],
+#
+# x.n + k - (y+k).s - (y+k).d = x.n; the x.n terms cancel, leaving
+# k - (y+k).s - (y+k).d = 0; rearranging,
+# k = (y+k).s - (y+k).d; by #4, (y+k).s == y.s, so
+# k = y.s - (y+k).d
+#
+# On the RHS, (y+k).d can't be computed directly, but y.s can be, and we
+# approximate k by ignoring the (y+k).d term at first. Note that k can't be
+# very large, since all offset-returning methods return a duration of magnitude
+# less than 24 hours. For that reason, if y is firmly in std time, (y+k).d must
+# be 0, so ignoring it has no consequence then.
+#
+# In any case, the new value is
+#
+# z = y + y.s [4]
+#
+# It's helpful to step back at look at [4] from a higher level: it's simply
+# mapping from UTC to tz's standard time.
+#
+# At this point, if
+#
+# z.n - z.o = x.n [5]
+#
+# we have an equivalent time, and are almost done. The insecurity here is
+# at the start of daylight time. Picture US Eastern for concreteness. The wall
+# time jumps from 1:59 to 3:00, and wall hours of the form 2:MM don't make good
+# sense then. The docs ask that an Eastern tzinfo class consider such a time to
+# be EDT (because it's "after 2"), which is a redundant spelling of 1:MM EST
+# on the day DST starts. We want to return the 1:MM EST spelling because that's
+# the only spelling that makes sense on the local wall clock.
+#
+# In fact, if [5] holds at this point, we do have the standard-time spelling,
+# but that takes a bit of proof. We first prove a stronger result. What's the
+# difference between the LHS and RHS of [5]? Let
+#
+# diff = x.n - (z.n - z.o) [6]
+#
+# Now
+# z.n = by [4]
+# (y + y.s).n = by #5
+# y.n + y.s = since y.n = x.n
+# x.n + y.s = since z and y are have the same tzinfo member,
+# y.s = z.s by #2
+# x.n + z.s
+#
+# Plugging that back into [6] gives
+#
+# diff =
+# x.n - ((x.n + z.s) - z.o) = expanding
+# x.n - x.n - z.s + z.o = cancelling
+# - z.s + z.o = by #2
+# z.d
+#
+# So diff = z.d.
+#
+# If [5] is true now, diff = 0, so z.d = 0 too, and we have the standard-time
+# spelling we wanted in the endcase described above. We're done. Contrarily,
+# if z.d = 0, then we have a UTC equivalent, and are also done.
+#
+# If [5] is not true now, diff = z.d != 0, and z.d is the offset we need to
+# add to z (in effect, z is in tz's standard time, and we need to shift the
+# local clock into tz's daylight time).
+#
+# Let
+#
+# z' = z + z.d = z + diff [7]
+#
+# and we can again ask whether
+#
+# z'.n - z'.o = x.n [8]
+#
+# If so, we're done. If not, the tzinfo class is insane, according to the
+# assumptions we've made. This also requires a bit of proof. As before, let's
+# compute the difference between the LHS and RHS of [8] (and skipping some of
+# the justifications for the kinds of substitutions we've done several times
+# already):
+#
+# diff' = x.n - (z'.n - z'.o) = replacing z'.n via [7]
+# x.n - (z.n + diff - z'.o) = replacing diff via [6]
+# x.n - (z.n + x.n - (z.n - z.o) - z'.o) =
+# x.n - z.n - x.n + z.n - z.o + z'.o = cancel x.n
+# - z.n + z.n - z.o + z'.o = cancel z.n
+# - z.o + z'.o = #1 twice
+# -z.s - z.d + z'.s + z'.d = z and z' have same tzinfo
+# z'.d - z.d
+#
+# So z' is UTC-equivalent to x iff z'.d = z.d at this point. If they are equal,
+# we've found the UTC-equivalent so are done. In fact, we stop with [7] and
+# return z', not bothering to compute z'.d.
+#
+# How could z.d and z'd differ? z' = z + z.d [7], so merely moving z' by
+# a dst() offset, and starting *from* a time already in DST (we know z.d != 0),
+# would have to change the result dst() returns: we start in DST, and moving
+# a little further into it takes us out of DST.
+#
+# There isn't a sane case where this can happen. The closest it gets is at
+# the end of DST, where there's an hour in UTC with no spelling in a hybrid
+# tzinfo class. In US Eastern, that's 5:MM UTC = 0:MM EST = 1:MM EDT. During
+# that hour, on an Eastern clock 1:MM is taken as being in standard time (6:MM
+# UTC) because the docs insist on that, but 0:MM is taken as being in daylight
+# time (4:MM UTC). There is no local time mapping to 5:MM UTC. The local
+# clock jumps from 1:59 back to 1:00 again, and repeats the 1:MM hour in
+# standard time. Since that's what the local clock *does*, we want to map both
+# UTC hours 5:MM and 6:MM to 1:MM Eastern. The result is ambiguous
+# in local time, but so it goes -- it's the way the local clock works.
+#
+# When x = 5:MM UTC is the input to this algorithm, x.o=0, y.o=-5 and y.d=0,
+# so z=0:MM. z.d=60 (minutes) then, so [5] doesn't hold and we keep going.
+# z' = z + z.d = 1:MM then, and z'.d=0, and z'.d - z.d = -60 != 0 so [8]
+# (correctly) concludes that z' is not UTC-equivalent to x.
+#
+# Because we know z.d said z was in daylight time (else [5] would have held and
+# we would have stopped then), and we know z.d != z'.d (else [8] would have held
+# and we have stopped then), and there are only 2 possible values dst() can
+# return in Eastern, it follows that z'.d must be 0 (which it is in the example,
+# but the reasoning doesn't depend on the example -- it depends on there being
+# two possible dst() outcomes, one zero and the other non-zero). Therefore
+# z' must be in standard time, and is the spelling we want in this case.
+#
+# Note again that z' is not UTC-equivalent as far as the hybrid tzinfo class is
+# concerned (because it takes z' as being in standard time rather than the
+# daylight time we intend here), but returning it gives the real-life "local
+# clock repeats an hour" behavior when mapping the "unspellable" UTC hour into
+# tz.
+#
+# When the input is 6:MM, z=1:MM and z.d=0, and we stop at once, again with
+# the 1:MM standard time spelling we want.
+#
+# So how can this break? One of the assumptions must be violated. Two
+# possibilities:
+#
+# 1) [2] effectively says that y.s is invariant across all y belong to a given
+# time zone. This isn't true if, for political reasons or continental drift,
+# a region decides to change its base offset from UTC.
+#
+# 2) There may be versions of "double daylight" time where the tail end of
+# the analysis gives up a step too early. I haven't thought about that
+# enough to say.
+#
+# In any case, it's clear that the default fromutc() is strong enough to handle
+# "almost all" time zones: so long as the standard offset is invariant, it
+# doesn't matter if daylight time transition points change from year to year, or
+# if daylight time is skipped in some years; it doesn't matter how large or
+# small dst() may get within its bounds; and it doesn't even matter if some
+# perverse time zone returns a negative dst()). So a breaking case must be
+# pretty bizarre, and a tzinfo subclass can override fromutc() if it is.
 
 try:
     from _datetime import *
 except ImportError:
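The algebra above is what justifies the default fromutc(). A condensed sketch of that algorithm (my paraphrase; the stdlib version adds type and consistency checks omitted here): derive y.s from rule 1, map into standard time per [4], then apply the z.d correction per [7] when needed. The class name is hypothetical, and a concrete subclass would still have to supply utcoffset() and dst():

    from datetime import tzinfo

    class FromUtcSketch(tzinfo):
        def fromutc(self, dt):
            # dt carries x.n with tzinfo already set to self, so dt plays y.
            y_o = dt.utcoffset()           # y.o
            y_d = dt.dst()                 # y.d
            y_s = y_o - y_d                # y.s, by rule 1
            z = dt + y_s                   # [4]: UTC mapped to standard time
            z_d = z.dst()                  # equals diff, by the proof above
            return z + z_d if z_d else z   # [7] if z landed in daylight time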


@@ -1317,24 +1317,22 @@ RouteComponentMarker = ValueTerminal('@', 'route-component-marker')
 
 # Parser
 #
-"""Parse strings according to RFC822/2047/2822/5322 rules.
-
-This is a stateless parser. Each get_XXX function accepts a string and
-returns either a Terminal or a TokenList representing the RFC object named
-by the method and a string containing the remaining unparsed characters
-from the input. Thus a parser method consumes the next syntactic construct
-of a given type and returns a token representing the construct plus the
-unparsed remainder of the input string.
-
-For example, if the first element of a structured header is a 'phrase',
-then:
-
-    phrase, value = get_phrase(value)
-
-returns the complete phrase from the start of the string value, plus any
-characters left in the string after the phrase is removed.
-"""
+# Parse strings according to RFC822/2047/2822/5322 rules.
+#
+# This is a stateless parser. Each get_XXX function accepts a string and
+# returns either a Terminal or a TokenList representing the RFC object named
+# by the method and a string containing the remaining unparsed characters
+# from the input. Thus a parser method consumes the next syntactic construct
+# of a given type and returns a token representing the construct plus the
+# unparsed remainder of the input string.
+#
+# For example, if the first element of a structured header is a 'phrase',
+# then:
+#
+#     phrase, value = get_phrase(value)
+#
+# returns the complete phrase from the start of the string value, plus any
+# characters left in the string after the phrase is removed.
 
 _wsp_splitter = re.compile(r'([{}]+)'.format(''.join(WSP))).split
 _non_atom_end_matcher = re.compile(r"[^{}]+".format(
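The get_XXX convention is easy to mimic. A toy, hypothetical parser in the same shape (not part of the module), consuming one construct and returning it along with the unparsed remainder:

    def get_digits(value):
        # Consume a leading run of digits and return (token, rest).
        i = 0
        while i < len(value) and value[i].isdigit():
            i += 1
        return value[:i], value[i:]

    token, rest = get_digits("2822 rules")
    assert (token, rest) == ("2822", " rules")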


@@ -299,101 +299,100 @@ def _call_with_frames_removed(f, *args, **kwds):
 
 # Finder/loader utility code ###############################################
 
-"""Magic word to reject .pyc files generated by other Python versions.
-It should change for each incompatible change to the bytecode.
-
-The value of CR and LF is incorporated so if you ever read or write
-a .pyc file in text mode the magic number will be wrong; also, the
-Apple MPW compiler swaps their values, botching string constants.
-
-The magic numbers must be spaced apart at least 2 values, as the
--U interpeter flag will cause MAGIC+1 being used. They have been
-odd numbers for some time now.
-
-There were a variety of old schemes for setting the magic number.
-The current working scheme is to increment the previous value by
-10.
-
-Starting with the adoption of PEP 3147 in Python 3.2, every bump in magic
-number also includes a new "magic tag", i.e. a human readable string used
-to represent the magic number in __pycache__ directories. When you change
-the magic number, you must also set a new unique magic tag. Generally this
-can be named after the Python major version of the magic number bump, but
-it can really be anything, as long as it's different than anything else
-that's come before. The tags are included in the following table, starting
-with Python 3.2a0.
-
-Known values:
-Python 1.5: 20121
-Python 1.5.1: 20121
-Python 1.5.2: 20121
-Python 1.6: 50428
-Python 2.0: 50823
-Python 2.0.1: 50823
-Python 2.1: 60202
-Python 2.1.1: 60202
-Python 2.1.2: 60202
-Python 2.2: 60717
-Python 2.3a0: 62011
-Python 2.3a0: 62021
-Python 2.3a0: 62011 (!)
-Python 2.4a0: 62041
-Python 2.4a3: 62051
-Python 2.4b1: 62061
-Python 2.5a0: 62071
-Python 2.5a0: 62081 (ast-branch)
-Python 2.5a0: 62091 (with)
-Python 2.5a0: 62092 (changed WITH_CLEANUP opcode)
-Python 2.5b3: 62101 (fix wrong code: for x, in ...)
-Python 2.5b3: 62111 (fix wrong code: x += yield)
-Python 2.5c1: 62121 (fix wrong lnotab with for loops and
-storing constants that should have been removed)
-Python 2.5c2: 62131 (fix wrong code: for x, in ... in listcomp/genexp)
-Python 2.6a0: 62151 (peephole optimizations and STORE_MAP opcode)
-Python 2.6a1: 62161 (WITH_CLEANUP optimization)
-Python 2.7a0: 62171 (optimize list comprehensions/change LIST_APPEND)
-Python 2.7a0: 62181 (optimize conditional branches:
-introduce POP_JUMP_IF_FALSE and POP_JUMP_IF_TRUE)
-Python 2.7a0 62191 (introduce SETUP_WITH)
-Python 2.7a0 62201 (introduce BUILD_SET)
-Python 2.7a0 62211 (introduce MAP_ADD and SET_ADD)
-Python 3000: 3000
-3010 (removed UNARY_CONVERT)
-3020 (added BUILD_SET)
-3030 (added keyword-only parameters)
-3040 (added signature annotations)
-3050 (print becomes a function)
-3060 (PEP 3115 metaclass syntax)
-3061 (string literals become unicode)
-3071 (PEP 3109 raise changes)
-3081 (PEP 3137 make __file__ and __name__ unicode)
-3091 (kill str8 interning)
-3101 (merge from 2.6a0, see 62151)
-3103 (__file__ points to source file)
-Python 3.0a4: 3111 (WITH_CLEANUP optimization).
-Python 3.0a5: 3131 (lexical exception stacking, including POP_EXCEPT)
-Python 3.1a0: 3141 (optimize list, set and dict comprehensions:
-change LIST_APPEND and SET_ADD, add MAP_ADD)
-Python 3.1a0: 3151 (optimize conditional branches:
-introduce POP_JUMP_IF_FALSE and POP_JUMP_IF_TRUE)
-Python 3.2a0: 3160 (add SETUP_WITH)
-tag: cpython-32
-Python 3.2a1: 3170 (add DUP_TOP_TWO, remove DUP_TOPX and ROT_FOUR)
-tag: cpython-32
-Python 3.2a2 3180 (add DELETE_DEREF)
-Python 3.3a0 3190 __class__ super closure changed
-Python 3.3a0 3200 (__qualname__ added)
-3210 (added size modulo 2**32 to the pyc header)
-Python 3.3a1 3220 (changed PEP 380 implementation)
-Python 3.3a4 3230 (revert changes to implicit __class__ closure)
-Python 3.4a1 3250 (evaluate positional default arguments before
-keyword-only defaults)
-
-MAGIC must change whenever the bytecode emitted by the compiler may no
-longer be understood by older implementations of the eval loop (usually
-due to the addition of new opcodes).
-"""
+# Magic word to reject .pyc files generated by other Python versions.
+# It should change for each incompatible change to the bytecode.
+#
+# The value of CR and LF is incorporated so if you ever read or write
+# a .pyc file in text mode the magic number will be wrong; also, the
+# Apple MPW compiler swaps their values, botching string constants.
+#
+# The magic numbers must be spaced apart at least 2 values, as the
+# -U interpeter flag will cause MAGIC+1 being used. They have been
+# odd numbers for some time now.
+#
+# There were a variety of old schemes for setting the magic number.
+# The current working scheme is to increment the previous value by
+# 10.
+#
+# Starting with the adoption of PEP 3147 in Python 3.2, every bump in magic
+# number also includes a new "magic tag", i.e. a human readable string used
+# to represent the magic number in __pycache__ directories. When you change
+# the magic number, you must also set a new unique magic tag. Generally this
+# can be named after the Python major version of the magic number bump, but
+# it can really be anything, as long as it's different than anything else
+# that's come before. The tags are included in the following table, starting
+# with Python 3.2a0.
+#
+# Known values:
+# Python 1.5: 20121
+# Python 1.5.1: 20121
+# Python 1.5.2: 20121
+# Python 1.6: 50428
+# Python 2.0: 50823
+# Python 2.0.1: 50823
+# Python 2.1: 60202
+# Python 2.1.1: 60202
+# Python 2.1.2: 60202
+# Python 2.2: 60717
+# Python 2.3a0: 62011
+# Python 2.3a0: 62021
+# Python 2.3a0: 62011 (!)
+# Python 2.4a0: 62041
+# Python 2.4a3: 62051
+# Python 2.4b1: 62061
+# Python 2.5a0: 62071
+# Python 2.5a0: 62081 (ast-branch)
+# Python 2.5a0: 62091 (with)
+# Python 2.5a0: 62092 (changed WITH_CLEANUP opcode)
+# Python 2.5b3: 62101 (fix wrong code: for x, in ...)
+# Python 2.5b3: 62111 (fix wrong code: x += yield)
+# Python 2.5c1: 62121 (fix wrong lnotab with for loops and
+# storing constants that should have been removed)
+# Python 2.5c2: 62131 (fix wrong code: for x, in ... in listcomp/genexp)
+# Python 2.6a0: 62151 (peephole optimizations and STORE_MAP opcode)
+# Python 2.6a1: 62161 (WITH_CLEANUP optimization)
+# Python 2.7a0: 62171 (optimize list comprehensions/change LIST_APPEND)
+# Python 2.7a0: 62181 (optimize conditional branches:
+# introduce POP_JUMP_IF_FALSE and POP_JUMP_IF_TRUE)
+# Python 2.7a0 62191 (introduce SETUP_WITH)
+# Python 2.7a0 62201 (introduce BUILD_SET)
+# Python 2.7a0 62211 (introduce MAP_ADD and SET_ADD)
+# Python 3000: 3000
+# 3010 (removed UNARY_CONVERT)
+# 3020 (added BUILD_SET)
+# 3030 (added keyword-only parameters)
+# 3040 (added signature annotations)
+# 3050 (print becomes a function)
+# 3060 (PEP 3115 metaclass syntax)
+# 3061 (string literals become unicode)
+# 3071 (PEP 3109 raise changes)
+# 3081 (PEP 3137 make __file__ and __name__ unicode)
+# 3091 (kill str8 interning)
+# 3101 (merge from 2.6a0, see 62151)
+# 3103 (__file__ points to source file)
+# Python 3.0a4: 3111 (WITH_CLEANUP optimization).
+# Python 3.0a5: 3131 (lexical exception stacking, including POP_EXCEPT)
+# Python 3.1a0: 3141 (optimize list, set and dict comprehensions:
+# change LIST_APPEND and SET_ADD, add MAP_ADD)
+# Python 3.1a0: 3151 (optimize conditional branches:
+# introduce POP_JUMP_IF_FALSE and POP_JUMP_IF_TRUE)
+# Python 3.2a0: 3160 (add SETUP_WITH)
+# tag: cpython-32
+# Python 3.2a1: 3170 (add DUP_TOP_TWO, remove DUP_TOPX and ROT_FOUR)
+# tag: cpython-32
+# Python 3.2a2 3180 (add DELETE_DEREF)
+# Python 3.3a0 3190 __class__ super closure changed
+# Python 3.3a0 3200 (__qualname__ added)
+# 3210 (added size modulo 2**32 to the pyc header)
+# Python 3.3a1 3220 (changed PEP 380 implementation)
+# Python 3.3a4 3230 (revert changes to implicit __class__ closure)
+# Python 3.4a1 3250 (evaluate positional default arguments before
+# keyword-only defaults)
+#
+# MAGIC must change whenever the bytecode emitted by the compiler may no
+# longer be understood by older implementations of the eval loop (usually
+# due to the addition of new opcodes).
 
 _MAGIC_BYTES = (3250).to_bytes(2, 'little') + b'\r\n'
 _RAW_MAGIC_NUMBER = int.from_bytes(_MAGIC_BYTES, 'little')
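Why CR and LF are baked in can be shown in a few lines. A sketch (MAGIC_BYTES mirrors the _MAGIC_BYTES expression above; the value 3250 is only meaningful for this 3.4 development version):

    MAGIC_BYTES = (3250).to_bytes(2, 'little') + b'\r\n'

    header = MAGIC_BYTES + b'(rest of the .pyc)'
    assert header[:4] == MAGIC_BYTES            # a well-formed .pyc is accepted

    mangled = header.replace(b'\r\n', b'\n')    # what text-mode I/O could do
    assert mangled[:4] != MAGIC_BYTES           # corruption is detected at once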


@@ -825,10 +825,9 @@ class SysLogHandler(logging.Handler):
             msg = self.ident + msg
         if self.append_nul:
             msg += '\000'
-        """
-        We need to convert record level to lowercase, maybe this will
-        change in the future.
-        """
+        # We need to convert record level to lowercase, maybe this will
+        # change in the future.
         prio = '<%d>' % self.encodePriority(self.facility,
                                             self.mapPriority(record.levelname))
         prio = prio.encode('utf-8')
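For reference, the priority string built here follows the syslog convention PRI = facility * 8 + severity. A standalone sketch mirroring (not calling) encodePriority, using numeric values SysLogHandler defines:

    def encode_priority(facility, severity):
        # RFC 3164: facility shifted left by 3, OR'd with the severity level.
        return (facility << 3) | severity

    LOG_USER, LOG_WARNING = 1, 4   # SysLogHandler.LOG_USER / .LOG_WARNING
    assert '<%d>' % encode_priority(LOG_USER, LOG_WARNING) == '<12>'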


@@ -33,119 +33,118 @@ bytes_types = pickle.bytes_types
 # by a later GET.
 
-"""
-"A pickle" is a program for a virtual pickle machine (PM, but more accurately
-called an unpickling machine). It's a sequence of opcodes, interpreted by the
-PM, building an arbitrarily complex Python object.
-
-For the most part, the PM is very simple: there are no looping, testing, or
-conditional instructions, no arithmetic and no function calls. Opcodes are
-executed once each, from first to last, until a STOP opcode is reached.
-
-The PM has two data areas, "the stack" and "the memo".
-
-Many opcodes push Python objects onto the stack; e.g., INT pushes a Python
-integer object on the stack, whose value is gotten from a decimal string
-literal immediately following the INT opcode in the pickle bytestream. Other
-opcodes take Python objects off the stack. The result of unpickling is
-whatever object is left on the stack when the final STOP opcode is executed.
-
-The memo is simply an array of objects, or it can be implemented as a dict
-mapping little integers to objects. The memo serves as the PM's "long term
-memory", and the little integers indexing the memo are akin to variable
-names. Some opcodes pop a stack object into the memo at a given index,
-and others push a memo object at a given index onto the stack again.
-
-At heart, that's all the PM has. Subtleties arise for these reasons:
-
-+ Object identity. Objects can be arbitrarily complex, and subobjects
-may be shared (for example, the list [a, a] refers to the same object a
-twice). It can be vital that unpickling recreate an isomorphic object
-graph, faithfully reproducing sharing.
-
-+ Recursive objects. For example, after "L = []; L.append(L)", L is a
-list, and L[0] is the same list. This is related to the object identity
-point, and some sequences of pickle opcodes are subtle in order to
-get the right result in all cases.
-
-+ Things pickle doesn't know everything about. Examples of things pickle
-does know everything about are Python's builtin scalar and container
-types, like ints and tuples. They generally have opcodes dedicated to
-them. For things like module references and instances of user-defined
-classes, pickle's knowledge is limited. Historically, many enhancements
-have been made to the pickle protocol in order to do a better (faster,
-and/or more compact) job on those.
-
-+ Backward compatibility and micro-optimization. As explained below,
-pickle opcodes never go away, not even when better ways to do a thing
-get invented. The repertoire of the PM just keeps growing over time.
-For example, protocol 0 had two opcodes for building Python integers (INT
-and LONG), protocol 1 added three more for more-efficient pickling of short
-integers, and protocol 2 added two more for more-efficient pickling of
-long integers (before protocol 2, the only ways to pickle a Python long
-took time quadratic in the number of digits, for both pickling and
-unpickling). "Opcode bloat" isn't so much a subtlety as a source of
-wearying complication.
-
-
-Pickle protocols:
-
-For compatibility, the meaning of a pickle opcode never changes. Instead new
-pickle opcodes get added, and each version's unpickler can handle all the
-pickle opcodes in all protocol versions to date. So old pickles continue to
-be readable forever. The pickler can generally be told to restrict itself to
-the subset of opcodes available under previous protocol versions too, so that
-users can create pickles under the current version readable by older
-versions. However, a pickle does not contain its version number embedded
-within it. If an older unpickler tries to read a pickle using a later
-protocol, the result is most likely an exception due to seeing an unknown (in
-the older unpickler) opcode.
-
-The original pickle used what's now called "protocol 0", and what was called
-"text mode" before Python 2.3. The entire pickle bytestream is made up of
-printable 7-bit ASCII characters, plus the newline character, in protocol 0.
-That's why it was called text mode. Protocol 0 is small and elegant, but
-sometimes painfully inefficient.
-
-The second major set of additions is now called "protocol 1", and was called
-"binary mode" before Python 2.3. This added many opcodes with arguments
-consisting of arbitrary bytes, including NUL bytes and unprintable "high bit"
-bytes. Binary mode pickles can be substantially smaller than equivalent
-text mode pickles, and sometimes faster too; e.g., BININT represents a 4-byte
-int as 4 bytes following the opcode, which is cheaper to unpickle than the
-(perhaps) 11-character decimal string attached to INT. Protocol 1 also added
-a number of opcodes that operate on many stack elements at once (like APPENDS
-and SETITEMS), and "shortcut" opcodes (like EMPTY_DICT and EMPTY_TUPLE).
-
-The third major set of additions came in Python 2.3, and is called "protocol
-2". This added:
-
-- A better way to pickle instances of new-style classes (NEWOBJ).
-
-- A way for a pickle to identify its protocol (PROTO).
-
-- Time- and space- efficient pickling of long ints (LONG{1,4}).
-
-- Shortcuts for small tuples (TUPLE{1,2,3}}.
-
-- Dedicated opcodes for bools (NEWTRUE, NEWFALSE).
-
-- The "extension registry", a vector of popular objects that can be pushed
-efficiently by index (EXT{1,2,4}). This is akin to the memo and GET, but
-the registry contents are predefined (there's nothing akin to the memo's
-PUT).
-
-Another independent change with Python 2.3 is the abandonment of any
-pretense that it might be safe to load pickles received from untrusted
-parties -- no sufficient security analysis has been done to guarantee
-this and there isn't a use case that warrants the expense of such an
-analysis.
-
-To this end, all tests for __safe_for_unpickling__ or for
-copyreg.safe_constructors are removed from the unpickling code.
-References to these variables in the descriptions below are to be seen
-as describing unpickling in Python 2.2 and before.
-"""
+# "A pickle" is a program for a virtual pickle machine (PM, but more accurately
+# called an unpickling machine). It's a sequence of opcodes, interpreted by the
+# PM, building an arbitrarily complex Python object.
+#
+# For the most part, the PM is very simple: there are no looping, testing, or
+# conditional instructions, no arithmetic and no function calls. Opcodes are
+# executed once each, from first to last, until a STOP opcode is reached.
+#
+# The PM has two data areas, "the stack" and "the memo".
+#
+# Many opcodes push Python objects onto the stack; e.g., INT pushes a Python
+# integer object on the stack, whose value is gotten from a decimal string
+# literal immediately following the INT opcode in the pickle bytestream. Other
+# opcodes take Python objects off the stack. The result of unpickling is
+# whatever object is left on the stack when the final STOP opcode is executed.
+#
+# The memo is simply an array of objects, or it can be implemented as a dict
+# mapping little integers to objects. The memo serves as the PM's "long term
+# memory", and the little integers indexing the memo are akin to variable
+# names. Some opcodes pop a stack object into the memo at a given index,
+# and others push a memo object at a given index onto the stack again.
+#
+# At heart, that's all the PM has. Subtleties arise for these reasons:
+#
+# + Object identity. Objects can be arbitrarily complex, and subobjects
+# may be shared (for example, the list [a, a] refers to the same object a
+# twice). It can be vital that unpickling recreate an isomorphic object
+# graph, faithfully reproducing sharing.
+#
+# + Recursive objects. For example, after "L = []; L.append(L)", L is a
+# list, and L[0] is the same list. This is related to the object identity
+# point, and some sequences of pickle opcodes are subtle in order to
+# get the right result in all cases.
+#
+# + Things pickle doesn't know everything about. Examples of things pickle
+# does know everything about are Python's builtin scalar and container
+# types, like ints and tuples. They generally have opcodes dedicated to
+# them. For things like module references and instances of user-defined
+# classes, pickle's knowledge is limited. Historically, many enhancements
+# have been made to the pickle protocol in order to do a better (faster,
+# and/or more compact) job on those.
+#
+# + Backward compatibility and micro-optimization. As explained below,
+# pickle opcodes never go away, not even when better ways to do a thing
+# get invented. The repertoire of the PM just keeps growing over time.
+# For example, protocol 0 had two opcodes for building Python integers (INT
+# and LONG), protocol 1 added three more for more-efficient pickling of short
+# integers, and protocol 2 added two more for more-efficient pickling of
+# long integers (before protocol 2, the only ways to pickle a Python long
+# took time quadratic in the number of digits, for both pickling and
+# unpickling). "Opcode bloat" isn't so much a subtlety as a source of
+# wearying complication.
+#
+#
+# Pickle protocols:
+#
+# For compatibility, the meaning of a pickle opcode never changes. Instead new
+# pickle opcodes get added, and each version's unpickler can handle all the
+# pickle opcodes in all protocol versions to date. So old pickles continue to
+# be readable forever. The pickler can generally be told to restrict itself to
+# the subset of opcodes available under previous protocol versions too, so that
+# users can create pickles under the current version readable by older
+# versions. However, a pickle does not contain its version number embedded
+# within it. If an older unpickler tries to read a pickle using a later
+# protocol, the result is most likely an exception due to seeing an unknown (in
+# the older unpickler) opcode.
+#
+# The original pickle used what's now called "protocol 0", and what was called
+# "text mode" before Python 2.3. The entire pickle bytestream is made up of
+# printable 7-bit ASCII characters, plus the newline character, in protocol 0.
+# That's why it was called text mode. Protocol 0 is small and elegant, but
+# sometimes painfully inefficient.
+#
+# The second major set of additions is now called "protocol 1", and was called
+# "binary mode" before Python 2.3. This added many opcodes with arguments
+# consisting of arbitrary bytes, including NUL bytes and unprintable "high bit"
+# bytes. Binary mode pickles can be substantially smaller than equivalent
+# text mode pickles, and sometimes faster too; e.g., BININT represents a 4-byte
+# int as 4 bytes following the opcode, which is cheaper to unpickle than the
+# (perhaps) 11-character decimal string attached to INT. Protocol 1 also added
+# a number of opcodes that operate on many stack elements at once (like APPENDS
+# and SETITEMS), and "shortcut" opcodes (like EMPTY_DICT and EMPTY_TUPLE).
+#
+# The third major set of additions came in Python 2.3, and is called "protocol
+# 2". This added:
+#
+# - A better way to pickle instances of new-style classes (NEWOBJ).
+#
+# - A way for a pickle to identify its protocol (PROTO).
+#
+# - Time- and space- efficient pickling of long ints (LONG{1,4}).
+#
+# - Shortcuts for small tuples (TUPLE{1,2,3}}.
+#
+# - Dedicated opcodes for bools (NEWTRUE, NEWFALSE).
+#
+# - The "extension registry", a vector of popular objects that can be pushed
+# efficiently by index (EXT{1,2,4}). This is akin to the memo and GET, but
+# the registry contents are predefined (there's nothing akin to the memo's
+# PUT).
+#
+# Another independent change with Python 2.3 is the abandonment of any
+# pretense that it might be safe to load pickles received from untrusted
+# parties -- no sufficient security analysis has been done to guarantee
+# this and there isn't a use case that warrants the expense of such an
+# analysis.
+#
+# To this end, all tests for __safe_for_unpickling__ or for
+# copyreg.safe_constructors are removed from the unpickling code.
+# References to these variables in the descriptions below are to be seen
+# as describing unpickling in Python 2.2 and before.
 
 # Meta-rule: Descriptions are stored in instances of descriptor objects,
 # with plain constructors. No meta-language is defined from which
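The PM described above can be watched at work: pickletools.dis() (defined later in this same module) prints the opcode program inside a pickle.

    import pickle
    import pickletools

    # Disassemble a protocol-2 pickle of a small nested object; the listing
    # ends with STOP once the final object is left on the stack.
    pickletools.dis(pickle.dumps([1, (2, 3)], protocol=2))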


@@ -2,37 +2,35 @@ import unittest
 from test import support
 from _testcapi import getargs_keywords, getargs_keyword_only
 
-"""
-> How about the following counterproposal. This also changes some of
-> the other format codes to be a little more regular.
->
-> Code           C type          Range check
->
-> b              unsigned char   0..UCHAR_MAX
-> h              signed short    SHRT_MIN..SHRT_MAX
-> B              unsigned char   none **
-> H              unsigned short  none **
-> k *            unsigned long   none
-> I *            unsigned int    0..UINT_MAX
-
-
-> i              int             INT_MIN..INT_MAX
-> l              long            LONG_MIN..LONG_MAX
-
-> K *            unsigned long long      none
-> L              long long       LLONG_MIN..LLONG_MAX
-
-> Notes:
->
-> * New format codes.
->
-> ** Changed from previous "range-and-a-half" to "none"; the
->    range-and-a-half checking wasn't particularly useful.
-
-Plus a C API or two, e.g. PyInt_AsLongMask() ->
-unsigned long and PyInt_AsLongLongMask() -> unsigned
-long long (if that exists).
-"""
+# > How about the following counterproposal. This also changes some of
+# > the other format codes to be a little more regular.
+# >
+# > Code           C type          Range check
+# >
+# > b              unsigned char   0..UCHAR_MAX
+# > h              signed short    SHRT_MIN..SHRT_MAX
+# > B              unsigned char   none **
+# > H              unsigned short  none **
+# > k *            unsigned long   none
+# > I *            unsigned int    0..UINT_MAX
+#
+#
+# > i              int             INT_MIN..INT_MAX
+# > l              long            LONG_MIN..LONG_MAX
+#
+# > K *            unsigned long long      none
+# > L              long long       LLONG_MIN..LLONG_MAX
+#
+# > Notes:
+# >
+# > * New format codes.
+# >
+# > ** Changed from previous "range-and-a-half" to "none"; the
+# >    range-and-a-half checking wasn't particularly useful.
+#
+# Plus a C API or two, e.g. PyInt_AsLongMask() ->
+# unsigned long and PyInt_AsLongLongMask() -> unsigned
+# long long (if that exists).
 
 LARGE = 0x7FFFFFFF
 VERY_LARGE = 0xFF0000121212121212121242
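The table can be exercised through CPython's private _testcapi module (the same one imported above); getargs_b and getargs_B are assumed helpers, present in CPython's test build:

    from _testcapi import getargs_b, getargs_B

    print(getargs_B(255))    # 'B' applies no range check, so UCHAR_MAX passes
    try:
        getargs_b(256)       # 'b' enforces 0..UCHAR_MAX
    except OverflowError as exc:
        print(exc)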


@@ -1337,8 +1337,8 @@ def XMLID(text, parser=None):
         ids[id] = elem
     return tree, ids
 
+# Parse XML document from string constant. Alias for XML().
 fromstring = XML
-"""Parse XML document from string constant. Alias for XML()."""
 
 def fromstringlist(sequence, parser=None):
     """Parse XML document from sequence of string fragments.


@@ -52,8 +52,8 @@ verbose = False
 recurse = False
 dryrun = False
 makebackup = True
+# A specified newline to be used in the output (set by --newline option)
 spec_newline = None
-"""A specified newline to be used in the output (set by --newline option)"""
 
 def usage(msg=None):