cpython/Lib/asynchat.py

308 lines
11 KiB
Python
Raw Permalink Normal View History

# -*- Mode: Python; tab-width: 4 -*-
2001-02-09 16:06:00 -04:00
# Id: asynchat.py,v 2.26 2000/09/07 22:29:26 rushing Exp
2001-01-14 14:09:23 -04:00
# Author: Sam Rushing <rushing@nightmare.com>
# ======================================================================
# Copyright 1996 by Sam Rushing
2001-01-14 14:09:23 -04:00
#
# All Rights Reserved
2001-01-14 14:09:23 -04:00
#
# Permission to use, copy, modify, and distribute this software and
# its documentation for any purpose and without fee is hereby
# granted, provided that the above copyright notice appear in all
# copies and that both that copyright notice and this permission
# notice appear in supporting documentation, and that the name of Sam
# Rushing not be used in advertising or publicity pertaining to
# distribution of the software without specific, written prior
# permission.
2001-01-14 14:09:23 -04:00
#
# SAM RUSHING DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
# INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN
# NO EVENT SHALL SAM RUSHING BE LIABLE FOR ANY SPECIAL, INDIRECT OR
# CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
# OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
# NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
# ======================================================================
r"""A class supporting chat-style (command/response) protocols.
This class adds support for 'chat' style protocols - where one side
sends a 'command', and the other sends a response (examples would be
the common internet protocols - smtp, nntp, ftp, etc..).
The handle_read() method looks at the input stream for the current
'terminator' (usually '\r\n' for single-line responses, '\r\n.\r\n'
for multi-line output), calling self.found_terminator() on its
receipt.
for example:
Say you build an async nntp client using this class. At the start
of the connection, you'll have self.terminator set to '\r\n', in
order to process the single-line greeting. Just before issuing a
'LIST' command you'll set it to '\r\n.\r\n'. The output of the LIST
command will be accumulated (using your own 'collect_incoming_data'
method) up to the terminator, and then control will be returned to
you - by calling your self.found_terminator() method.
"""
import asyncore
from collections import deque
2014-07-07 19:16:54 -03:00
class async_chat(asyncore.dispatcher):
2001-01-14 14:09:23 -04:00
"""This is an abstract class. You must derive from this class, and add
the two methods collect_incoming_data() and found_terminator()"""
# these are overridable defaults
2014-07-07 19:16:54 -03:00
ac_in_buffer_size = 65536
ac_out_buffer_size = 65536
2001-01-14 14:09:23 -04:00
# we don't want to enable the use of encoding by default, because that is a
# sign of an application bug that we don't want to pass silently
2014-07-07 19:16:54 -03:00
use_encoding = 0
encoding = 'latin-1'
2014-07-07 19:16:54 -03:00
def __init__(self, sock=None, map=None):
# for string terminator matching
self.ac_in_buffer = b''
# we use a list here rather than io.BytesIO for a few reasons...
# del lst[:] is faster than bio.truncate(0)
# lst = [] is faster than bio.truncate(0)
self.incoming = []
# we toss the use of the "simple producer" and replace it with
# a pure deque, which the original fifo was a wrapping of
self.producer_fifo = deque()
2014-07-07 19:16:54 -03:00
asyncore.dispatcher.__init__(self, sock, map)
2001-01-14 14:09:23 -04:00
def collect_incoming_data(self, data):
2007-08-29 22:19:48 -03:00
raise NotImplementedError("must be implemented in subclass")
2002-04-15 22:38:40 -03:00
def _collect_incoming_data(self, data):
self.incoming.append(data)
def _get_data(self):
d = b''.join(self.incoming)
del self.incoming[:]
return d
def found_terminator(self):
2007-08-29 22:19:48 -03:00
raise NotImplementedError("must be implemented in subclass")
2002-04-15 22:38:40 -03:00
2014-07-07 19:16:54 -03:00
def set_terminator(self, term):
"""Set the input delimiter.
Can be a fixed string of any length, an integer, or None.
"""
if isinstance(term, str) and self.use_encoding:
term = bytes(term, self.encoding)
elif isinstance(term, int) and term < 0:
raise ValueError('the number of received bytes must be positive')
2001-01-14 14:09:23 -04:00
self.terminator = term
2014-07-07 19:16:54 -03:00
def get_terminator(self):
2001-01-14 14:09:23 -04:00
return self.terminator
# grab some more data from the socket,
# throw it to the collector method,
# check for the terminator,
# if found, transition to the next state.
2014-07-07 19:16:54 -03:00
def handle_read(self):
2001-01-14 14:09:23 -04:00
try:
2014-07-07 19:16:54 -03:00
data = self.recv(self.ac_in_buffer_size)
except BlockingIOError:
return
except OSError:
2001-01-14 14:09:23 -04:00
self.handle_error()
return
if isinstance(data, str) and self.use_encoding:
data = bytes(str, self.encoding)
self.ac_in_buffer = self.ac_in_buffer + data
2001-01-14 14:09:23 -04:00
# Continue to search for self.terminator in self.ac_in_buffer,
# while calling self.collect_incoming_data. The while loop
# is necessary because we might read several data+terminator
# combos with a single recv(4096).
2001-01-14 14:09:23 -04:00
while self.ac_in_buffer:
lb = len(self.ac_in_buffer)
terminator = self.get_terminator()
if not terminator:
2001-01-14 14:09:23 -04:00
# no terminator, collect it all
2014-07-07 19:16:54 -03:00
self.collect_incoming_data(self.ac_in_buffer)
Merged revisions 56753-56781 via svnmerge from svn+ssh://pythondev@svn.python.org/python/branches/p3yk ................ r56760 | neal.norwitz | 2007-08-05 18:55:39 -0700 (Sun, 05 Aug 2007) | 178 lines Merged revisions 56477-56759 via svnmerge from svn+ssh://pythondev@svn.python.org/python/trunk ........ r56485 | facundo.batista | 2007-07-21 17:13:00 -0700 (Sat, 21 Jul 2007) | 5 lines Selectively enable tests for asyncore.readwrite based on the presence of poll support in the select module (since this is the only case in which readwrite can be called). [GSoC - Alan McIntyre] ........ r56488 | nick.coghlan | 2007-07-22 03:18:07 -0700 (Sun, 22 Jul 2007) | 1 line Add explicit relative import tests for runpy.run_module ........ r56509 | nick.coghlan | 2007-07-23 06:41:45 -0700 (Mon, 23 Jul 2007) | 5 lines Correctly cleanup sys.modules after executing runpy relative import tests Restore Python 2.4 ImportError when attempting to execute a package (as imports cannot be guaranteed to work properly if you try it) ........ r56519 | nick.coghlan | 2007-07-24 06:07:38 -0700 (Tue, 24 Jul 2007) | 1 line Tweak runpy test to do a better job of confirming that sys has been manipulated correctly ........ r56520 | nick.coghlan | 2007-07-24 06:58:28 -0700 (Tue, 24 Jul 2007) | 1 line Fix an incompatibility between the -i and -m command line switches as reported on python-dev by PJE - runpy.run_module now leaves any changes it makes to the sys module intact after the function terminates ........ r56523 | nick.coghlan | 2007-07-24 07:39:23 -0700 (Tue, 24 Jul 2007) | 1 line Try to get rid of spurious failure in test_resource on the Debian buildbots by changing the file size limit before attempting to close the file ........ r56533 | facundo.batista | 2007-07-24 14:20:42 -0700 (Tue, 24 Jul 2007) | 7 lines New tests for basic behavior of smtplib.SMTP and smtpd.DebuggingServer. Change to use global host & port number variables. Modified the 'server' to take a string to send back in order to vary test server responses. Added a test for the reaction of smtplib.SMTP to a non-200 HELO response. [GSoC - Alan McIntyre] ........ r56538 | nick.coghlan | 2007-07-25 05:57:48 -0700 (Wed, 25 Jul 2007) | 1 line More buildbot cleanup - let the OS assign the port for test_urllib2_localnet ........ r56539 | nick.coghlan | 2007-07-25 06:18:58 -0700 (Wed, 25 Jul 2007) | 1 line Add a temporary diagnostic message before a strange failure on the alpha Debian buildbot ........ r56543 | martin.v.loewis | 2007-07-25 09:24:23 -0700 (Wed, 25 Jul 2007) | 2 lines Change location of the package index to pypi.python.org/pypi ........ r56551 | georg.brandl | 2007-07-26 02:36:25 -0700 (Thu, 26 Jul 2007) | 2 lines tabs, newlines and crs are valid XML characters. ........ r56553 | nick.coghlan | 2007-07-26 07:03:00 -0700 (Thu, 26 Jul 2007) | 1 line Add explicit test for a misbehaving math.floor ........ r56561 | mark.hammond | 2007-07-26 21:52:32 -0700 (Thu, 26 Jul 2007) | 3 lines In consultation with Kristjan Jonsson, only define WINVER and _WINNT_WIN32 if (a) we are building Python itself and (b) no one previously defined them ........ r56562 | mark.hammond | 2007-07-26 22:08:54 -0700 (Thu, 26 Jul 2007) | 2 lines Correctly detect AMD64 architecture on VC2003 ........ r56566 | nick.coghlan | 2007-07-27 03:36:30 -0700 (Fri, 27 Jul 2007) | 1 line Make test_math error messages more meaningful for small discrepancies in results ........ r56588 | martin.v.loewis | 2007-07-27 11:28:22 -0700 (Fri, 27 Jul 2007) | 2 lines Bug #978833: Close https sockets by releasing the _ssl object. ........ r56601 | martin.v.loewis | 2007-07-28 00:03:05 -0700 (Sat, 28 Jul 2007) | 3 lines Bug #1704793: Return UTF-16 pair if unicodedata.lookup cannot represent the result in a single character. ........ r56604 | facundo.batista | 2007-07-28 07:21:22 -0700 (Sat, 28 Jul 2007) | 9 lines Moved all of the capture_server socket setup code into the try block so that the event gets set if a failure occurs during server setup (otherwise the test will block forever). Changed to let the OS assign the server port number, and client side of test waits for port number assignment before proceeding. The test data in DispatcherWithSendTests is also sent in multiple send() calls instead of one to make sure this works properly. [GSoC - Alan McIntyre] ........ r56611 | georg.brandl | 2007-07-29 01:26:10 -0700 (Sun, 29 Jul 2007) | 2 lines Clarify PEP 343 description. ........ r56614 | georg.brandl | 2007-07-29 02:11:15 -0700 (Sun, 29 Jul 2007) | 2 lines try-except-finally is new in 2.5. ........ r56617 | facundo.batista | 2007-07-29 07:23:08 -0700 (Sun, 29 Jul 2007) | 9 lines Added tests for asynchat classes simple_producer & fifo, and the find_prefix_at_end function. Check behavior of a string given as a producer. Added tests for behavior of asynchat.async_chat when given int, long, and None terminator arguments. Added usepoll attribute to TestAsynchat to allow running the asynchat tests with poll support chosen whether it's available or not (improves coverage of asyncore code). [GSoC - Alan McIntyre] ........ r56620 | georg.brandl | 2007-07-29 10:38:35 -0700 (Sun, 29 Jul 2007) | 2 lines Bug #1763149: use proper slice syntax in docstring. (backport) ........ r56624 | mark.hammond | 2007-07-29 17:45:29 -0700 (Sun, 29 Jul 2007) | 4 lines Correct use of Py_BUILD_CORE - now make sure it is defined before it is referenced, and also fix definition of _WIN32_WINNT. Resolves patch 1761803. ........ r56632 | facundo.batista | 2007-07-30 20:03:34 -0700 (Mon, 30 Jul 2007) | 8 lines When running asynchat tests on OS X (darwin), the test client now overrides asyncore.dispatcher.handle_expt to do nothing, since select.poll gives a POLLHUP error at the completion of these tests. Added timeout & count arguments to several asyncore.loop calls to avoid the possibility of a test hanging up a build. [GSoC - Alan McIntyre] ........ r56633 | nick.coghlan | 2007-07-31 06:38:01 -0700 (Tue, 31 Jul 2007) | 1 line Eliminate RLock race condition reported in SF bug #1764059 ........ r56636 | martin.v.loewis | 2007-07-31 12:57:56 -0700 (Tue, 31 Jul 2007) | 2 lines Define _BSD_SOURCE, to get access to POSIX extensions on OpenBSD 4.1+. ........ r56653 | facundo.batista | 2007-08-01 16:18:36 -0700 (Wed, 01 Aug 2007) | 9 lines Allow the OS to select a free port for each test server. For DebuggingServerTests, construct SMTP objects with a localhost argument to avoid abysmally long FQDN lookups (not relevant to items under test) on some machines that would cause the test to fail. Moved server setup code in the server function inside the try block to avoid the possibility of setup failure hanging the test. Minor edits to conform to PEP 8. [GSoC - Alan McIntyre] ........ r56681 | matthias.klose | 2007-08-02 14:33:13 -0700 (Thu, 02 Aug 2007) | 2 lines - Allow Emacs 22 for building the documentation in info format. ........ r56689 | neal.norwitz | 2007-08-02 23:46:29 -0700 (Thu, 02 Aug 2007) | 1 line Py_ssize_t is defined regardless of HAVE_LONG_LONG. Will backport ........ r56727 | hyeshik.chang | 2007-08-03 21:10:18 -0700 (Fri, 03 Aug 2007) | 3 lines Fix gb18030 codec's bug that doesn't map two-byte characters on GB18030 extension in encoding. (bug reported by Bjorn Stabell) ........ r56751 | neal.norwitz | 2007-08-04 20:23:31 -0700 (Sat, 04 Aug 2007) | 7 lines Handle errors when generating a warning. The value is always written to the returned pointer if getting it was successful, even if a warning causes an error. (This probably doesn't matter as the caller will probably discard the value.) Will backport. ........ ................
2007-08-06 20:33:07 -03:00
self.ac_in_buffer = b''
elif isinstance(terminator, int):
2001-01-14 14:09:23 -04:00
# numeric terminator
n = terminator
if lb < n:
2014-07-07 19:16:54 -03:00
self.collect_incoming_data(self.ac_in_buffer)
self.ac_in_buffer = b''
2001-01-14 14:09:23 -04:00
self.terminator = self.terminator - lb
else:
2014-07-07 19:16:54 -03:00
self.collect_incoming_data(self.ac_in_buffer[:n])
2001-01-14 14:09:23 -04:00
self.ac_in_buffer = self.ac_in_buffer[n:]
self.terminator = 0
self.found_terminator()
else:
# 3 cases:
# 1) end of buffer matches terminator exactly:
# collect data, transition
# 2) end of buffer matches some prefix:
# collect data to the prefix
# 3) end of buffer does not match any prefix:
# collect data
terminator_len = len(terminator)
2001-04-05 19:38:32 -03:00
index = self.ac_in_buffer.find(terminator)
2001-01-14 14:09:23 -04:00
if index != -1:
# we found the terminator
if index > 0:
2014-07-07 19:16:54 -03:00
# don't bother reporting the empty string
# (source of subtle bugs)
self.collect_incoming_data(self.ac_in_buffer[:index])
2001-01-14 14:09:23 -04:00
self.ac_in_buffer = self.ac_in_buffer[index+terminator_len:]
2014-07-07 19:16:54 -03:00
# This does the Right Thing if the terminator
# is changed here.
2001-01-14 14:09:23 -04:00
self.found_terminator()
else:
# check for a prefix of the terminator
2014-07-07 19:16:54 -03:00
index = find_prefix_at_end(self.ac_in_buffer, terminator)
2001-01-14 14:09:23 -04:00
if index:
if index != lb:
# we found a prefix, collect up to the prefix
2014-07-07 19:16:54 -03:00
self.collect_incoming_data(self.ac_in_buffer[:-index])
2001-01-14 14:09:23 -04:00
self.ac_in_buffer = self.ac_in_buffer[-index:]
break
else:
# no prefix, collect it all
2014-07-07 19:16:54 -03:00
self.collect_incoming_data(self.ac_in_buffer)
self.ac_in_buffer = b''
2001-01-14 14:09:23 -04:00
2014-07-07 19:16:54 -03:00
def handle_write(self):
self.initiate_send()
2001-01-14 14:09:23 -04:00
2014-07-07 19:16:54 -03:00
def handle_close(self):
2001-01-14 14:09:23 -04:00
self.close()
2014-07-07 19:16:54 -03:00
def push(self, data):
if not isinstance(data, (bytes, bytearray, memoryview)):
raise TypeError('data argument must be byte-ish (%r)',
type(data))
sabs = self.ac_out_buffer_size
if len(data) > sabs:
for i in range(0, len(data), sabs):
self.producer_fifo.append(data[i:i+sabs])
else:
self.producer_fifo.append(data)
2001-01-14 14:09:23 -04:00
self.initiate_send()
2014-07-07 19:16:54 -03:00
def push_with_producer(self, producer):
self.producer_fifo.append(producer)
2001-01-14 14:09:23 -04:00
self.initiate_send()
2014-07-07 19:16:54 -03:00
def readable(self):
2001-01-14 14:09:23 -04:00
"predicate for inclusion in the readable for select()"
# cannot use the old predicate, it violates the claim of the
# set_terminator method.
# return (len(self.ac_in_buffer) <= self.ac_in_buffer_size)
return 1
2001-01-14 14:09:23 -04:00
2014-07-07 19:16:54 -03:00
def writable(self):
2001-01-14 14:09:23 -04:00
"predicate for inclusion in the writable for select()"
return self.producer_fifo or (not self.connected)
2001-01-14 14:09:23 -04:00
2014-07-07 19:16:54 -03:00
def close_when_done(self):
2001-01-14 14:09:23 -04:00
"automatically close this channel once the outgoing queue is empty"
self.producer_fifo.append(None)
def initiate_send(self):
while self.producer_fifo and self.connected:
first = self.producer_fifo[0]
# handle empty string/buffer or None entry
if not first:
del self.producer_fifo[0]
if first is None:
self.handle_close()
2001-01-14 14:09:23 -04:00
return
# handle classic producer behavior
obs = self.ac_out_buffer_size
try:
data = first[:obs]
except TypeError:
data = first.more()
2001-01-14 14:09:23 -04:00
if data:
self.producer_fifo.appendleft(data)
2001-01-14 14:09:23 -04:00
else:
del self.producer_fifo[0]
continue
2001-01-14 14:09:23 -04:00
if isinstance(data, str) and self.use_encoding:
data = bytes(data, self.encoding)
2001-01-14 14:09:23 -04:00
# send the data
2001-01-14 14:09:23 -04:00
try:
num_sent = self.send(data)
except OSError:
2001-01-14 14:09:23 -04:00
self.handle_error()
return
if num_sent:
if num_sent < len(data) or obs < len(first):
self.producer_fifo[0] = first[num_sent:]
else:
del self.producer_fifo[0]
# we tried to send some actual data
return
2014-07-07 19:16:54 -03:00
def discard_buffers(self):
2001-01-14 14:09:23 -04:00
# Emergencies only!
self.ac_in_buffer = b''
del self.incoming[:]
self.producer_fifo.clear()
2014-07-07 19:16:54 -03:00
class simple_producer:
1999-06-08 10:20:05 -03:00
2014-07-07 19:16:54 -03:00
def __init__(self, data, buffer_size=512):
2001-01-14 14:09:23 -04:00
self.data = data
self.buffer_size = buffer_size
2014-07-07 19:16:54 -03:00
def more(self):
if len(self.data) > self.buffer_size:
2001-01-14 14:09:23 -04:00
result = self.data[:self.buffer_size]
self.data = self.data[self.buffer_size:]
return result
else:
result = self.data
self.data = b''
2001-01-14 14:09:23 -04:00
return result
2014-07-07 19:16:54 -03:00
# Given 'haystack', see if any prefix of 'needle' is at its end. This
# assumes an exact match has already been checked. Return the number of
# characters matched.
# for example:
2014-07-07 19:16:54 -03:00
# f_p_a_e("qwerty\r", "\r\n") => 1
# f_p_a_e("qwertydkjf", "\r\n") => 0
# f_p_a_e("qwerty\r\n", "\r\n") => <undefined>
# this could maybe be made faster with a computed regex?
# [answer: no; circa Python-2.0, Jan 2001]
# new python: 28961/s
# old python: 18307/s
# re: 12820/s
# regex: 14035/s
2014-07-07 19:16:54 -03:00
def find_prefix_at_end(haystack, needle):
2002-04-15 22:38:40 -03:00
l = len(needle) - 1
while l and not haystack.endswith(needle[:l]):
l -= 1
return l