From b1af86a1d7ada842ede1d5ea02a9a04f3939b8ca Mon Sep 17 00:00:00 2001 From: Steve Holden Date: Wed, 3 Jul 2002 18:36:39 +0000 Subject: [PATCH] Revise asyncore documentation and document asynchat for the first time. --- Doc/Makefile.deps | 1 + Doc/lib/lib.tex | 1 + Doc/lib/libasynchat.tex | 254 ++++++++++++++++++++++++++++++++++++++++ Doc/lib/libasyncore.tex | 146 ++++++++++++++--------- 4 files changed, 347 insertions(+), 55 deletions(-) create mode 100644 Doc/lib/libasynchat.tex diff --git a/Doc/Makefile.deps b/Doc/Makefile.deps index 29a6bc10e77..7693c21e40f 100644 --- a/Doc/Makefile.deps +++ b/Doc/Makefile.deps @@ -315,6 +315,7 @@ LIBFILES= $(MANSTYLES) $(INDEXSTYLES) $(COMMONTEX) \ lib/libstatvfs.tex \ lib/libtty.tex \ lib/libasyncore.tex \ + lib/libasynchat.tex \ lib/libatexit.tex \ lib/libmmap.tex \ lib/tkinter.tex \ diff --git a/Doc/lib/lib.tex b/Doc/lib/lib.tex index 70b1c937159..d87a1cfbfbd 100644 --- a/Doc/lib/lib.tex +++ b/Doc/lib/lib.tex @@ -217,6 +217,7 @@ and how to embed it in other applications. \input{libxmlrpclib} \input{libsimplexmlrpc} \input{libasyncore} +\input{libasynchat} \input{netdata} % Internet Data Handling \input{libformatter} diff --git a/Doc/lib/libasynchat.tex b/Doc/lib/libasynchat.tex new file mode 100644 index 00000000000..eea4f181a55 --- /dev/null +++ b/Doc/lib/libasynchat.tex @@ -0,0 +1,254 @@ +\section{\module{asynchat} --- + Asynchronous socket command/response handler} + +\declaremodule{standard}{asynchat} +\modulesynopsis{Support for asynchronous command/response protocols.} +\moduleauthor{Sam Rushing}{rushing@nightmare.com} +\sectionauthor{Steve Holden}{sholden@holdenweb.com} + +This module builds on the \refmodule{asyncore} infrastructure, +simplifying asynchronous clients and servers and making it easier to +handle protocols whose elements are terminated by arbitrary strings, or +are of variable length. 
\refmodule{asynchat} defines the abstract class +\class{async_chat} that you subclass, providing implementations of the +\method{collect_incoming_data()} and \method{found_terminator()} +methods. It uses the same asynchronous loop as \refmodule{asyncore}, and +the two types of channel, \class{asyncore.dispatcher} and +\class{asynchat.async_chat}, can freely be mixed in the channel map. +Typically an \class{asyncore.dispatcher} server channel generates new +\class{asynchat.async_chat} channel objects as it receives incoming +connection requests. + +\begin{classdesc}{async_chat}{} + This class is an abstract subclass of \class{asyncore.dispatcher}. To make + practical use of the code you must subclass \class{async_chat}, providing + meaningful \method{collect_incoming_data()} and \method{found_terminator()} + methods. The \class{asyncore.dispatcher} methods can be + used, although not all make sense in a message/response context. + + Like \class{asyncore.dispatcher}, \class{async_chat} defines a set of events + that are generated by an analysis of socket conditions after a + \cfunction{select()} call. Once the polling loop has been started the + \class{async_chat} object's methods are called by the event-processing + framework with no action on the part of the programmer. + + Unlike \class{asyncore.dispatcher}, \class{async_chat} allows you to define + a first-in-first-out queue (fifo) of \emph{producers}. A producer need have + only one method, \method{more()}, which should return data to be transmitted + on the channel. The producer indicates exhaustion (\emph{i.e.} that it contains + no more data) by having its \method{more()} method return the empty string. At + this point the \class{async_chat} object removes the producer from the fifo + and starts using the next producer, if any. When the producer fifo is empty + the \method{handle_write()} method does nothing.
You use the channel object's + \method{set_terminator()} method to describe how to recognize the end + of, or an important breakpoint in, an incoming transmission from the + remote endpoint. + + To build a functioning \class{async_chat} subclass your + input methods \method{collect_incoming_data()} and + \method{found_terminator()} must handle the data that the channel receives + asynchronously. The methods are described below. +\end{classdesc} + +\begin{methoddesc}{close_when_done}{} + Pushes a \code{None} on to the producer fifo. When this producer is + popped off the fifo it causes the channel to be closed. +\end{methoddesc} + +\begin{methoddesc}{collect_incoming_data}{data} + Called with \var{data} holding an arbitrary amount of received data. + The default method, which must be overridden, raises a \exception{NotImplementedError} exception. +\end{methoddesc} + +\begin{methoddesc}{discard_buffers}{} + In emergencies this method will discard any data held in the input and/or + output buffers and the producer fifo. +\end{methoddesc} + +\begin{methoddesc}{found_terminator}{} + Called when the incoming data stream matches the termination condition + set by \method{set_terminator}. The default method, which must be overridden, + raises a \exception{NotImplementedError} exception. The buffered input data should + be available via an instance attribute. +\end{methoddesc} + +\begin{methoddesc}{get_terminator}{} + Returns the current terminator for the channel. +\end{methoddesc} + +\begin{methoddesc}{handle_close}{} + Called when the channel is closed. The default method silently closes + the channel's socket. +\end{methoddesc} + +\begin{methoddesc}{handle_read}{} + Called when a read event fires on the channel's socket in the + asynchronous loop. 
The default method checks for the termination + condition established by \method{set_terminator()}, which can be either + the appearance of a particular string in the input stream or the receipt + of a particular number of characters. When the terminator is found, + \method{handle_read} calls the \method{found_terminator()} method after + calling \method{collect_incoming_data()} with any data preceding the + terminating condition. +\end{methoddesc} + +\begin{methoddesc}{handle_write}{} + Called when the application may write data to the channel. + The default method calls the \method{initiate_send()} method, which in turn + will call \method{refill_buffer()} to collect data from the producer + fifo associated with the channel. +\end{methoddesc} + +\begin{methoddesc}{push}{data} + Creates a \class{simple_producer} object (\emph{see below}) containing the data and + pushes it on to the channel's \code{producer_fifo} to ensure its + transmission. This is all you need to do to have the channel write + the data out to the network, although it is possible to use your + own producers in more complex schemes to implement encryption and + chunking, for example. +\end{methoddesc} + +\begin{methoddesc}{push_with_producer}{producer} + Takes a producer object and adds it to the producer fifo associated with + the channel. When all currently-pushed producers have been exhausted + the channel will consume this producer's data by calling its + \method{more()} method and send the data to the remote endpoint. +\end{methoddesc} + +\begin{methoddesc}{readable}{} + Should return \code{True} for the channel to be included in the set of + channels tested by the \cfunction{select()} loop for readability. +\end{methoddesc} + +\begin{methoddesc}{refill_buffer}{} + Refills the output buffer by calling the \method{more()} method of the + producer at the head of the fifo. If it is exhausted then the + producer is popped off the fifo and the next producer is activated. 
+ If the current producer is, or becomes, \code{None} then the channel + is closed. +\end{methoddesc} + +\begin{methoddesc}{set_terminator}{term} + Sets the terminating condition to be recognised on the channel. \var{term} + may be any of three types of value, corresponding to three different ways + to handle incoming protocol data. + + \begin{tableii}{l|l}{}{term}{Description} + \lineii{\emph{string}}{Will call \method{found_terminator()} when the + string is found in the input stream} + \lineii{\emph{integer}}{Will call \method{found_terminator()} when the + indicated number of characters have been received} + \lineii{\code{None}}{The channel continues to collect data forever} + \end{tableii} + + Note that any data following the terminator will be available for reading by + the channel after \method{found_terminator()} is called. +\end{methoddesc} + +\begin{methoddesc}{writable}{} + Should return \code{True} as long as items remain on the producer fifo, + or the channel is connected and the channel's output buffer is non-empty. +\end{methoddesc} + +\subsection{asynchat --- Auxiliary Classes and Functions} + +\begin{classdesc}{simple_producer}{data\optional{, buffer_size=512}} + A \class{simple_producer} takes a chunk of data and an optional buffer size. + Repeated calls to its \method{more()} method yield successive chunks of the + data no larger than \var{buffer_size}. +\end{classdesc} + +\begin{methoddesc}{more}{} + Produces the next chunk of information from the producer, or returns the empty string. +\end{methoddesc} + +\begin{classdesc}{fifo}{\optional{list=None}} + Each channel maintains a \class{fifo} holding data which has been pushed by the + application but not yet popped for writing to the channel. + A \class{fifo} is a list used to hold data and/or producers until they are required. + If the \var{list} argument is provided then it should contain producers or + data items to be written to the channel.
+\end{classdesc} + +\begin{methoddesc}{is_empty}{} + Returns \code{True} iff the fifo is empty. +\end{methoddesc} + +\begin{methoddesc}{first}{} + Returns the least-recently \method{push()}ed item from the fifo. +\end{methoddesc} + +\begin{methoddesc}{push}{data} + Adds the given data (which may be a string or a producer object) to the + producer fifo. +\end{methoddesc} + +\begin{methoddesc}{pop}{} + If the fifo is not empty, returns \code{True, first()}, deleting the popped + item. Returns \code{False, None} for an empty fifo. +\end{methoddesc} + +The \module{asynchat} module also defines one utility function, which may be +of use in network and textual analysis operations. + +\begin{funcdesc}{find_prefix_at_end}{haystack, needle} + Returns \code{True} if string \var{haystack} ends with any non-empty + prefix of string \var{needle}. +\end{funcdesc} + +\subsection{asynchat Example \label{asynchat-example}} + +The following partial example shows how HTTP requests can be read with +\class{async_chat}. A web server might create an \class{http_request_handler} object for +each incoming client connection. Notice that initially the +channel terminator is set to match the blank line at the end of the HTTP +headers, and a flag indicates that the headers are being read. + +Once the headers have been read, if the request is of type POST +(indicating that further data are present in the input stream) then the +\code{Content-Length:} header is used to set a numeric terminator to +read the right amount of data from the channel. + +The \method{handle_request()} method is called once all relevant input +has been marshalled, after setting the channel terminator to \code{None} +to ensure that any extraneous data sent by the web client are ignored. 
+ +\begin{verbatim} +class http_request_handler(asynchat.async_chat): + + def __init__(self, conn, addr, sessions, log): + asynchat.async_chat.__init__(self, conn=conn) + self.addr = addr + self.sessions = sessions + self.ibuffer = [] + self.obuffer = "" + self.set_terminator("\r\n\r\n") + self.reading_headers = True + self.handling = False + self.cgi_data = None + self.log = log + + def collect_incoming_data(self, data): + """Buffer the data""" + self.ibuffer.append(data) + + def found_terminator(self): + if self.reading_headers: + self.reading_headers = False + self.parse_headers("".join(self.ibuffer)) + self.ibuffer = [] + if self.op.upper() == "POST": + clen = self.headers.getheader("content-length") + self.set_terminator(int(clen)) + else: + self.handling = True + self.set_terminator(None) + self.handle_request() + elif not self.handling: + self.set_terminator(None) # browsers sometimes over-send + self.cgi_data = parse(self.headers, "".join(self.ibuffer)) + self.handling = True + self.ibuffer = [] + self.handle_request() +\end{verbatim} + diff --git a/Doc/lib/libasyncore.tex b/Doc/lib/libasyncore.tex index a85998e62f7..be192951515 100644 --- a/Doc/lib/libasyncore.tex +++ b/Doc/lib/libasyncore.tex @@ -6,6 +6,7 @@ handling services.} \moduleauthor{Sam Rushing}{rushing@nightmare.com} \sectionauthor{Christopher Petrilli}{petrilli@amber.org} +\sectionauthor{Steve Holden}{sholden@holdenweb.com} % Heavily adapted from original documentation by Sam Rushing. This module provides the basic infrastructure for writing asynchronous @@ -26,35 +27,21 @@ multiple communication channels at once; doing other work while your I/O is taking place in the ``background.'' Although this strategy can seem strange and complex, especially at first, it is in many ways easier to understand and control than multi-threaded programming.
-The module documented here solves many of the difficult problems for +The \module{asyncore} module solves many of the difficult problems for you, making the task of building sophisticated high-performance -network servers and clients a snap. +network servers and clients a snap. For ``conversational'' applications +and protocols the companion \refmodule{asynchat} module is invaluable. -\begin{classdesc}{dispatcher}{} - The first class we will introduce is the \class{dispatcher} class. - This is a thin wrapper around a low-level socket object. To make - it more useful, it has a few methods for event-handling on it. - Otherwise, it can be treated as a normal non-blocking socket object. +The basic idea behind both modules is to create one or more network +\emph{channels}, instances of class \class{asyncore.dispatcher} and +\class{asynchat.async_chat}. Creating the channels adds them to a global +map, used by the \function{loop()} function if you do not provide it +with your own \var{map}. - The direct interface between the select loop and the socket object - are the \method{handle_read_event()} and - \method{handle_write_event()} methods. These are called whenever an - object `fires' that event. - - The firing of these low-level events can tell us whether certain - higher-level events have taken place, depending on the timing and - the state of the connection. For example, if we have asked for a - socket to connect to another host, we know that the connection has - been made when the socket fires a write event (at this point you - know that you may write to it with the expectation of success). 
- The implied higher-level events are: - - \begin{tableii}{l|l}{code}{Event}{Description} - \lineii{handle_connect()}{Implied by a write event} - \lineii{handle_close()}{Implied by a read event with no data available} - \lineii{handle_accept()}{Implied by a read event on a listening socket} - \end{tableii} -\end{classdesc} +Once the initial channel or channels have been created, calling the \function{loop()} +function activates channel service, which continues until the last +channel (including any that have been added to the map during asynchronous +service) is closed. \begin{funcdesc}{loop}{\optional{timeout\optional{, use_poll\optional{, map}}}} @@ -64,21 +51,67 @@ network servers and clients a snap. \function{select()} or \function{poll()} call, measured in seconds; the default is 30 seconds. The \var{use_poll} parameter, if true, indicates that \function{poll()} should be used in preference to - \function{select()} (the default is false). The \var{map} parameter - is a dictionary that gives a list of channels to watch. As channels + \function{select()} (the default is \code{False}). The \var{map} parameter + is a dictionary whose items are the channels to watch. As channels are closed they are deleted from their map. If \var{map} is - omitted, a global map is used. + omitted, a global map is used (this map is updated by the default + class \method{__init__()} + -- make sure you extend, rather than override, \method{__init__()} + if you want to retain this behavior). + + Channels (instances of \class{asyncore.dispatcher}, \class{asynchat.async_chat} + and subclasses thereof) can freely be mixed in the map. \end{funcdesc} -This set of user-level events is larger than the basics. The -full set of methods that can be overridden in your subclass are: +\begin{classdesc}{dispatcher}{} + The \class{dispatcher} class is a thin wrapper around a low-level socket object. + To make it more useful, it has a few methods for event-handling which are called + from the asynchronous loop.
+ Otherwise, it can be treated as a normal non-blocking socket object. + + Two class attributes can be modified, to improve performance, + or possibly even to conserve memory. + + \begin{datadesc}{ac_in_buffer_size} + The asynchronous input buffer size (default \code{4096}). + \end{datadesc} + + \begin{datadesc}{ac_out_buffer_size} + The asynchronous output buffer size (default \code{4096}). + \end{datadesc} + + The firing of low-level events at certain times or in certain connection + states tells the asynchronous loop that certain higher-level events have + taken place. For example, if we have asked for a socket to connect to + another host, we know that the connection has been made when the socket + becomes writable for the first time (at this point you know that you may + write to it with the expectation of success). The implied higher-level + events are: + + \begin{tableii}{l|l}{code}{Event}{Description} + \lineii{handle_connect()}{Implied by the first write event} + \lineii{handle_close()}{Implied by a read event with no data available} + \lineii{handle_accept()}{Implied by a read event on a listening socket} + \end{tableii} + + During asynchronous processing, each mapped channel's \method{readable()} + and \method{writable()} methods are used to determine whether the channel's + socket should be added to the list of channels \cfunction{select()}ed or + \cfunction{poll()}ed for read and write events. + +\end{classdesc} + +Thus, the set of channel events is larger than the basic socket events. +The full set of methods that can be overridden in your subclass follows: \begin{methoddesc}{handle_read}{} - Called when there is new data to be read from a socket. + Called when the asynchronous loop detects that a \method{read()} + call on the channel's socket will succeed. \end{methoddesc} \begin{methoddesc}{handle_write}{} - Called when there is an attempt to write data to the object. 
+ Called when the asynchronous loop detects that a writable socket + can be written. Often this method will implement the necessary buffering for performance. For example: @@ -96,9 +129,9 @@ def handle_write(self): \end{methoddesc} \begin{methoddesc}{handle_connect}{} - Called when the socket actually makes a connection. This - might be used to send a ``welcome'' banner, or something - similar. + Called when the active opener's socket actually makes a connection. + Might send a ``welcome'' banner, or initiate a protocol + negotiation with the remote endpoint, for example. \end{methoddesc} \begin{methoddesc}{handle_close}{} @@ -111,28 +144,29 @@ def handle_write(self): \end{methoddesc} \begin{methoddesc}{handle_accept}{} - Called on listening sockets when they actually accept a new - connection. + Called on listening channels (passive openers) when a + connection can be established with a new remote endpoint that + has issued a \method{connect()} call for the local endpoint. \end{methoddesc} \begin{methoddesc}{readable}{} - Each time through the \method{select()} loop, the set of sockets - is scanned, and this method is called to see if there is any - interest in reading. The default method simply returns \code{True}, - indicating that by default, all channels will be interested. + Called each time around the asynchronous loop to determine whether a + channel's socket should be added to the list on which read events can + occur. The default method simply returns \code{True}, + indicating that by default, all channels will be interested in + read events. \end{methoddesc} \begin{methoddesc}{writable}{} - Each time through the \method{select()} loop, the set of sockets - is scanned, and this method is called to see if there is any - interest in writing. The default method simply returns \code{True}, - indicating that by default, all channels will be interested. 
+ Called each time around the asynchronous loop to determine whether a + channel's socket should be added to the list on which write events can + occur. The default method simply returns \code{True}, + indicating that by default, all channels will be interested in + write events. \end{methoddesc} -In addition, there are the basic methods needed to construct and -manipulate ``channels,'' which are what we will call the socket -connections in this context. Note that most of these are nearly -identical to their socket partners. +In addition, each channel delegates or extends many of the socket methods. +Most of these are nearly identical to their socket partners. \begin{methoddesc}{create_socket}{family, type} This is identical to the creation of a normal socket, and @@ -144,15 +178,17 @@ identical to their socket partners. \begin{methoddesc}{connect}{address} As with the normal socket object, \var{address} is a tuple with the first element the host to connect to, and the - second the port. + second the port number. \end{methoddesc} \begin{methoddesc}{send}{data} - Send \var{data} out the socket. + Send \var{data} to the remote end-point of the socket. \end{methoddesc} \begin{methoddesc}{recv}{buffer_size} - Read at most \var{buffer_size} bytes from the socket. + Read at most \var{buffer_size} bytes from the socket's remote end-point. + An empty string implies that the channel has been closed from the other + end. \end{methoddesc} \begin{methoddesc}{listen}{backlog} @@ -179,13 +215,13 @@ identical to their socket partners. \begin{methoddesc}{close}{} Close the socket. All future operations on the socket object - will fail. The remote end will receive no more data (after + will fail. The remote end-point will receive no more data (after queued data is flushed). Sockets are automatically closed when they are garbage-collected. 
\end{methoddesc} -\subsection{Example basic HTTP client \label{asyncore-example}} +\subsection{asyncore Example basic HTTP client \label{asyncore-example}} As a basic example, below is a very basic HTTP client that uses the \class{dispatcher} class to implement its socket handling: