From 8c071d4388efa50e1f65a9f5a726b8ff9ca384ad Mon Sep 17 00:00:00 2001
From: Fred Drake <fdrake@acm.org>
Date: Fri, 26 Jan 2001 20:48:35 +0000
Subject: [PATCH] Massively improved documentation for string formatting
 operations, primarily from Evelyn Mitchell (thanks!).

This closes SF patch #103412.
---
 Doc/lib/libstdtypes.tex | 145 +++++++++++++++++++++++++++++-----------
 1 file changed, 107 insertions(+), 38 deletions(-)

diff --git a/Doc/lib/libstdtypes.tex b/Doc/lib/libstdtypes.tex
index 7d65806d268..df53ec3a4a0 100644
--- a/Doc/lib/libstdtypes.tex
+++ b/Doc/lib/libstdtypes.tex
@@ -62,10 +62,13 @@ These are the Boolean operations, ordered by ascending priority:
 \indexii{Boolean}{operations}
 
 \begin{tableiii}{c|l|c}{code}{Operation}{Result}{Notes}
-  \lineiii{\var{x} or \var{y}}{if \var{x} is false, then \var{y}, else \var{x}}{(1)}
-  \lineiii{\var{x} and \var{y}}{if \var{x} is false, then \var{x}, else \var{y}}{(1)}
+  \lineiii{\var{x} or \var{y}}
+          {if \var{x} is false, then \var{y}, else \var{x}}{(1)}
+  \lineiii{\var{x} and \var{y}}
+          {if \var{x} is false, then \var{x}, else \var{y}}{(1)}
   \hline
-  \lineiii{not \var{x}}{if \var{x} is false, then \code{1}, else \code{0}}{(2)}
+  \lineiii{not \var{x}}
+          {if \var{x} is false, then \code{1}, else \code{0}}{(2)}
 \end{tableiii}
 \opindex{and}
 \opindex{or}
@@ -591,45 +594,51 @@ Return a copy of the string converted to uppercase.
 \index{printf-style formatting}
 \index{sprintf-style formatting}
 
-String objects have one unique built-in operation: the \code{\%}
-operator (modulo) with a string left argument interprets this string
-as a C \cfunction{sprintf()} format string to be applied to the
-right argument, and returns the string resulting from this formatting
-operation.
+String and Unicode objects have one unique built-in operation: the
+\code{\%} operator (modulo).  Given \code{\var{format} \%
+\var{values}} (where \var{format} is a string or Unicode object),
+\code{\%} conversion specifications in \var{format} are replaced with
+zero or more elements of \var{values}.  The effect is similar to
+using \cfunction{sprintf()} in the C language.  If \var{format} is a
+Unicode object, or if any of the objects being converted using the
+\code{\%s} conversion are Unicode objects, the result will be a
+Unicode object as well.
 
-The right argument should be a tuple with one item for each argument
-required by the format string; if the string requires a single
-argument, the right argument may also be a single non-tuple
-object.\footnote{A tuple object in this case should be a singleton.
-}  The following format characters are understood: \code{\%},
-\code{c}, \code{r}, \code{s}, \code{i}, \code{d}, \code{u}, \code{o},
-\code{x}, \code{X}, \code{e}, \code{E}, \code{f}, \code{g}, \code{G}.
-Width and precision may be a \code{*} to specify that an integer argument
-specifies the actual width or precision.  The flag characters
-\code{-}, \code{+}, blank, \code{\#} and \code{0} are understood.  The 
-size specifiers \code{h}, \code{l} or \code{L} may be present but are
-ignored.  The \code{\%s} conversion takes any Python object and
-converts it to a string using \code{str()} before formatting it; the
-\code{\%r} conversion is similar but applies the \function{repr()}
-function instead.  The
-ANSI features \code{\%p} and \code{\%n} are not supported.  Since
-Python strings have an explicit length, \code{\%s} conversions don't
-assume that \code{'\e0'} is the end of the string.
+If \var{format} requires a single argument, \var{values} may be a
+single non-tuple object.\footnote{A tuple object in this case should
+  be a singleton.}  Otherwise, \var{values} must be a tuple with
+exactly the number of items specified by the format string, or a
+single mapping object (for example, a dictionary).
 
-For safety reasons, floating point precisions are clipped to 50;
-\code{\%f} conversions for numbers whose absolute value is over 1e25
-are replaced by \code{\%g} conversions.\footnote{
-  These numbers are fairly arbitrary.  They are intended to
-  avoid printing endless strings of meaningless digits without hampering
-  correct use and without having to know the exact precision of floating
-  point values on a particular machine.
-}  All other errors raise exceptions.
+A conversion specifier contains two or more characters and has the
+following components, which must occur in this order:
+
+\begin{enumerate}
+  \item  The \character{\%} character, which marks the start of the
+         specifier.
+  \item  Mapping key value (optional), consisting of an identifier in
+         parentheses (for example, \code{(somename)}).
+  \item  Conversion flags (optional), which affect the result of some
+         conversion types.
+  \item  Minimum field width (optional).  If specified as an
+         \character{*} (asterisk), the actual width is read from the
+         next element of the tuple in \var{values}, and the object to
+         convert comes after the minimum field width and optional
+         precision.
+  \item  Precision (optional), given as a \character{.} (dot) followed
+         by the precision.  If specified as \character{*} (an
+         asterisk), the actual precision is read from the next element
+         of the tuple in \var{values}, and the value to convert comes
+         after the precision.
+  \item  Length modifier (optional).
+  \item  Conversion type.
+\end{enumerate}
 
 If the right argument is a dictionary (or any kind of mapping), then
-the formats in the string must have a parenthesized key into that
-dictionary inserted immediately after the \character{\%} character,
-and each format formats the corresponding entry from the mapping.
-For example:
+the formats in the string \emph{must} have a parenthesized key into
+that dictionary inserted immediately after the \character{\%}
+character, and each format formats the corresponding entry from the
+mapping.  For example:
 
 \begin{verbatim}
 >>> count = 2
@@ -641,6 +650,66 @@ Python has 002 quote types.
 In this case no \code{*} specifiers may occur in a format (since they
 require a sequential parameter list).
 
+The conversion flag characters are:
+
+\begin{tableii}{c|l}{character}{Flag}{Meaning}
+  \lineii{\#}{The value conversion will use the ``alternate form''
+              (where defined below).}
+  \lineii{0}{The conversion will be zero padded.}
+  \lineii{-}{The converted value is left adjusted (overrides
+             the \character{0} flag if both are given).}
+  \lineii{{~}}{(a space) A blank should be left before a positive number
+             (or empty string) produced by a signed conversion.}
+  \lineii{+}{A sign character (\character{+} or \character{-}) will
+             precede the conversion (overrides a ``space'' flag).}
+\end{tableii}
+
+A length modifier (\code{h}, \code{l}, or \code{L}) may be
+present, but is ignored as it is not necessary for Python.
+
+The conversion types are:
+
+\begin{tableii}{c|l}{character}{Conversion}{Meaning}
+  \lineii{d}{Signed integer decimal.}
+  \lineii{i}{Signed integer decimal.}
+  \lineii{o}{Unsigned octal.}
+  \lineii{u}{Unsigned decimal.}
+  \lineii{x}{Unsigned hexadecimal (lowercase).}
+  \lineii{X}{Unsigned hexadecimal (uppercase).}
+  \lineii{e}{Floating point exponential format (lowercase).}
+  \lineii{E}{Floating point exponential format (uppercase).}
+  \lineii{f}{Floating point decimal format.}
+  \lineii{F}{Floating point decimal format.}
+  \lineii{g}{Same as \character{e} if exponent is less than -4 or
+             not less than precision, \character{f} otherwise.}
+  \lineii{G}{Same as \character{E} if exponent is less than -4 or
+             not less than precision, \character{F} otherwise.}
+  \lineii{c}{Single character (accepts integer or single character
+             string).}
+  \lineii{r}{String (converts any Python object using
+             \function{repr()}).}
+  \lineii{s}{String (converts any Python object using
+             \function{str()}).}
+  \lineii{\%}{No argument is converted, results in a \character{\%}
+              character in the result.  (The complete specification is
+              \code{\%\%}.)}
+\end{tableii}
+
+% XXX Examples?
+
+
+Since Python strings have an explicit length, \code{\%s} conversions
+do not assume that \code{'\e0'} is the end of the string.
+
+For safety reasons, floating point precisions are clipped to 50;
+\code{\%f} conversions for numbers whose absolute value is over 1e25
+are replaced by \code{\%g} conversions.\footnote{
+  These numbers are fairly arbitrary.  They are intended to
+  avoid printing endless strings of meaningless digits without hampering
+  correct use and without having to know the exact precision of floating
+  point values on a particular machine.
+}  All other errors raise exceptions.
+
 Additional string operations are defined in standard module
 \refmodule{string} and in built-in module \refmodule{re}.
 \refstmodindex{string}