Clean up tools: remove "world" and "framer", move single SSL script to scripts/.

This commit is contained in:
Georg Brandl 2010-12-30 17:32:22 +00:00
parent 4cf83f4d12
commit 5684a9ac30
15 changed files with 0 additions and 1559 deletions

View File

@ -1,8 +0,0 @@
framer is a tool to generate boilerplate code for C extension types.
The boilerplate is generated from a specification object written in
Python. The specification uses the class statement to describe the
extension module and any extension types it contains. From the
specification, framer can generate all the boilerplate C code,
including function definitions, argument handling code, and type
objects.

View File

@ -1,6 +0,0 @@
Add spec for getsets.
Generate a distutils setup script.
Handle operator overloading.
Generate traverse and clear methods for GC.
Handle mapping, sequence, buffer protocols.
Finish the todo list.

View File

@ -1,126 +0,0 @@
"""Generate the skeleton for cStringIO as an example of framer."""
from framer.bases import Module, Type
from framer.member import member
# Example framer specification.  Nothing here is executed as an
# implementation: the class statement only *describes* the extension module,
# and the cStringIO.gen() call on the last line asks the module metaclass to
# analyze the description and write the C skeleton.
class cStringIO(Module):
"""A simple fast partial StringIO replacement.
This module provides a simple useful replacement for the StringIO
module that is written in C. It does not provide the full
generality of StringIO, but it provides enough for most
applications and is especially useful in conjunction with the
pickle module.
Usage:
from io import StringIO
an_output_stream = StringIO()
an_output_stream.write(some_stuff)
...
value = an_output_stream.getvalue()
an_input_stream = StringIO(a_string)
spam = an_input_stream.readline()
spam = an_input_stream.read(5)
an_input_stream.seek(0) # OK, start over
spam = an_input_stream.read() # and read it all
"""
# Output file name; without it the generator falls back to
# "<ClassName>module.c".
__file__ = "cStringIO.c"
# Plain functions in the module body become module-level C functions.
def StringIO(o):
"""Return a StringIO-like stream for reading or writing"""
# pyarg is the PyArg_ParseTuple format; "|" marks the following arguments
# as optional and "O" selects a PyObject * argument.
StringIO.pyarg = "|O"
class InputType(Type):
"Simple type for treating strings as input file streams"
# abbrev replaces the class name as the prefix of generated C identifiers.
abbrev = "input"
# C struct for instances; parsed by framer's struct parser, which expects
# "typedef struct {", a PyObject*HEAD line, members, then "} Name;".
struct = """\
typedef struct {
PyObject_HEAD
char *buf;
int pos;
int size;
PyObject *pbuf;
} InputObject;
"""
def flush(self):
"""Does nothing"""
# NOTE(review): the docstring below mentions use_pos, but the signature
# declares no such parameter -- confirm which one is intended.
def getvalue(self):
"""Get the string value.
If use_pos is specified and is a true value, then the
string returned will include only the text up to the
current file position.
"""
def isatty(self):
"""Always returns False"""
def read(self, s):
"""Return s characters or the rest of the string."""
# "|i": one optional C int argument.
read.pyarg = "|i"
def readline(self):
"""Read one line."""
def readlines(self, hint):
"""Read all lines."""
readlines.pyarg = "|i"
def reset(self):
"""Reset the file position to the beginning."""
def tell(self):
"""Get the current position."""
def truncate(self, pos):
"""Truncate the file at the current position."""
truncate.pyarg = "|i"
# The Python-level default (mode=0) is copied into the generated C
# declaration of the corresponding argument.
def seek(self, position, mode=0):
"""Set the current position.
The optional mode argument can be 0 for absolute, 1 for relative,
and 2 for relative to EOF. The default is absolute.
"""
seek.pyarg = "i|i"
# No docstring here, so no doc variable is generated for this method.
def close(self):
pass
class OutputType(InputType):
"Simple type for output strings."
abbrev = "output"
struct = """\
typedef struct {
PyObject_HEAD
char *buf;
int pos;
int size;
int softspace;
} OutputObject;
"""
# member() with no arguments: the member type code is deduced from the
# struct field of the same name (here "int softspace").
softspace = member()
def close(self):
"""Explicitly release resources."""
def write(self, s):
"""Write a string to the file."""
# XXX Hack: writing None resets the buffer
def writelines(self, lines):
"""Write each string in lines."""
# Generate the C skeleton described above.
cStringIO.gen()

View File

@ -1,6 +0,0 @@
"""A tool to generate basic framework for C extension types.
The basic idea is the same as modulator, but the code generates code
using many of the new features introduced in Python 2.2. It also
takes a more declarative approach to generating code.
"""

View File

@ -1,215 +0,0 @@
"""Provides the Module and Type base classes that user code inherits from."""
__all__ = ["Module", "Type", "member"]
from framer import struct, template
from framer.function import Function, Method
from framer.member import member
from framer.slots import *
from framer.util import cstring, unindent
from types import FunctionType
# The Module and Type classes are implemented using metaclasses,
# because most of the methods are class methods. It is easier to use
# metaclasses than the cumbersome classmethod() builtin. They have
# class methods because they are exposed to user code as base classes.
class BaseMetaclass(type):
    """Code-generation helpers common to the module and type metaclasses."""

    # Only the PyMethodDef table is shared so far.

    def dump_methoddef(self, f, functions, vars):
        """Write a PyMethodDef table for *functions* to file object *f*.

        *functions* maps Python names to objects exposing ``__doc__`` and a
        ``vars`` mapping; *vars* supplies the substitutions for the table's
        opening and closing templates.  Nothing is written when *functions*
        is empty.
        """
        if not functions:
            return

        def emit(templ, mapping=vars):
            print(templ % mapping, file=f)

        emit(template.methoddef_start)
        for _name, func in sorted(functions.items()):
            # Entries with a docstring use the template that also references
            # the generated doc variable.
            if func.__doc__:
                entry = template.methoddef_def_doc
            else:
                entry = template.methoddef_def
            emit(entry, func.vars)
        emit(template.methoddef_end)
class ModuleMetaclass(BaseMetaclass):
    """Provides methods for Module class."""

    def gen(self):
        """Analyze the specification and write the generated C file."""
        self.analyze()
        self.initvars()
        # The with-block guarantees the output file is closed even when
        # dump() raises part-way through generation.
        with open(self.__filename, "w") as f:
            self.dump(f)

    def analyze(self):
        """Collect the functions and types declared in the class body."""
        self.name = getattr(self, "abbrev", self.__name__)
        self.__functions = {}
        self.__types = {}
        self.__members = False

        # Iterate over a snapshot so attribute writes made during analysis
        # cannot disturb the iteration.
        for name, obj in list(self.__dict__.items()):
            if isinstance(obj, FunctionType):
                self.__functions[name] = Function(obj, self)
            elif isinstance(obj, TypeMetaclass):
                # TypeMetaclass reads this (name-mangled) attribute to build
                # the dotted tp_name.
                obj._TypeMetaclass__module = self.name
                obj.analyze()
                self.__types[name] = obj
                if obj.has_members():
                    self.__members = True

    def initvars(self):
        """Build the substitution dictionary used by the module templates."""
        v = self.__vars = {}
        filename = getattr(self, "__file__", None)
        if filename is None:
            filename = self.__name__ + "module.c"
        self.__filename = v["FileName"] = filename
        name = v["ModuleName"] = self.__name__
        v["MethodDefName"] = "%s_methods" % name
        # A module without a docstring must not crash here; dump() only
        # emits the module_doc template when a docstring exists.
        if self.__doc__:
            v["ModuleDocstring"] = cstring(unindent(self.__doc__))

    def dump(self, f):
        """Write the complete C source for the module to file object *f*."""
        def p(templ, vars=self.__vars):  # helper function to generate output
            print(templ % vars, file=f)

        p(template.module_start)
        if self.__members:
            p(template.member_include)
        print(file=f)

        if self.__doc__:
            p(template.module_doc)

        for name, typeobj in sorted(self.__types.items()):
            typeobj.dump(f)

        for name, func in sorted(self.__functions.items()):
            func.dump(f)

        self.dump_methoddef(f, self.__functions, self.__vars)

        p(template.module_init_start)
        for name, typeobj in sorted(self.__types.items()):
            typeobj.dump_init(f)

        p("}")


# Base class that user module specifications inherit from; all behaviour
# lives in ModuleMetaclass.
class Module(metaclass=ModuleMetaclass):
    pass
class TypeMetaclass(BaseMetaclass):
    """Provides the code-generation methods for Type classes."""

    def dump(self, f):
        """Write the C source for this type to file object *f*."""
        self.initvars()

        # defined after initvars() so that __vars is defined
        def p(templ, vars=self.__vars):
            print(templ % vars, file=f)

        # analyze() already tolerates a missing struct attribute; do the
        # same here instead of raising AttributeError.
        struct_src = getattr(self, "struct", None)
        if struct_src is not None:
            print(unindent(struct_src, False), file=f)

        if self.__doc__:
            p(template.docstring)

        for name, func in sorted(self.__methods.items()):
            func.dump(f)

        self.dump_methoddef(f, self.__methods, self.__vars)
        self.dump_memberdef(f)
        self.dump_slots(f)

    def has_members(self):
        """Return True if the type declares at least one member."""
        return bool(self.__members)

    def analyze(self):
        """Collect methods, members and slots; called by ModuleMetaclass."""
        self.name = getattr(self, "abbrev", self.__name__)
        src = getattr(self, "struct", None)
        if src is not None:
            self.__struct = struct.parse(src)
        else:
            self.__struct = None
        self.__methods = {}
        self.__members = {}
        # Walk the MRO from the most basic class to the most derived one so
        # that a subclass definition replaces the inherited one.  Walking it
        # front to back let base-class methods overwrite the overrides.
        for cls in reversed(self.__mro__):
            for k, v in list(cls.__dict__.items()):
                if isinstance(v, FunctionType):
                    self.__methods[k] = Method(v, self)
                if isinstance(v, member):
                    self.__members[k] = v
                    assert self.__struct is not None, \
                        "type %s declares members but no struct" % self.__name__
                    v.register(k, self.__struct)
        self.analyze_slots()

    def analyze_slots(self):
        """Decide which tp_* slots receive a non-default value."""
        self.__slots = {}
        for s in Slots:
            if s.special is not None:
                meth = self.__methods.get(s.special)
                if meth is not None:
                    self.__slots[s] = meth
        self.__slots[TP_NAME] = '"%s.%s"' % (self.__module, self.__name__)
        if self.__doc__:
            self.__slots[TP_DOC] = "%s_doc" % self.name
        if self.__struct is not None:
            self.__slots[TP_BASICSIZE] = "sizeof(%s)" % self.__struct.name
            self.__slots[TP_DEALLOC] = "%s_dealloc" % self.name
        if self.__methods:
            self.__slots[TP_METHODS] = "%s_methods" % self.name
        if self.__members:
            self.__slots[TP_MEMBERS] = "%s_members" % self.name

    def initvars(self):
        """Build the substitution dictionary used by the type templates."""
        v = self.__vars = {}
        v["TypeName"] = self.__name__
        v["CTypeName"] = "Py%s_Type" % self.__name__
        # TP_METHODS is only filled in when the type has methods; the method
        # table template is not emitted otherwise, so the variable can be
        # omitted instead of raising KeyError.
        if self.__methods:
            v["MethodDefName"] = self.__slots[TP_METHODS]
        if self.__doc__:
            v["DocstringVar"] = self.__slots[TP_DOC]
            v["Docstring"] = cstring(unindent(self.__doc__))
        if self.__struct is not None:
            v["StructName"] = self.__struct.name
        if self.__members:
            v["MemberDefName"] = self.__slots[TP_MEMBERS]

    def dump_memberdef(self, f):
        """Write the PyMemberDef table, if the type has members."""
        def p(templ, vars=self.__vars):
            print(templ % vars, file=f)

        if not self.__members:
            return
        p(template.memberdef_start)
        for name, slot in sorted(self.__members.items()):
            slot.dump(f)
        p(template.memberdef_end)

    def dump_slots(self, f):
        """Write the dealloc stub and the PyTypeObject initializer."""
        def p(templ, vars=self.__vars):
            print(templ % vars, file=f)

        # TP_DEALLOC is recorded exactly when a struct was parsed.
        if self.__struct is not None:
            p(template.dealloc_func, {"name" : self.__slots[TP_DEALLOC]})

        p(template.type_struct_start)
        for s in Slots[:-5]: # XXX
            val = self.__slots.get(s, s.default)
            ntabs = 4 - (4 + len(val)) // 8
            line = " %s,%s/* %s */" % (val, "\t" * ntabs, s.name)
            print(line, file=f)
        p(template.type_struct_end)

    def dump_init(self, f):
        """Write the statements that register the type in the module init."""
        def p(templ):
            print(templ % self.__vars, file=f)

        p(template.type_init_type)
        p(template.module_add_type)


# Base class that user type specifications inherit from; all behaviour lives
# in TypeMetaclass.
class Type(metaclass=TypeMetaclass):
    pass

View File

@ -1,173 +0,0 @@
"""Functions."""
from framer import template
from framer.util import cstring, unindent
METH_O = "METH_O"
METH_NOARGS = "METH_NOARGS"
METH_VARARGS = "METH_VARARGS"
def parsefmt(fmt):
    """Yield each format code in *fmt*, skipping the "|" optional marker."""
    for code in fmt:
        if code != '|':
            yield code
class Argument:
    """One C-level argument: a name, a C type and an optional default."""

    # Format code -> C type used for the local variable declaration.
    _codes = {
        "O": "PyObject *",
        "i": "int",
    }

    def __init__(self, name):
        self.name = name
        self.ctype = "PyObject *"
        self.default = None

    def __str__(self):
        return "{}{}".format(self.ctype, self.name)

    def setfmt(self, code):
        """Select the C type for format *code* (KeyError if unknown)."""
        ctype = self._codes[code]
        # Pointer types already end in "*"; everything else needs a space
        # between the type and the variable name.
        self.ctype = ctype if ctype[-1] == "*" else ctype + " "

    def decl(self):
        """Return the C declaration, with an initializer if a default is set."""
        if self.default is not None:
            return "%s = %s;" % (self, self.default)
        return str(self) + ";"
class _ArgumentList(object):
    """Sequence of Argument objects; base class of the calling conventions."""

    # Subclasses are expected to set these on the instance.
    ml_meth = None
    fmt = None

    def __init__(self, args):
        self.args = [Argument(name) for name in args]

    def __len__(self):
        return len(self.args)

    def __getitem__(self, i):
        return self.args[i]

    def dump_decls(self, f):
        """Write local variable declarations; nothing to declare by default."""
        pass
class NoArgs(_ArgumentList):
    """Calling convention for functions without arguments (METH_NOARGS)."""

    def __init__(self, args):
        assert not len(args)
        super().__init__(args)
        self.ml_meth = METH_NOARGS

    def c_args(self):
        """Return the C parameter list of the generated function."""
        return "PyObject *self"
class OneArg(_ArgumentList):
    """Calling convention for single-argument functions (METH_O)."""

    def __init__(self, args):
        assert len(args) == 1
        super().__init__(args)
        self.ml_meth = METH_O

    def c_args(self):
        """Return the C parameter list of the generated function."""
        only = self.args[0]
        return "PyObject *self, %s" % only
class VarArgs(_ArgumentList):
    """Calling convention using an argument tuple (METH_VARARGS)."""

    def __init__(self, args, fmt=None):
        super().__init__(args)
        self.ml_meth = METH_VARARGS
        if fmt is None:
            return
        self.fmt = fmt
        # The n-th format code determines the C type of the n-th argument.
        for position, code in enumerate(parsefmt(fmt)):
            self.args[position].setfmt(code)

    def c_args(self):
        """Return the C parameter list of the generated function."""
        return "PyObject *self, PyObject *args"

    def targets(self):
        """Return the address-of expressions handed to PyArg_ParseTuple."""
        addresses = ["&%s" % arg.name for arg in self.args]
        return ", ".join(addresses)

    def dump_decls(self, f):
        """Write one local variable declaration per argument to *f*."""
        for arg in self.args:
            print(" %s" % arg.decl(), file=f)
def ArgumentList(func, method):
    """Return the argument-list object describing how *func* is called.

    func is the Python function taken from the specification; method is
    true when the function is a method, in which case its first argument
    (self) is not part of the C-level argument list.

    A function carrying a ``pyarg`` attribute is parsed with
    PyArg_ParseTuple and gets a VarArgs list, with Python-level defaults
    copied onto the trailing arguments.  Otherwise the convention is chosen
    from the number of arguments: NoArgs, OneArg or VarArgs.
    """
    code = func.__code__
    names = code.co_varnames[:code.co_argcount]
    if method:
        names = names[1:]

    pyarg = getattr(func, "pyarg", None)
    if pyarg is not None:
        arglist = VarArgs(names, pyarg)
        defaults = func.__defaults__ or ()
        # Defaults belong to the *last* len(defaults) arguments, in order.
        # The index was previously never advanced, so every default was
        # written to the same argument and only the last one survived.
        first = len(arglist) - len(defaults)
        for offset, value in enumerate(defaults):
            arglist[first + offset].default = value
        return arglist

    if len(names) == 0:
        return NoArgs(names)
    if len(names) == 1:
        return OneArg(names)
    return VarArgs(names)
# Generator for one C function.  The wrapped Python function's docstring is
# stored on the instance as __doc__, so the class deliberately has no
# docstring of its own.
class Function:

    # Overridden by Method: when true the first argument (self) is dropped.
    method = False

    def __init__(self, func, parent):
        self._func = func
        self._parent = parent
        self.analyze()
        self.initvars()

    def dump(self, f):
        """Write the docstring variable and the function skeleton to *f*."""
        def p(templ, vars=None):  # helper function to generate output
            mapping = self.vars if vars is None else vars
            print(templ % mapping, file=f)

        if self.__doc__:
            p(template.docstring)

        header = {
            "name": self.vars["CName"],
            "args": self.args.c_args(),
        }
        p(template.funcdef_start, header)
        self.args.dump_decls(f)

        # Only tuple-style functions need an argument parsing call.
        if self.args.ml_meth == METH_VARARGS:
            p(template.varargs)

        p(template.funcdef_end)

    def analyze(self):
        """Record the docstring and calling convention of the function."""
        self.__doc__ = self._func.__doc__
        self.args = ArgumentList(self._func, self.method)

    def initvars(self):
        """Build the substitution dictionary used by the function templates."""
        pyname = self._func.__name__
        cname = "%s_%s" % (self._parent.name, pyname)

        v = self.vars = {}
        v["PythonName"] = pyname
        v["CName"] = cname
        v["DocstringVar"] = cname + "_doc"
        v["MethType"] = self.args.ml_meth
        if self.__doc__:
            v["Docstring"] = cstring(unindent(self.__doc__))
        if self.args.fmt is not None:
            v["ArgParse"] = self.args.fmt
            v["ArgTargets"] = self.args.targets()


# A Function whose first Python argument (self) is not a C argument.
class Method(Function):

    method = True

View File

@ -1,73 +0,0 @@
from framer import template
from framer.util import cstring, unindent
# Member type codes, spelled exactly like the C macros from structmember.h
# so they can be pasted into generated code.
T_SHORT = "T_SHORT"
T_INT = "T_INT"
T_LONG = "T_LONG"
T_FLOAT = "T_FLOAT"
T_DOUBLE = "T_DOUBLE"
T_STRING = "T_STRING"
T_OBJECT = "T_OBJECT"
T_CHAR = "T_CHAR"
T_BYTE = "T_BYTE"
T_UBYTE = "T_UBYTE"
T_UINT = "T_UINT"
T_ULONG = "T_ULONG"
T_STRING_INPLACE = "T_STRING_INPLACE"
T_OBJECT_EX = "T_OBJECT_EX"

# Member access flags.
RO = READONLY = "READONLY"
READ_RESTRICTED = "READ_RESTRICTED"
WRITE_RESTRICTED = "WRITE_RESTRICTED"
RESTRICT = "RESTRICTED"

# Maps the C type of a struct field (as produced by the struct parser) to
# the member type code used when none is given explicitly.
c2t = {
    "short": T_SHORT,
    "int": T_INT,
    "unsigned int": T_UINT,
    "long": T_LONG,
    # Was T_LONG, which reads the field with the wrong signedness.
    "unsigned long": T_ULONG,
    "float": T_FLOAT,
    "double": T_DOUBLE,
    # A "char *" field holds a C string (T_STRING); T_CHAR describes a
    # single "char" field.  It was previously mapped to T_CHAR.
    "char *": T_STRING,
    "char": T_CHAR,
    "PyObject *": T_OBJECT,
}
class member(object):
    """Description of one PyMemberDef entry exposing a C struct field."""

    def __init__(self, cname=None, type=None, flags=None, doc=None):
        self.cname = cname
        self.type = type
        self.flags = flags
        self.doc = doc
        # Filled in by register().
        self.name = None
        self.struct = None

    def register(self, name, struct):
        """Bind the member to its Python *name* and its parsed *struct*."""
        self.name, self.struct = name, struct
        self.initvars()

    def initvars(self):
        """Build the substitution dictionary used by the member templates."""
        v = {}
        self.vars = v
        v["PythonName"] = self.name
        # The C field name defaults to the Python attribute name.
        v["CName"] = self.name if self.cname is None else self.cname
        v["Flags"] = self.flags or "0"
        v["Type"] = self.get_type()
        if self.doc is not None:
            v["Docstring"] = cstring(unindent(self.doc))
        v["StructName"] = self.struct.name

    def get_type(self):
        """Deduce type code from struct specification if not defined"""
        if self.type is None:
            return c2t[self.struct.get_type(self.name)]
        return self.type

    def dump(self, f):
        """Write the PyMemberDef entry for this member to file object *f*."""
        if self.doc is None:
            entry = template.memberdef_def
        else:
            entry = template.memberdef_def_doc
        print(entry % self.vars, file=f)

View File

@ -1,64 +0,0 @@
"""Descriptions of all the slots in Python's type objects."""
class Slot(object):
    """One field of the PyTypeObject initializer.

    name is the C field name, cast the C function-pointer type (if any),
    special the Python special method that fills the slot (if any), and
    default the C expression emitted when nothing fills it.
    """

    def __init__(self, name, cast=None, special=None, default="0"):
        self.name, self.cast = name, cast
        self.special, self.default = special, default
# All slots, in the order in which they appear in the PyTypeObject
# initializer.  The order is significant: the code generator emits the
# fields positionally.
Slots = (Slot("ob_size"),
         Slot("tp_name"),
         Slot("tp_basicsize"),
         Slot("tp_itemsize"),
         Slot("tp_dealloc", "destructor"),
         Slot("tp_print", "printfunc"),
         Slot("tp_getattr", "getattrfunc"),
         Slot("tp_setattr", "setattrfunc"),
         Slot("tp_reserved", "void*"),
         Slot("tp_repr", "reprfunc", "__repr__"),
         Slot("tp_as_number"),
         Slot("tp_as_sequence"),
         Slot("tp_as_mapping"),
         Slot("tp_hash", "hashfunc", "__hash__"),
         Slot("tp_call", "ternaryfunc", "__call__"),
         Slot("tp_str", "reprfunc", "__str__"),
         Slot("tp_getattro", "getattrofunc", "__getattr__", # XXX
              "PyObject_GenericGetAttr"),
         Slot("tp_setattro", "setattrofunc", "__setattr__"),
         Slot("tp_as_buffer"),
         Slot("tp_flags", default="Py_TPFLAGS_DEFAULT"),
         Slot("tp_doc"),
         # The C typedef is "traverseproc" (was misspelled "traverseprox").
         Slot("tp_traverse", "traverseproc"),
         Slot("tp_clear", "inquiry"),
         Slot("tp_richcompare", "richcmpfunc"),
         Slot("tp_weaklistoffset"),
         Slot("tp_iter", "getiterfunc", "__iter__"),
         Slot("tp_iternext", "iternextfunc", "__next__"), # XXX
         Slot("tp_methods"),
         Slot("tp_members"),
         Slot("tp_getset"),
         Slot("tp_base"),
         Slot("tp_dict"),
         Slot("tp_descr_get", "descrgetfunc"),
         Slot("tp_descr_set", "descrsetfunc"),
         Slot("tp_dictoffset"),
         Slot("tp_init", "initproc", "__init__"),
         Slot("tp_alloc", "allocfunc"),
         Slot("tp_new", "newfunc"),
         Slot("tp_free", "freefunc"),
         Slot("tp_is_gc", "inquiry"),
         Slot("tp_bases"),
         Slot("tp_mro"),
         Slot("tp_cache"),
         Slot("tp_subclasses"),
         Slot("tp_weaklist"),
         )

# give some slots symbolic names
# Looked up by field name rather than by position, so the names stay correct
# if a slot is ever inserted into or removed from the table.
_slots_by_name = {slot.name: slot for slot in Slots}
TP_NAME = _slots_by_name["tp_name"]
TP_BASICSIZE = _slots_by_name["tp_basicsize"]
TP_DEALLOC = _slots_by_name["tp_dealloc"]
TP_DOC = _slots_by_name["tp_doc"]
TP_METHODS = _slots_by_name["tp_methods"]
TP_MEMBERS = _slots_by_name["tp_members"]

View File

@ -1,52 +0,0 @@
"""Rudimentary parser for C struct definitions."""
import re
# Names of the two object header macros a struct may start with.
PyObject_HEAD = "PyObject_HEAD"
PyObject_VAR_HEAD = "PyObject_VAR_HEAD"

# Matches the closing "} TypeName;" line of a typedef and captures the name.
# Raw string: "\w" in a plain string literal is an invalid escape sequence.
rx_name = re.compile(r"} (\w+);")
class Struct:
    """Parsed C struct: its typedef name, header macro and member list."""

    def __init__(self, name, head, members):
        self.name, self.head, self.members = name, head, members

    def get_type(self, name):
        """Return the C type of the member called *name*.

        Raises ValueError when the struct has no such member.
        """
        found = [ctype for member_name, ctype in self.members
                 if name == member_name]
        if not found:
            raise ValueError("no member named %s" % name)
        # The first declaration wins if a name appears more than once.
        return found[0]
def parse(s):
    """Parse a C struct definition.

    The parser is very restricted in what it will accept: the text must
    consist of a "typedef struct {" line, a PyObject*HEAD macro line, one
    "type name;" declaration per line and a closing "} Name;" line.

    Returns a Struct.  Raises ValueError when the text does not have that
    shape (previously this was only checked with assert statements, which
    disappear under -O).
    """
    # Keep only lines with real content; a whitespace-only line used to
    # survive the filter and then fail the "ends with ;" check.
    lines = [ln.strip() for ln in s.split("\n") if ln.strip()]

    if len(lines) < 2 or lines[0] != "typedef struct {":
        raise ValueError("struct definition must start with 'typedef struct {'")
    pyhead = lines[1]
    if not (pyhead.startswith("PyObject") and pyhead.endswith("HEAD")):
        raise ValueError("expected a PyObject*HEAD macro, got %r" % pyhead)

    members = []
    closing = None
    for line in lines[2:]:
        if line.startswith("}"):
            closing = line
            break
        if not line.endswith(";"):
            raise ValueError("member declaration must end with ';': %r" % line)
        words = line[:-1].split()
        if not words:
            raise ValueError("empty member declaration")
        name = words[-1]
        ctype = " ".join(words[:-1])
        # "char *buf" splits into "char" and "*buf": move the stars from the
        # name to the type.  Handles any number of stars.
        stars = len(name) - len(name.lstrip("*"))
        if stars:
            name = name[stars:]
            ctype += " " + "*" * stars
        members.append((name, ctype))

    mo = rx_name.search(closing) if closing is not None else None
    if mo is None:
        raise ValueError("struct definition must end with '} Name;'")
    return Struct(mo.group(1), pyhead, members)

View File

@ -1,46 +0,0 @@
"""Rudimentary parser for C struct definitions."""
import re
# Names of the two object header macros a struct may start with.
PyObject_HEAD = "PyObject_HEAD"
PyObject_VAR_HEAD = "PyObject_VAR_HEAD"

# Matches the closing "} TypeName;" line of a typedef and captures the name.
# Raw string: "\w" in a plain string literal is an invalid escape sequence.
rx_name = re.compile(r"} (\w+);")
class Struct:
    """Parsed C struct: its typedef name, header macro and member list."""

    def __init__(self, name, head, members):
        self.name, self.head, self.members = name, head, members
def parse(s):
    """Parse a C struct definition.

    The parser is very restricted in what it will accept: the text must
    consist of a "typedef struct {" line, a PyObject*HEAD macro line, one
    "type name;" declaration per line and a closing "} Name;" line.

    Returns a Struct.  Raises ValueError when the text does not have that
    shape (previously this was only checked with assert statements, which
    disappear under -O).
    """
    # Keep only lines with real content; a whitespace-only line used to
    # survive the filter and then fail the "ends with ;" check.
    lines = [ln.strip() for ln in s.split("\n") if ln.strip()]

    if len(lines) < 2 or lines[0] != "typedef struct {":
        raise ValueError("struct definition must start with 'typedef struct {'")
    pyhead = lines[1]
    if not (pyhead.startswith("PyObject") and pyhead.endswith("HEAD")):
        raise ValueError("expected a PyObject*HEAD macro, got %r" % pyhead)

    members = []
    closing = None
    for line in lines[2:]:
        if line.startswith("}"):
            closing = line
            break
        if not line.endswith(";"):
            raise ValueError("member declaration must end with ';': %r" % line)
        words = line[:-1].split()
        if not words:
            raise ValueError("empty member declaration")
        name = words[-1]
        ctype = " ".join(words[:-1])
        # "char *buf" splits into "char" and "*buf": move the stars from the
        # name to the type.  Handles any number of stars.
        stars = len(name) - len(name.lstrip("*"))
        if stars:
            name = name[stars:]
            ctype += " " + "*" * stars
        members.append((name, ctype))

    mo = rx_name.search(closing) if closing is not None else None
    if mo is None:
        raise ValueError("struct definition must end with '} Name;'")
    return Struct(mo.group(1), pyhead, members)

View File

@ -1,102 +0,0 @@
"""framer's C code templates.
Templates use the following variables:
FileName: name of the file that contains the C source code
ModuleName: name of the module, as in "import ModuleName"
ModuleDocstring: C string containing the module doc string
"""
# Every template is a %-format string filled from a dictionary; the keys it
# needs appear as %(Key)s in its text.

# --- file prologue ---------------------------------------------------------
module_start = '#include "Python.h"'
member_include = '#include "structmember.h"'
module_doc = """\
PyDoc_STRVAR(%(ModuleName)s_doc,
%(ModuleDocstring)s);
"""

# --- PyMethodDef table -----------------------------------------------------
methoddef_start = """\
static struct PyMethodDef %(MethodDefName)s[] = {"""
methoddef_def = """\
{"%(PythonName)s", (PyCFunction)%(CName)s, %(MethType)s},"""
methoddef_def_doc = """\
{"%(PythonName)s", (PyCFunction)%(CName)s, %(MethType)s,
%(DocstringVar)s},"""
methoddef_end = """\
{NULL, NULL}
};
"""

# --- PyMemberDef table -----------------------------------------------------
memberdef_start = """\
#define OFF(X) offsetof(%(StructName)s, X)
static struct PyMemberDef %(MemberDefName)s[] = {"""
memberdef_def_doc = """\
{"%(PythonName)s", %(Type)s, OFF(%(CName)s), %(Flags)s,
%(Docstring)s},"""
memberdef_def = """\
{"%(PythonName)s", %(Type)s, OFF(%(CName)s), %(Flags)s},"""
memberdef_end = """\
{NULL}
};
#undef OFF
"""

# --- functions -------------------------------------------------------------
dealloc_func = """static void
%(name)s(PyObject *ob)
{
}
"""
docstring = """\
PyDoc_STRVAR(%(DocstringVar)s,
%(Docstring)s);
"""
funcdef_start = """\
static PyObject *
%(name)s(%(args)s)
{"""
funcdef_end = """\
}
"""
varargs = """\
if (!PyArg_ParseTuple(args, \"%(ArgParse)s:%(PythonName)s\",
%(ArgTargets)s))
return NULL;"""

# --- module initialization -------------------------------------------------
# A PyInit_* function returns PyObject *, so the error path must return
# NULL; a bare "return;" does not compile cleanly.
# NOTE(review): Py_InitModule3 is the Python 2 API while PyInit_* is the
# Python 3 entry point, and the code that closes this function emits no
# "return mod;" -- both need a coordinated change with the generator.
module_init_start = """\
PyMODINIT_FUNC
PyInit_%(ModuleName)s(void)
{
PyObject *mod;
mod = Py_InitModule3("%(ModuleName)s", %(MethodDefName)s,
%(ModuleName)s_doc);
if (mod == NULL)
return NULL;
"""
type_init_type = " %(CTypeName)s.ob_type = &PyType_Type;"
# PyObject_SetAttrString returns 0 on success and -1 on failure; testing
# with "!" made the init function bail out exactly when the call succeeded.
module_add_type = """\
if (PyObject_SetAttrString(mod, "%(TypeName)s",
(PyObject *)&%(CTypeName)s) < 0)
return NULL;
"""

# --- PyTypeObject ----------------------------------------------------------
type_struct_start = """\
static PyTypeObject %(CTypeName)s = {
PyObject_HEAD_INIT(0)"""
type_struct_end = """\
};
"""

View File

@ -1,35 +0,0 @@
def cstring(s, width=70):
    """Return C string representation of a Python string.

    Every line of s becomes one or more adjacent C string literals, one per
    output line; the last literal of each input line ends in an escaped
    newline.  width bounds the number of source characters placed in a
    single literal: a line of width or more characters is split into
    several literals, which the C compiler concatenates.  (Such lines used
    to be dropped from the output altogether.)

    Backslashes and double quotes are escaped so the result is always a
    valid C literal.
    """
    step = max(width - 1, 1)
    literals = []
    for line in s.split("\n"):
        # Split the raw text first and escape each piece afterwards, so an
        # escape sequence is never cut in half.
        pieces = [line[i:i + step] for i in range(0, len(line), step)] or [""]
        last = len(pieces) - 1
        for index, piece in enumerate(pieces):
            escaped = piece.replace("\\", "\\\\").replace('"', '\\"')
            if index == last:
                literals.append(r'"%s\n"' % escaped)
            else:
                literals.append('"%s"' % escaped)
    return "\n".join(literals)
def unindent(s, skipfirst=True):
    """Return an unindented version of a docstring.

    Removes indentation on lines following the first one, using the
    leading whitespace of the first indented line that is not blank
    to determine the indentation.  With skipfirst false, the first line
    takes part in the computation and is unindented as well.

    Only whitespace is ever removed: a line indented less than the
    reference line loses just its own leading whitespace.
    """
    lines = s.split("\n")
    result = [lines.pop(0)] if skipfirst else []

    # Measure leading whitespace only.  Using strip() here also counted
    # trailing whitespace and so over-estimated the indentation.
    indent = 0
    for line in lines:
        stripped = line.lstrip()
        if stripped:
            indent = len(line) - len(stripped)
            break

    for line in lines:
        leading = len(line) - len(line.lstrip())
        # Never cut into the text of a line that is indented less.
        result.append(line[min(indent, leading):])
    return "\n".join(result)

View File

@ -1,85 +0,0 @@
world -- Print mappings between country names and DNS country codes.
Contact: Barry Warsaw
Email: bwarsaw@python.org
This script will take a list of Internet addresses and print out where in the
world those addresses originate from, based on the top-level domain country
code found in the address. Addresses can be in any of the following forms:
xx -- just the country code or top-level domain identifier
host.domain.xx -- any Internet host or network name
somebody@where.xx -- an Internet email address
If no match is found, the address is interpreted as a regular expression [*]
and a reverse lookup is attempted. This script will search the country names
and print a list of matching entries. You can force reverse mappings with the
`-r' flag (see below).
For example:
%% world tz us
tz originated from Tanzania, United Republic of
us originated from United States
%% world united
united matches 6 countries:
ae: United Arab Emirates
uk: United Kingdom (common practice)
um: United States Minor Outlying Islands
us: United States
tz: Tanzania, United Republic of
gb: United Kingdom
[*] Note that regular expressions must conform to Python 1.5's re.py module
syntax. The comparison is done with the search() method.
Country codes are maintained by the RIPE Network Coordination Centre,
in coordination with the ISO 3166 Maintenance Agency at DIN Berlin. The
authoritative source of country code mappings is:
<url:ftp://info.ripe.net/iso3166-countrycodes>
The latest known change to this information was:
Thu Aug 7 17:59:51 MET DST 1997
This script also knows about non-geographic top-level domains.
Usage: world [-d] [-p file] [-o] [-h] addr [addr ...]
--dump
-d
Print mapping of all top-level domains.
--parse file
-p file
Parse an iso3166-countrycodes file extracting the two letter country
code followed by the country name. Note that the three letter country
codes and numbers, which are also provided in the standard format
file, are ignored.
--outputdict
-o
When used in conjunction with the `-p' option, output is in the form
of a Python dictionary, and country names are normalized
w.r.t. capitalization. This makes it appropriate for cutting and
pasting back into this file.
--reverse
-r
Force reverse lookup. In this mode the address can be any Python
regular expression; this is matched against all country names and a
list of matching mappings is printed. In normal mode (i.e. without
this flag), reverse lookup is performed on addresses if no matching
country code is found.
-h
--help
Print this message.
Local Variables:
indent-tabs-mode: nil
End:

View File

@ -1,568 +0,0 @@
#! /usr/bin/env python
"""world -- Print mappings between country names and DNS country codes.
Contact: Barry Warsaw
Email: barry@python.org
Version: %(__version__)s
This script will take a list of Internet addresses and print out where in the
world those addresses originate from, based on the top-level domain country
code found in the address. Addresses can be in any of the following forms:
xx -- just the country code or top-level domain identifier
host.domain.xx -- any Internet host or network name
somebody@where.xx -- an Internet email address
If no match is found, the address is interpreted as a regular expression and a
reverse lookup is attempted. This script will search the country names and
print a list of matching entries. You can force reverse mappings with the
`-r' flag (see below).
For example:
%% world tz us
tz originated from Tanzania, United Republic of
us originated from United States
%% world united
united matches 6 countries:
ae: United Arab Emirates
uk: United Kingdom (common practice)
um: United States Minor Outlying Islands
us: United States
tz: Tanzania, United Republic of
gb: United Kingdom
Country codes are maintained by the RIPE Network Coordination Centre,
in coordination with the ISO 3166 Maintenance Agency at DIN Berlin. The
authoritative source of country code mappings is:
<url:ftp://ftp.ripe.net/iso3166-countrycodes.txt>
The latest known change to this information was:
Monday, 10 October 2006, 17:59:51 UTC 2006
This script also knows about non-geographic top-level domains, and the
additional ccTLDs reserved by IANA.
Usage: %(PROGRAM)s [-d] [-p file] [-o] [-h] addr [addr ...]
--dump
-d
Print mapping of all top-level domains.
--parse file
-p file
Parse an iso3166-countrycodes file extracting the two letter country
code followed by the country name. Note that the three letter country
codes and numbers, which are also provided in the standard format
file, are ignored.
--outputdict
-o
When used in conjunction with the `-p' option, output is in the form
of a Python dictionary, and country names are normalized
w.r.t. capitalization. This makes it appropriate for cutting and
pasting back into this file. Output is always to standard out.
--reverse
-r
Force reverse lookup. In this mode the address can be any Python
regular expression; this is matched against all country names and a
list of matching mappings is printed. In normal mode (e.g. without
this flag), reverse lookup is performed on addresses if no matching
country code is found.
-h
--help
Print this message.
"""
__version__ = '$Revision$'
import sys
import getopt
import re
PROGRAM = sys.argv[0]
def usage(code, msg=''):
    """Print the module help text, then *msg* if given, and exit with *code*."""
    help_text = __doc__ % globals()
    print(help_text)
    if msg:
        print(msg)
    sys.exit(code)
def resolve(rawaddr):
    """Print where *rawaddr* comes from, based on its top-level domain.

    rawaddr may be a bare code ("us"), a host name or an email address; the
    text after the last dot is looked up in the nameorgs and countries
    tables.  Domain names are case-insensitive and may be written with a
    trailing dot, so the code is lower-cased and a trailing dot is ignored.

    Returns None when the address was resolved (a line has been printed),
    otherwise rawaddr unchanged so the caller can try the next step.
    """
    # str.split always returns at least one element, so no emptiness check
    # is needed (the old one could never trigger).
    tld = rawaddr.rstrip('.').split('.')[-1].lower()
    if tld in nameorgs:
        print(rawaddr, 'is in the', nameorgs[tld], 'top level domain')
        return None
    if tld in countries:
        print(rawaddr, 'originated from', countries[tld])
        return None
    # Not resolved, bounce it to the next step
    return rawaddr
def reverse(regexp):
    """Print every code whose country name matches *regexp*.

    The match is a case-insensitive regular expression search.  Returns
    None when at least one match was printed, otherwise regexp unchanged so
    the caller can try the next step.
    """
    pattern = re.compile(regexp, re.IGNORECASE)
    matches = [code for code, country in all.items()
               if pattern.search(country)]

    if not matches:
        # not resolved, bounce it to the next step
        return regexp

    if len(matches) == 1:
        (code,) = matches
        print(regexp, "matches code `%s', %s" % (code, all[code]))
    else:
        print(regexp, 'matches %d countries:' % len(matches))
        for code in matches:
            print(" %s: %s" % (code, all[code]))
    return None
def _normalize_word(index, word):
    """Return *word* of an upper-case country name in normalized case.

    index is the position of the word within the name; it matters for
    'THE', which is lower-cased everywhere except in second position.
    """
    # XXX special cases
    if word in ('AND', 'OF', 'OF)', 'name:', 'METROPOLITAN'):
        return word.lower()
    if word == 'THE' and index != 1:
        return word.lower()
    if len(word) > 3 and word[1] == "'":
        # e.g. D'IVOIRE -> D'Ivoire
        return word[0:3].upper() + word[3:].lower()
    if word in ('(U.S.)', 'U.S.'):
        return word
    if word[0] == '(' and word != '(local':
        return '(' + word[1:].capitalize()
    if '-' in word:
        return '-'.join(part.capitalize() for part in word.split('-'))
    return word.capitalize()


def parse(file, normalize):
    """Print the country codes found in an iso3166-countrycodes file.

    Entries are read between the first line starting with '-' and the next
    such line.  With normalize false each entry is printed as "CODE NAME";
    with normalize true the output is a Python dictionary literal with
    lower-case codes and normalized country names.  Lines in the table that
    cannot be parsed are reported and skipped.  If the file cannot be
    opened, the error is printed and nothing else happens.
    """
    try:
        fp = open(file)
    except IOError as err:
        print(err.strerror, ':', file)
        return

    cre = re.compile('(.*?)[ \t]+([A-Z]{2})[ \t]+[A-Z]{3}[ \t]+[0-9]{3}')

    # The with-block closes the file on every exit path; it used to be left
    # open.
    with fp:
        if normalize:
            print('countries = {')

        scanning = False
        for line in fp:
            if not scanning:
                # Skip the preamble up to the first separator line.
                if line[0] == '-':
                    scanning = True
                continue

            mo = cre.match(line)
            if not mo:
                line = line.strip()
                if not line:
                    continue
                if line[0] == '-':
                    # Second separator: end of the table.
                    break
                print('Could not parse line:', line)
                continue

            country, code = mo.group(1, 2)
            if normalize:
                words = [_normalize_word(i, w)
                         for i, w in enumerate(country.split())]
                print(' "%s": "%s",' % (code.lower(), ' '.join(words)))
            else:
                print(code, country)

        if normalize:
            print(' }')
def main():
    """Command-line entry point: read the options and dispatch.

    Options taken from ``sys.argv[1:]``:
        -h / --help         call usage() with status 0
        -d / --dump         print both domain tables
        -p / --parse FILE   run parse() on FILE
        -o / --outputdict   passed to parse() as its normalize flag
        -r / --reverse      skip resolve() and only try reverse()

    Without --dump or --parse, each remaining argument goes through
    resolve() (unless -r was given) and then reverse(); whatever neither
    step handled is reported as unknown.
    """
    exit_status = 0
    want_help = want_dump = emit_dict = reverse_only = 0
    parse_path = None

    try:
        opts, args = getopt.getopt(
            sys.argv[1:],
            'p:rohd',
            ['parse=', 'reverse', 'outputdict', 'help', 'dump'])
    except getopt.error as msg:
        usage(1, msg)

    for opt, arg in opts:
        if opt in ('-h', '--help'):
            want_help = 1
        elif opt in ('-d', '--dump'):
            want_dump = 1
        elif opt in ('-p', '--parse'):
            parse_path = arg
        elif opt in ('-o', '--outputdict'):
            emit_dict = 1
        elif opt in ('-r', '--reverse'):
            reverse_only = 1

    if want_help:
        usage(exit_status)

    if want_dump:
        print('Official country coded domains:')
        for code in sorted(countries):
            print(' %2s:' % code, countries[code])

        print('\nOther top-level domains:')
        for code in sorted(nameorgs):
            print(' %6s:' % code, nameorgs[code])
        return

    if parse_path:
        parse(parse_path, emit_dict)
        return

    # filter() and map() are kept as lazy iterators, so each argument flows
    # through every step before the next argument is looked at.
    pending = args
    if not reverse_only:
        pending = filter(None, map(resolve, pending))
    pending = filter(None, map(reverse, pending))
    for arg in pending:
        print('Where in the world is %s?' % arg)
# The mappings

# Top-level domains that are not ISO 3166 country codes, keyed by domain.
nameorgs = {
    # New top level domains as described by ICANN
    # http://www.icann.org/tlds/
    "aero": "air-transport industry",
    "asia": "from Asia/for Asia",
    "arpa": "Arpanet",
    "biz": "business",
    "cat": "Catalan community",
    "com": "commercial",
    "coop": "cooperatives",
    "edu": "educational",
    "gov": "government",
    "info": "unrestricted `info'",
    "int": "international",
    "jobs": "employment-related",
    "mil": "military",
    "mobi": "mobile specific",
    "museum": "museums",
    "name": "`name' (for registration by individuals)",
    "net": "networking",
    "org": "non-commercial",
    "pro": "professionals",
    "tel": "business telecommunications",
    "travel": "travel and tourism",

    # These additional ccTLDs are listed here even though they are not
    # part of ISO 3166.  IANA has a decoding table listing all reserved
    # ccTLDs:
    #
    # http://www.iso.org/iso/iso-3166-1_decoding_table
    #
    # Note that `uk' is the common practice country code for the United
    # Kingdom.  AFAICT, the official `gb' code is routinely ignored!
    #
    # <D.M.Pick@qmw.ac.uk> tells me that `uk' was long in use before
    # ISO 3166 was adopted for top-level DNS zone names (although in the
    # reverse order, like uk.ac.qmw) and was carried forward (with the
    # reversal) to avoid a large-scale renaming process as the UK switched
    # from their old `Coloured Book' protocols over X.25 to Internet
    # protocols over IP.
    #
    # See <url:ftp://ftp.ripe.net/ripe/docs/ripe-159.txt>
    #
    # Also, `su', while obsolete, is still in limited use.
    "ac": "Ascension Island",
    "cp": "Clipperton Island",
    "dg": "Diego Garcia",
    "ea": "Ceuta, Melilla",
    "eu": "European Union",
    "fx": "Metropolitan France",
    "ic": "Canary Islands",
    "ta": "Tristan da Cunha",
    "uk": "United Kingdom (common practice)",
    "su": "Soviet Union (still in limited use)",
}
# Country-code top-level domains, keyed by lower-case two-letter code.
#
# Every code must appear exactly once: a repeated key in a dict literal
# silently replaces the earlier value, which is how outdated spellings for
# "sh", "pm" and "va" used to shadow the current names.
countries = {
    "af": "Afghanistan",
    "ax": "Aland Islands",
    "al": "Albania",
    "dz": "Algeria",
    "as": "American Samoa",
    "ad": "Andorra",
    "ao": "Angola",
    "ai": "Anguilla",
    "aq": "Antarctica",
    "ag": "Antigua and Barbuda",
    "ar": "Argentina",
    "am": "Armenia",
    "aw": "Aruba",
    "au": "Australia",
    "at": "Austria",
    "az": "Azerbaijan",
    "bs": "Bahamas",
    "bh": "Bahrain",
    "bd": "Bangladesh",
    "bb": "Barbados",
    "by": "Belarus",
    "be": "Belgium",
    "bz": "Belize",
    "bj": "Benin",
    "bm": "Bermuda",
    "bt": "Bhutan",
    "bo": "Bolivia",
    "ba": "Bosnia and Herzegovina",
    "bw": "Botswana",
    "bv": "Bouvet Island",
    "br": "Brazil",
    "io": "British Indian Ocean Territory",
    "bn": "Brunei Darussalam",
    "bg": "Bulgaria",
    "bf": "Burkina Faso",
    "bi": "Burundi",
    "kh": "Cambodia",
    "cm": "Cameroon",
    "ca": "Canada",
    "cv": "Cape Verde",
    "ky": "Cayman Islands",
    "cf": "Central African Republic",
    "td": "Chad",
    "cl": "Chile",
    "cn": "China",
    "cx": "Christmas Island",
    "cc": "Cocos (Keeling) Islands",
    "co": "Colombia",
    "km": "Comoros",
    "cg": "Congo",
    "cd": "Congo, The Democratic Republic of the",
    "ck": "Cook Islands",
    "cr": "Costa Rica",
    "ci": "Cote D'Ivoire",
    "hr": "Croatia",
    "cu": "Cuba",
    "cy": "Cyprus",
    "cz": "Czech Republic",
    "dk": "Denmark",
    "dj": "Djibouti",
    "dm": "Dominica",
    "do": "Dominican Republic",
    "ec": "Ecuador",
    "eg": "Egypt",
    "sv": "El Salvador",
    "gq": "Equatorial Guinea",
    "er": "Eritrea",
    "ee": "Estonia",
    "et": "Ethiopia",
    "fk": "Falkland Islands (Malvinas)",
    "fo": "Faroe Islands",
    "fj": "Fiji",
    "fi": "Finland",
    "fr": "France",
    "gf": "French Guiana",
    "pf": "French Polynesia",
    "tf": "French Southern Territories",
    "ga": "Gabon",
    "gm": "Gambia",
    "ge": "Georgia",
    "de": "Germany",
    "gh": "Ghana",
    "gi": "Gibraltar",
    "gr": "Greece",
    "gl": "Greenland",
    "gd": "Grenada",
    "gp": "Guadeloupe",
    "gu": "Guam",
    "gt": "Guatemala",
    "gg": "Guernsey",
    "gn": "Guinea",
    "gw": "Guinea-Bissau",
    "gy": "Guyana",
    "ht": "Haiti",
    "hm": "Heard Island and Mcdonald Islands",
    "va": "Holy See (Vatican City State)",
    "hn": "Honduras",
    "hk": "Hong Kong",
    "hu": "Hungary",
    "is": "Iceland",
    "in": "India",
    "id": "Indonesia",
    "ir": "Iran (Islamic Republic of)",
    "iq": "Iraq",
    "ie": "Ireland",
    "im": "Isle of Man",
    "il": "Israel",
    "it": "Italy",
    "jm": "Jamaica",
    "jp": "Japan",
    "je": "Jersey",
    "jo": "Jordan",
    "kz": "Kazakhstan",
    "ke": "Kenya",
    "ki": "Kiribati",
    "kp": "Korea, Democratic People's Republic of",
    "kr": "Korea, Republic of",
    "kw": "Kuwait",
    "kg": "Kyrgyzstan",
    "la": "Lao People's Democratic Republic",
    "lv": "Latvia",
    "lb": "Lebanon",
    "ls": "Lesotho",
    "lr": "Liberia",
    "ly": "Libyan Arab Jamahiriya",
    "li": "Liechtenstein",
    "lt": "Lithuania",
    "lu": "Luxembourg",
    "mo": "Macao",
    "mk": "Macedonia, The Former Yugoslav Republic of",
    "mg": "Madagascar",
    "mw": "Malawi",
    "my": "Malaysia",
    "mv": "Maldives",
    "ml": "Mali",
    "mt": "Malta",
    "mh": "Marshall Islands",
    "mq": "Martinique",
    "mr": "Mauritania",
    "mu": "Mauritius",
    "yt": "Mayotte",
    "mx": "Mexico",
    "fm": "Micronesia, Federated States of",
    "md": "Moldova, Republic of",
    "mc": "Monaco",
    "mn": "Mongolia",
    "me": "Montenegro",
    "ms": "Montserrat",
    "ma": "Morocco",
    "mz": "Mozambique",
    "mm": "Myanmar",
    "na": "Namibia",
    "nr": "Nauru",
    "np": "Nepal",
    "nl": "Netherlands",
    "an": "Netherlands Antilles",
    "nc": "New Caledonia",
    "nz": "New Zealand",
    "ni": "Nicaragua",
    "ne": "Niger",
    "ng": "Nigeria",
    "nu": "Niue",
    "nf": "Norfolk Island",
    "mp": "Northern Mariana Islands",
    "no": "Norway",
    "om": "Oman",
    "pk": "Pakistan",
    "pw": "Palau",
    "ps": "Palestinian Territory, Occupied",
    "pa": "Panama",
    "pg": "Papua New Guinea",
    "py": "Paraguay",
    "pe": "Peru",
    "ph": "Philippines",
    "pn": "Pitcairn",
    "pl": "Poland",
    "pt": "Portugal",
    "pr": "Puerto Rico",
    "qa": "Qatar",
    "re": "Reunion",
    "ro": "Romania",
    "ru": "Russian Federation",
    "rw": "Rwanda",
    "sh": "Saint Helena",
    "kn": "Saint Kitts and Nevis",
    "lc": "Saint Lucia",
    "pm": "Saint Pierre and Miquelon",
    "vc": "Saint Vincent and the Grenadines",
    "ws": "Samoa",
    "sm": "San Marino",
    "st": "Sao Tome and Principe",
    "sa": "Saudi Arabia",
    "sn": "Senegal",
    "rs": "Serbia",
    "sc": "Seychelles",
    "sl": "Sierra Leone",
    "sg": "Singapore",
    "sk": "Slovakia",
    "si": "Slovenia",
    "sb": "Solomon Islands",
    "so": "Somalia",
    "za": "South Africa",
    "gs": "South Georgia and the South Sandwich Islands",
    "es": "Spain",
    "lk": "Sri Lanka",
    "sd": "Sudan",
    "sr": "Suriname",
    "sj": "Svalbard and Jan Mayen",
    "sz": "Swaziland",
    "se": "Sweden",
    "ch": "Switzerland",
    "sy": "Syrian Arab Republic",
    "tw": "Taiwan, Province of China",
    "tj": "Tajikistan",
    "tz": "Tanzania, United Republic of",
    "th": "Thailand",
    "tl": "Timor-Leste",
    "tg": "Togo",
    "tk": "Tokelau",
    "to": "Tonga",
    "tt": "Trinidad and Tobago",
    "tn": "Tunisia",
    "tr": "Turkey",
    "tm": "Turkmenistan",
    "tc": "Turks and Caicos Islands",
    "tv": "Tuvalu",
    "ug": "Uganda",
    "ua": "Ukraine",
    "ae": "United Arab Emirates",
    "gb": "United Kingdom",
    "us": "United States",
    "um": "United States Minor Outlying Islands",
    "uy": "Uruguay",
    "uz": "Uzbekistan",
    "vu": "Vanuatu",
    "ve": "Venezuela",
    "vn": "Viet Nam",
    "vg": "Virgin Islands (British)",
    "vi": "Virgin Islands (U.S.)",
    "wf": "Wallis and Futuna",
    "eh": "Western Sahara",
    "ye": "Yemen",
    "yu": "Yugoslavia",
    "zm": "Zambia",
    "zw": "Zimbabwe",
}
# Combined lookup table used for reverse (description -> code) searches.
# Entries from `countries` take precedence over `nameorgs` on a shared key.
# NOTE: this module-level name shadows the builtin all().
all = dict(nameorgs)
all.update(countries)


if __name__ == '__main__':
    main()