mirror of https://github.com/python/cpython
Added the 7 new top level domains, and reworded the nameorgs output.
Not sure this is better in all cases.

parse(): Fixed a bug in the output; the dict is referred to in the code as `countries', not `country'. Also added no-case-fold for the string "U.S.", since the Virgin Islands name no longer wraps those in parentheses.

main(): Fixed the argument parsing to agree with the docstring, i.e. --outputdict instead of --output.

In the module docstring:
- updated my email address
- we don't need to explain about Python 1.5 regexps <wink>

We also don't need to wrap the import of re with a try/except.

Other style fixes:
- untabification
- revert back to <> style everywhere (and consistently)
This commit is contained in:
parent
9e9d4f8ed8
commit
aef8371acb
|
@ -3,7 +3,7 @@
|
||||||
"""world -- Print mappings between country names and DNS country codes.
|
"""world -- Print mappings between country names and DNS country codes.
|
||||||
|
|
||||||
Contact: Barry Warsaw
|
Contact: Barry Warsaw
|
||||||
Email: bwarsaw@python.org
|
Email: barry@python.org
|
||||||
Version: %(__version__)s
|
Version: %(__version__)s
|
||||||
|
|
||||||
This script will take a list of Internet addresses and print out where in the
|
This script will take a list of Internet addresses and print out where in the
|
||||||
|
@ -14,9 +14,9 @@ code found in the address. Addresses can be in any of the following forms:
|
||||||
host.domain.xx -- any Internet host or network name
|
host.domain.xx -- any Internet host or network name
|
||||||
somebody@where.xx -- an Internet email address
|
somebody@where.xx -- an Internet email address
|
||||||
|
|
||||||
If no match is found, the address is interpreted as a regular expression [*]
|
If no match is found, the address is interpreted as a regular expression and a
|
||||||
and a reverse lookup is attempted. This script will search the country names
|
reverse lookup is attempted. This script will search the country names and
|
||||||
and print a list of matching entries. You can force reverse mappings with the
|
print a list of matching entries. You can force reverse mappings with the
|
||||||
`-r' flag (see below).
|
`-r' flag (see below).
|
||||||
|
|
||||||
For example:
|
For example:
|
||||||
|
@ -34,10 +34,6 @@ For example:
|
||||||
tz: Tanzania, United Republic of
|
tz: Tanzania, United Republic of
|
||||||
gb: United Kingdom
|
gb: United Kingdom
|
||||||
|
|
||||||
|
|
||||||
[*] Note that regular expressions must conform to Python 1.5's re.py module
|
|
||||||
syntax. The comparison is done with the search() method.
|
|
||||||
|
|
||||||
Country codes are maintained by the RIPE Network Coordination Centre,
|
Country codes are maintained by the RIPE Network Coordination Centre,
|
||||||
in coordination with the ISO 3166 Maintenance Agency at DIN Berlin. The
|
in coordination with the ISO 3166 Maintenance Agency at DIN Berlin. The
|
||||||
authoritative source of country code mappings is:
|
authoritative source of country code mappings is:
|
||||||
|
@ -69,7 +65,7 @@ Usage: %(PROGRAM)s [-d] [-p file] [-o] [-h] addr [addr ...]
|
||||||
When used in conjunction with the `-p' option, output is in the form
|
When used in conjunction with the `-p' option, output is in the form
|
||||||
of a Python dictionary, and country names are normalized
|
of a Python dictionary, and country names are normalized
|
||||||
w.r.t. capitalization. This makes it appropriate for cutting and
|
w.r.t. capitalization. This makes it appropriate for cutting and
|
||||||
pasting back into this file.
|
pasting back into this file. Output is always to standard out.
|
||||||
|
|
||||||
--reverse
|
--reverse
|
||||||
-r
|
-r
|
||||||
|
@ -82,18 +78,13 @@ Usage: %(PROGRAM)s [-d] [-p file] [-o] [-h] addr [addr ...]
|
||||||
-h
|
-h
|
||||||
--help
|
--help
|
||||||
Print this message.
|
Print this message.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
__version__ = '$Revision$'
|
__version__ = '$Revision$'
|
||||||
|
|
||||||
|
|
||||||
import sys
|
import sys
|
||||||
import getopt
|
import getopt
|
||||||
try:
|
import re
|
||||||
import re
|
|
||||||
except ImportError:
|
|
||||||
print sys.argv[0], 'requires Python 1.5'
|
|
||||||
sys.exit(1)
|
|
||||||
|
|
||||||
PROGRAM = sys.argv[0]
|
PROGRAM = sys.argv[0]
|
||||||
|
|
||||||
|
@ -110,22 +101,18 @@ def usage(code, msg=''):
|
||||||
def resolve(rawaddr):
|
def resolve(rawaddr):
|
||||||
parts = rawaddr.split('.')
|
parts = rawaddr.split('.')
|
||||||
if not len(parts):
|
if not len(parts):
|
||||||
# no top level domain found, bounce it to the next step
|
# no top level domain found, bounce it to the next step
|
||||||
return rawaddr
|
return rawaddr
|
||||||
addr = parts[-1]
|
addr = parts[-1]
|
||||||
if nameorgs.has_key(addr):
|
if nameorgs.has_key(addr):
|
||||||
if nameorgs[addr][0].lower() in 'aeiou':
|
print rawaddr, 'is in the', nameorgs[addr], 'top level domain'
|
||||||
ana = 'an'
|
return None
|
||||||
else:
|
|
||||||
ana = 'a'
|
|
||||||
print rawaddr, 'is from', ana, nameorgs[addr], 'organization'
|
|
||||||
return None
|
|
||||||
elif countries.has_key(addr):
|
elif countries.has_key(addr):
|
||||||
print rawaddr, 'originated from', countries[addr]
|
print rawaddr, 'originated from', countries[addr]
|
||||||
return None
|
return None
|
||||||
else:
|
else:
|
||||||
# Not resolved, bounce it to the next step
|
# Not resolved, bounce it to the next step
|
||||||
return rawaddr
|
return rawaddr
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -133,82 +120,83 @@ def reverse(regexp):
|
||||||
matches = []
|
matches = []
|
||||||
cre = re.compile(regexp, re.IGNORECASE)
|
cre = re.compile(regexp, re.IGNORECASE)
|
||||||
for code, country in all.items():
|
for code, country in all.items():
|
||||||
mo = cre.search(country)
|
mo = cre.search(country)
|
||||||
if mo:
|
if mo:
|
||||||
matches.append(code)
|
matches.append(code)
|
||||||
# print results
|
# print results
|
||||||
if not matches:
|
if not matches:
|
||||||
# not resolved, bounce it to the next step
|
# not resolved, bounce it to the next step
|
||||||
return regexp
|
return regexp
|
||||||
if len(matches) == 1:
|
if len(matches) == 1:
|
||||||
code = matches[0]
|
code = matches[0]
|
||||||
print regexp, "matches code `%s', %s" % (code, all[code])
|
print regexp, "matches code `%s', %s" % (code, all[code])
|
||||||
else:
|
else:
|
||||||
print regexp, 'matches %d countries:' % len(matches)
|
print regexp, 'matches %d countries:' % len(matches)
|
||||||
for code in matches:
|
for code in matches:
|
||||||
print " %s: %s" % (code, all[code])
|
print " %s: %s" % (code, all[code])
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def parse(file, normalize):
|
def parse(file, normalize):
|
||||||
try:
|
try:
|
||||||
fp = open(file)
|
fp = open(file)
|
||||||
except IOError, (err, msg):
|
except IOError, (err, msg):
|
||||||
print msg, ':', file
|
print msg, ':', file
|
||||||
|
|
||||||
cre = re.compile('(.*?)[ \t]+([A-Z]{2})[ \t]+[A-Z]{3}[ \t]+[0-9]{3}')
|
cre = re.compile('(.*?)[ \t]+([A-Z]{2})[ \t]+[A-Z]{3}[ \t]+[0-9]{3}')
|
||||||
scanning = 0
|
scanning = 0
|
||||||
|
|
||||||
if normalize:
|
if normalize:
|
||||||
print 'country = {'
|
print 'countries = {'
|
||||||
|
|
||||||
while 1:
|
while 1:
|
||||||
line = fp.readline()
|
line = fp.readline()
|
||||||
if line == '':
|
if line == '':
|
||||||
break # EOF
|
break # EOF
|
||||||
if scanning:
|
if scanning:
|
||||||
mo = cre.match(line)
|
mo = cre.match(line)
|
||||||
if not mo:
|
if not mo:
|
||||||
line = line.strip()
|
line = line.strip()
|
||||||
if not line:
|
if not line:
|
||||||
continue
|
continue
|
||||||
elif line[0] == '-':
|
elif line[0] == '-':
|
||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
print 'Could not parse line:', line
|
print 'Could not parse line:', line
|
||||||
continue
|
continue
|
||||||
country, code = mo.group(1, 2)
|
country, code = mo.group(1, 2)
|
||||||
if normalize:
|
if normalize:
|
||||||
words = country.split()
|
words = country.split()
|
||||||
for i in range(len(words)):
|
for i in range(len(words)):
|
||||||
w = words[i]
|
w = words[i]
|
||||||
# XXX special cases
|
# XXX special cases
|
||||||
if w in ('AND', 'OF', 'OF)', 'name:', 'METROPOLITAN'):
|
if w in ('AND', 'OF', 'OF)', 'name:', 'METROPOLITAN'):
|
||||||
words[i] = w.lower()
|
words[i] = w.lower()
|
||||||
elif w == 'THE' and i != 1:
|
elif w == 'THE' and i <> 1:
|
||||||
words[i] = w.lower()
|
words[i] = w.lower()
|
||||||
elif len(w) > 3 and w[1] == "'":
|
elif len(w) > 3 and w[1] == "'":
|
||||||
words[i] = w[0:3].upper() + w[3:].lower()
|
words[i] = w[0:3].upper() + w[3:].lower()
|
||||||
elif w == '(U.S.)':
|
elif w in ('(U.S.)', 'U.S.'):
|
||||||
pass
|
pass
|
||||||
elif w[0] == '(' and w != '(local':
|
elif w[0] == '(' and w <> '(local':
|
||||||
words[i] = '(' + w[1:].capitalize()
|
words[i] = '(' + w[1:].capitalize()
|
||||||
elif w.find('-') != -1:
|
elif w.find('-') <> -1:
|
||||||
words[i] = '-'.join([s.capitalize() for s in w.split('-')])
|
words[i] = '-'.join(
|
||||||
else:
|
[s.capitalize() for s in w.split('-')])
|
||||||
words[i] = w.capitalize()
|
else:
|
||||||
code = code.lower()
|
words[i] = w.capitalize()
|
||||||
country = ' '.join(words)
|
code = code.lower()
|
||||||
print ' "%s": "%s",' % (code, country)
|
country = ' '.join(words)
|
||||||
else:
|
print ' "%s": "%s",' % (code, country)
|
||||||
print code, country
|
else:
|
||||||
|
print code, country
|
||||||
elif line[0] == '-':
|
|
||||||
scanning = 1
|
elif line[0] == '-':
|
||||||
|
scanning = 1
|
||||||
|
|
||||||
if normalize:
|
if normalize:
|
||||||
print ' }'
|
print ' }'
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
|
@ -228,53 +216,62 @@ def main():
|
||||||
usage(1, msg)
|
usage(1, msg)
|
||||||
|
|
||||||
for opt, arg in opts:
|
for opt, arg in opts:
|
||||||
if opt in ('-h', '--help'):
|
if opt in ('-h', '--help'):
|
||||||
help = 1
|
help = 1
|
||||||
elif opt in ('-d', '--dump'):
|
elif opt in ('-d', '--dump'):
|
||||||
dump = 1
|
dump = 1
|
||||||
elif opt in ('-p', '--parse'):
|
elif opt in ('-p', '--parse'):
|
||||||
parsefile = arg
|
parsefile = arg
|
||||||
elif opt in ('-o', '--output'):
|
elif opt in ('-o', '--outputdict'):
|
||||||
normalize = 1
|
normalize = 1
|
||||||
elif opt in ('-r', '--reverse'):
|
elif opt in ('-r', '--reverse'):
|
||||||
forcerev = 1
|
forcerev = 1
|
||||||
|
|
||||||
if help:
|
if help:
|
||||||
usage(status)
|
usage(status)
|
||||||
|
|
||||||
if dump:
|
if dump:
|
||||||
print 'Non-geographic domains:'
|
print 'Non-geographic domains:'
|
||||||
codes = nameorgs.keys()
|
codes = nameorgs.keys()
|
||||||
codes.sort()
|
codes.sort()
|
||||||
for code in codes:
|
for code in codes:
|
||||||
print ' %4s:' % code, nameorgs[code]
|
print ' %4s:' % code, nameorgs[code]
|
||||||
|
|
||||||
print '\nCountry coded domains:'
|
print '\nCountry coded domains:'
|
||||||
codes = countries.keys()
|
codes = countries.keys()
|
||||||
codes.sort()
|
codes.sort()
|
||||||
for code in codes:
|
for code in codes:
|
||||||
print ' %2s:' % code, countries[code]
|
print ' %2s:' % code, countries[code]
|
||||||
elif parsefile:
|
elif parsefile:
|
||||||
parse(parsefile, normalize)
|
parse(parsefile, normalize)
|
||||||
else:
|
else:
|
||||||
if not forcerev:
|
if not forcerev:
|
||||||
args = filter(None, map(resolve, args))
|
args = filter(None, map(resolve, args))
|
||||||
args = filter(None, map(reverse, args))
|
args = filter(None, map(reverse, args))
|
||||||
for arg in args:
|
for arg in args:
|
||||||
print 'Where in the world is %s?' % arg
|
print 'Where in the world is %s?' % arg
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# The mappings
|
# The mappings
|
||||||
nameorgs = {
|
nameorgs = {
|
||||||
|
# New top level domains as described by ICANN
|
||||||
|
# http://www.icann.org/tlds/
|
||||||
|
"aero": "air-transport industry",
|
||||||
"arpa": "Arpanet",
|
"arpa": "Arpanet",
|
||||||
|
"biz": "business",
|
||||||
"com": "commercial",
|
"com": "commercial",
|
||||||
|
"coop": "cooperatives",
|
||||||
"edu": "educational",
|
"edu": "educational",
|
||||||
"gov": "government",
|
"gov": "government",
|
||||||
|
"info": "unrestricted `info'",
|
||||||
|
"int": "international",
|
||||||
"mil": "military",
|
"mil": "military",
|
||||||
|
"museum": "museums",
|
||||||
|
"name": "`name' (for registration by individuals)",
|
||||||
"net": "networking",
|
"net": "networking",
|
||||||
"org": "non-commercial",
|
"org": "non-commercial",
|
||||||
"int": "international",
|
"pro": "professionals",
|
||||||
# This isn't in the same class as those above, but is included here
|
# This isn't in the same class as those above, but is included here
|
||||||
# because `uk' is the common practice country code for the United Kingdom.
|
# because `uk' is the common practice country code for the United Kingdom.
|
||||||
# AFAICT, the official `gb' code is routinely ignored!
|
# AFAICT, the official `gb' code is routinely ignored!
|
||||||
|
@ -525,7 +522,7 @@ countries = {
|
||||||
"ve": "Venezuela",
|
"ve": "Venezuela",
|
||||||
"vn": "Viet Nam",
|
"vn": "Viet Nam",
|
||||||
"vg": "Virgin Islands, British",
|
"vg": "Virgin Islands, British",
|
||||||
"vi": "Virgin Islands, U.s.",
|
"vi": "Virgin Islands, U.S.",
|
||||||
"wf": "Wallis and Futuna",
|
"wf": "Wallis and Futuna",
|
||||||
"eh": "Western Sahara",
|
"eh": "Western Sahara",
|
||||||
"ye": "Yemen",
|
"ye": "Yemen",
|
||||||
|
|
Loading…
Reference in New Issue