Added parsing of ISO 3166 files

This commit is contained in:
Barry Warsaw 1997-12-04 19:35:25 +00:00
parent eee08cdd54
commit 9efdef1d39
1 changed files with 104 additions and 7 deletions

View File

@ -1,4 +1,4 @@
#! /usr/bin/env python
#! /usr/bin/env python1.5
"""Print the long name of an Internet domain.
@ -17,17 +17,33 @@ in coordination with the ISO 3166 Maintenance Agency at DIN Berlin.
The latest known change to this information was:
Thu Feb 10 10:20:28 MET 1994
Thu Aug 7 17:59:51 MET DST 1997
This script also knows about non-geographic top-level domains.
Usage: %s [-d] [-h] addr [addr ...]
Usage: %s [-d] [-p|-P file] [-h] addr [addr ...]
-d (--dump) -- print mapping of all known top-level domains
-h (--help) -- print this help message
--dump
-d
Print mapping of all top-level domains.
--parse file
-p file
-P file
--Parse file
Parse an iso3166-countrycodes file (given as the argument).
This prints the two letter country code (it ignores the three
letter code), followed by the country name. With -P option,
output is in the form of a Python dictionary, and country
names are normalized w.r.t. capitalization. This makes it
appropriate for cutting and pasting back into this file.
-h
--help
Print this message.
"""
__version__ = '1.0'
__version__ = '2.0'
__author__ = 'Barry Warsaw <bwarsaw@python.org>'
__source__ = '<url:http://www.python.org/~bwarsaw/pyware/>'
@ -35,6 +51,11 @@ __source__ = '<url:http://www.python.org/~bwarsaw/pyware/>'
import sys
import string
import getopt
try:
import re
except ImportError:
print 'Python 1.5 is required!'
sys.exit(1)
@ -42,6 +63,7 @@ def usage(status=0):
print __doc__ % sys.argv[0]
sys.exit(status)
def resolve(rawaddr):
parts = string.splitfields(rawaddr, '.')
if not len(parts):
@ -56,18 +78,90 @@ def resolve(rawaddr):
print 'Where in the world is %s?' % rawaddr
def parse(file, normalize):
try:
fp = open(file)
except IOError, (err, msg):
print msg, ':', file
cre = re.compile('(.*?)[ \t]+([A-Z]{2})[ \t]+[A-Z]{3}[ \t]+[0-9]{3}')
scanning = 0
if normalize:
print 'country = {'
while 1:
line = fp.readline()
if line == '':
break # EOF
if scanning:
mo = cre.match(line)
if not mo:
line = string.strip(line)
if not line:
continue
elif line[0] == '-':
break
else:
print 'Could not parse line:', line
continue
country, code = mo.group(1, 2)
if normalize:
words = string.split(country)
for i in range(len(words)):
w = words[i]
# XXX special cases
if w in ('AND', 'OF', 'OF)', 'name:', 'METROPOLITAN'):
words[i] = string.lower(w)
elif w == 'THE' and i <> 1:
words[i] = string.lower(w)
elif len(w) > 3 and w[1] == "'":
words[i] = string.upper(w[0:3]) + \
string.lower(w[3:])
elif w == '(U.S.)':
pass
elif w[0] == '(' and w <> '(local':
words[i] = '(' + string.capitalize(w[1:])
elif string.find(w, '-'):
words[i] = string.join(
map(string.capitalize, string.split(w, '-')),
'-')
else:
words[i] = string.capitalize(w)
code = string.lower(code)
country = string.join(words)
print ' "%s": "%s",' % (code, country)
else:
print code, country
elif line[0] == '-':
scanning = 1
if normalize:
print ' }'
def main():
help = 0
status = 0
dump = 0
parsefile = None
normalize = 0
opts, args = getopt.getopt(sys.argv[1:], 'hd', ['help', 'dump'])
opts, args = getopt.getopt(sys.argv[1:],
'p:P:hd',
['parse', 'Parse', 'PARSE', 'help', 'dump'])
for arg, val in opts:
if arg in ('-h', '--help'):
help = 1
elif arg in ('-d', '--dump'):
dump = 1
elif arg in ('-p', '--parse'):
parsefile = val
elif arg in ('-P', '--Parse', '--PARSE'):
parsefile = val
normalize = 1
if help:
usage(status)
@ -84,9 +178,12 @@ def main():
codes.sort()
for code in codes:
print ' %2s:' % code, country[code]
elif parsefile:
parse(parsefile, normalize)
else:
map(resolve, args)
# The mappings
nameorg = {