cpython/Tools/world/world

552 lines
15 KiB
Plaintext
Raw Normal View History

1997-12-04 15:44:30 -04:00
#! /usr/bin/env python
"""world -- Print mappings between country names and DNS country codes.

Contact: Barry Warsaw
Email: barry@python.org
Version: %(__version__)s

This script will take a list of Internet addresses and print out where in the
world those addresses originate from, based on the top-level domain country
code found in the address. Addresses can be in any of the following forms:

    xx                -- just the country code or top-level domain identifier
    host.domain.xx    -- any Internet host or network name
    somebody@where.xx -- an Internet email address

If no match is found, the address is interpreted as a regular expression and a
reverse lookup is attempted. This script will search the country names and
print a list of matching entries. You can force reverse mappings with the
`-r' flag (see below).

For example:

    %% world tz us
    tz originated from Tanzania, United Republic of
    us originated from United States

    %% world united
    united matches 6 countries:
        ae: United Arab Emirates
        uk: United Kingdom (common practice)
        um: United States Minor Outlying Islands
        us: United States
        tz: Tanzania, United Republic of
        gb: United Kingdom

Country codes are maintained by the RIPE Network Coordination Centre,
in coordination with the ISO 3166 Maintenance Agency at DIN Berlin. The
authoritative source of country code mappings is:

    <url:ftp://ftp.ripe.net/iso3166-countrycodes.txt>

The latest known change to this information was:

    Friday, 5 April 2002, 12.00 CET 2002

This script also knows about non-geographic top-level domains, and the
additional ccTLDs reserved by IANA.

Usage: %(PROGRAM)s [-d] [-p file] [-o] [-r] [-h] addr [addr ...]

    --dump
    -d
        Print mapping of all top-level domains.

    --parse file
    -p file
        Parse an iso3166-countrycodes file extracting the two letter country
        code followed by the country name. Note that the three letter country
        codes and numbers, which are also provided in the standard format
        file, are ignored.

    --outputdict
    -o
        When used in conjunction with the `-p' option, output is in the form
        of a Python dictionary, and country names are normalized
        w.r.t. capitalization. This makes it appropriate for cutting and
        pasting back into this file. Output is always to standard out.

    --reverse
    -r
        Force reverse lookup. In this mode the address can be any Python
        regular expression; this is matched against all country names and a
        list of matching mappings is printed. In normal mode (i.e. without
        this flag), reverse lookup is performed on addresses if no matching
        country code is found.

    -h
    --help
        Print this message.
"""
1998-03-11 12:53:21 -04:00
__version__ = '$Revision$'
1996-11-20 11:17:50 -04:00
1996-11-18 17:26:56 -04:00
1995-06-15 12:54:16 -03:00
import sys
import getopt
import re
1998-03-11 12:46:04 -04:00
1998-12-05 18:07:24 -04:00
PROGRAM = sys.argv[0]
1996-11-18 17:26:56 -04:00
1998-12-05 18:07:24 -04:00
def usage(code, msg=''):
    """Print the help text, then an optional message, and exit.

    The module docstring doubles as the help text; its %(name)s
    placeholders are filled in from the module globals.

    Arguments:
        code -- process exit status handed to sys.exit().
        msg  -- extra text printed after the help; skipped when false.

    This function does not return: sys.exit() raises SystemExit.
    """
    # Single-argument print(...) behaves the same under the Python 2 print
    # statement and the Python 3 print function.
    print(__doc__ % globals())
    if msg:
        print(msg)
    sys.exit(code)
1996-11-18 17:26:56 -04:00
1997-12-04 15:35:25 -04:00
1998-03-11 12:46:04 -04:00
1996-11-18 17:26:56 -04:00
def resolve(rawaddr):
    """Try to identify rawaddr by its top-level domain.

    The text after the last dot (or the whole string when there is no dot)
    is looked up first in `nameorgs' and then in `countries'.

    Arguments:
        rawaddr -- a bare code, a host name, or an email address.

    Returns None when the address was recognised (a line describing it has
    been printed), otherwise returns rawaddr unchanged so that the caller can
    bounce it to the next step.
    """
    # Domain names are case-insensitive and a fully-qualified name may carry
    # a trailing dot, so normalise before looking the label up.  str.split()
    # always yields at least one element, so [-1] is safe.
    tld = rawaddr.rstrip('.').split('.')[-1].lower()
    if tld in nameorgs:
        print('%s is in the %s top level domain' % (rawaddr, nameorgs[tld]))
        return None
    if tld in countries:
        print('%s originated from %s' % (rawaddr, countries[tld]))
        return None
    # Not resolved, bounce it to the next step
    return rawaddr
1997-12-04 15:35:25 -04:00
1998-03-11 12:46:04 -04:00
def reverse(regexp):
    """Search every known domain description for regexp and print the hits.

    The pattern is compiled case-insensitively and searched for in each
    value of the module-level `all' mapping.

    Arguments:
        regexp -- a regular expression given as a string.

    Returns None when at least one entry matched (the matches have been
    printed), otherwise returns regexp unchanged so that the caller can
    bounce it to the next step.
    """
    try:
        cre = re.compile(regexp, re.IGNORECASE)
    except re.error:
        # Unresolved addresses end up here verbatim and need not be valid
        # regular expressions; treat such input as "no match" rather than
        # aborting the whole run with a traceback.
        return regexp

    matches = [code for code, country in all.items() if cre.search(country)]
    if not matches:
        # not resolved, bounce it to the next step
        return regexp

    # print results
    if len(matches) == 1:
        code = matches[0]
        print("%s matches code `%s', %s" % (regexp, code, all[code]))
    else:
        print('%s matches %d countries:' % (regexp, len(matches)))
        for code in matches:
            print(" %s: %s" % (code, all[code]))
    return None
1997-12-04 15:44:30 -04:00
1998-03-11 12:46:04 -04:00
def _normalize_country(country):
    """Return the all-capitals country name with normalized capitalization.

    Each whitespace-separated word is capitalized, apart from the special
    cases handled below; a few connecting words are lower-cased instead.
    """
    words = country.split()
    for i, w in enumerate(words):
        # XXX special cases
        if w in ('AND', 'OF', 'OF)', 'name:', 'METROPOLITAN'):
            words[i] = w.lower()
        elif w == 'THE' and i != 1:
            # A 'THE' in second position is left to the generic branch and
            # becomes "The", as in "Congo, The Democratic Republic of the".
            words[i] = w.lower()
        elif len(w) > 3 and w[1] == "'":
            # Keep the letter after the apostrophe upper case: D'IVOIRE
            # becomes D'Ivoire.
            words[i] = w[0:3].upper() + w[3:].lower()
        elif w in ('(U.S.)', 'U.S.'):
            pass
        elif w[0] == '(' and w != '(local':
            words[i] = '(' + w[1:].capitalize()
        elif '-' in w:
            words[i] = '-'.join([s.capitalize() for s in w.split('-')])
        else:
            words[i] = w.capitalize()
    return ' '.join(words)


def parse(file, normalize):
    """Print the code/name pairs found in an iso3166-countrycodes file.

    Rows are only looked for after the first line that starts with `-';
    the next line starting with `-' ends the table.  A row consists of the
    country name, a two-capital-letter code, a three-capital-letter code and
    a three-digit number, separated by spaces or tabs.  Blank lines inside
    the table are skipped and any other line is reported as unparsable.

    Arguments:
        file      -- path of the file to read.
        normalize -- when true, emit a `countries = {...}' Python dictionary
                     with lower-cased codes and normalized names; when false,
                     emit plain "CODE NAME" lines.

    Output always goes to standard out.  If the file cannot be opened, the
    error is printed and the function returns without parsing anything.
    """
    try:
        fp = open(file)
    except IOError as e:
        print('%s : %s' % (e.strerror, file))
        # Nothing to read; carrying on would only fail on the unbound `fp'.
        return

    cre = re.compile('(.*?)[ \t]+([A-Z]{2})[ \t]+[A-Z]{3}[ \t]+[0-9]{3}')
    scanning = 0
    try:
        if normalize:
            print('countries = {')

        for line in fp:
            if not scanning:
                # Still in the preamble: the first dashed line opens the table.
                if line[0] == '-':
                    scanning = 1
                continue

            mo = cre.match(line)
            if not mo:
                line = line.strip()
                if not line:
                    continue
                elif line[0] == '-':
                    # Second dashed line: end of the table.
                    break
                else:
                    print('Could not parse line: %s' % line)
                    continue

            country, code = mo.group(1, 2)
            if normalize:
                print(' "%s": "%s",' % (code.lower(),
                                        _normalize_country(country)))
            else:
                print('%s %s' % (code, country))

        if normalize:
            print(' }')
    finally:
        # Release the file handle even if a row blows up part-way through.
        fp.close()
1997-12-04 15:35:25 -04:00
def main():
    """Command-line entry point: parse the options and dispatch.

    Exactly one action is carried out, in this order of precedence:
    help (-h), dump of both tables (-d), parsing of a country-code file
    (-p, optionally with -o), or lookup of the remaining arguments.

    For lookups every address is first passed through resolve() unless -r
    was given; whatever is left is passed through reverse(), and addresses
    that neither step recognised are reported at the end.
    """
    show_help = False
    dump = False
    parsefile = None
    normalize = False
    forcerev = False

    try:
        opts, args = getopt.getopt(
            sys.argv[1:],
            'p:rohd',
            ['parse=', 'reverse', 'outputdict', 'help', 'dump'])
    except getopt.error as msg:
        usage(1, msg)

    for opt, arg in opts:
        if opt in ('-h', '--help'):
            show_help = True
        elif opt in ('-d', '--dump'):
            dump = True
        elif opt in ('-p', '--parse'):
            parsefile = arg
        elif opt in ('-o', '--outputdict'):
            normalize = True
        elif opt in ('-r', '--reverse'):
            forcerev = True

    if show_help:
        usage(0)

    if dump:
        print('Non-geographic domains:')
        for code in sorted(nameorgs):
            print(' %4s: %s' % (code, nameorgs[code]))
        print('\nCountry coded domains:')
        for code in sorted(countries):
            print(' %2s: %s' % (code, countries[code]))
    elif parsefile:
        parse(parsefile, normalize)
    else:
        # Build real lists at each stage: every address goes through
        # resolve() before any goes through reverse(), so the output is
        # grouped by stage regardless of whether map() is lazy.
        if not forcerev:
            args = [addr for addr in map(resolve, args) if addr]
        args = [addr for addr in map(reverse, args) if addr]
        for arg in args:
            print('Where in the world is %s?' % arg)
1994-07-25 18:52:13 -03:00
1997-12-04 15:35:25 -04:00
1996-11-18 17:26:56 -04:00
1994-07-25 18:52:13 -03:00
# The mappings
1998-03-11 12:46:04 -04:00
# Top-level domains that are not ISO 3166 country codes, mapped to a short
# description of each.
nameorgs = {
    # New top level domains as described by ICANN
    # http://www.icann.org/tlds/
    "aero": "air-transport industry",
    "arpa": "Arpanet",
    "biz": "business",
    "com": "commercial",
    "coop": "cooperatives",
    "edu": "educational",
    "gov": "government",
    "info": "unrestricted `info'",
    "int": "international",
    "mil": "military",
    "museum": "museums",
    "name": "`name' (for registration by individuals)",
    "net": "networking",
    "org": "non-commercial",
    "pro": "professionals",
    # These additional ccTLDs are included here even though they are not part
    # of ISO 3166.  IANA has 5 reserved ccTLDs as described here:
    #
    # http://www.iso.org/iso/en/prods-services/iso3166ma/04background-on-iso-3166/iso3166-1-and-ccTLDs.html
    #
    # but I can't find an official list anywhere.
    #
    # Note that `uk' is the common practice country code for the United
    # Kingdom.  AFAICT, the official `gb' code is routinely ignored!
    #
    # <D.M.Pick@qmw.ac.uk> tells me that `uk' was long in use before ISO3166
    # was adopted for top-level DNS zone names (although in the reverse order
    # like uk.ac.qmw) and was carried forward (with the reversal) to avoid a
    # large-scale renaming process as the UK switched from their old `Coloured
    # Book' protocols over X.25 to Internet protocols over IP.
    #
    # See <url:ftp://ftp.ripe.net/ripe/docs/ripe-159.txt>
    #
    # Also, `su', while obsolete, is still in limited use.
    "ac": "Ascension Island",
    "gg": "Guernsey",
    "im": "Isle of Man",
    "je": "Jersey",
    "uk": "United Kingdom (common practice)",
    "su": "Soviet Union (still in limited use)",
}
1994-07-25 18:52:13 -03:00
1996-11-18 17:26:56 -04:00
1998-03-11 12:46:04 -04:00
# Two-letter country codes (lower case) mapped to country names, in the
# dictionary form produced by the `-p file -o' options.
countries = {
    "af": "Afghanistan",
    "al": "Albania",
    "dz": "Algeria",
    "as": "American Samoa",
    "ad": "Andorra",
    "ao": "Angola",
    "ai": "Anguilla",
    "aq": "Antarctica",
    "ag": "Antigua and Barbuda",
    "ar": "Argentina",
    "am": "Armenia",
    "aw": "Aruba",
    "au": "Australia",
    "at": "Austria",
    "az": "Azerbaijan",
    "bs": "Bahamas",
    "bh": "Bahrain",
    "bd": "Bangladesh",
    "bb": "Barbados",
    "by": "Belarus",
    "be": "Belgium",
    "bz": "Belize",
    "bj": "Benin",
    "bm": "Bermuda",
    "bt": "Bhutan",
    "bo": "Bolivia",
    "ba": "Bosnia and Herzegowina",
    "bw": "Botswana",
    "bv": "Bouvet Island",
    "br": "Brazil",
    "io": "British Indian Ocean Territory",
    "bn": "Brunei Darussalam",
    "bg": "Bulgaria",
    "bf": "Burkina Faso",
    "bi": "Burundi",
    "kh": "Cambodia",
    "cm": "Cameroon",
    "ca": "Canada",
    "cv": "Cape Verde",
    "ky": "Cayman Islands",
    "cf": "Central African Republic",
    "td": "Chad",
    "cl": "Chile",
    "cn": "China",
    "cx": "Christmas Island",
    "cc": "Cocos (Keeling) Islands",
    "co": "Colombia",
    "km": "Comoros",
    "cg": "Congo",
    "cd": "Congo, The Democratic Republic of the",
    "ck": "Cook Islands",
    "cr": "Costa Rica",
    "ci": "Cote D'Ivoire",
    "hr": "Croatia",
    "cu": "Cuba",
    "cy": "Cyprus",
    "cz": "Czech Republic",
    "dk": "Denmark",
    "dj": "Djibouti",
    "dm": "Dominica",
    "do": "Dominican Republic",
    "tp": "East Timor",
    "ec": "Ecuador",
    "eg": "Egypt",
    "sv": "El Salvador",
    "gq": "Equatorial Guinea",
    "er": "Eritrea",
    "ee": "Estonia",
    "et": "Ethiopia",
    "fk": "Falkland Islands (Malvinas)",
    "fo": "Faroe Islands",
    "fj": "Fiji",
    "fi": "Finland",
    "fr": "France",
    "gf": "French Guiana",
    "pf": "French Polynesia",
    "tf": "French Southern Territories",
    "ga": "Gabon",
    "gm": "Gambia",
    "ge": "Georgia",
    "de": "Germany",
    "gh": "Ghana",
    "gi": "Gibraltar",
    "gr": "Greece",
    "gl": "Greenland",
    "gd": "Grenada",
    "gp": "Guadeloupe",
    "gu": "Guam",
    "gt": "Guatemala",
    "gn": "Guinea",
    "gw": "Guinea-Bissau",
    "gy": "Guyana",
    "ht": "Haiti",
    "hm": "Heard Island and Mcdonald Islands",
    "va": "Holy See (Vatican City State)",
    "hn": "Honduras",
    "hk": "Hong Kong",
    "hu": "Hungary",
    "is": "Iceland",
    "in": "India",
    "id": "Indonesia",
    "ir": "Iran, Islamic Republic of",
    "iq": "Iraq",
    "ie": "Ireland",
    "il": "Israel",
    "it": "Italy",
    "jm": "Jamaica",
    "jp": "Japan",
    "jo": "Jordan",
    "kz": "Kazakstan",
    "ke": "Kenya",
    "ki": "Kiribati",
    "kp": "Korea, Democratic People's Republic of",
    "kr": "Korea, Republic of",
    "kw": "Kuwait",
    "kg": "Kyrgyzstan",
    "la": "Lao People's Democratic Republic",
    "lv": "Latvia",
    "lb": "Lebanon",
    "ls": "Lesotho",
    "lr": "Liberia",
    "ly": "Libyan Arab Jamahiriya",
    "li": "Liechtenstein",
    "lt": "Lithuania",
    "lu": "Luxembourg",
    "mo": "Macau",
    "mk": "Macedonia, The Former Yugoslav Republic of",
    "mg": "Madagascar",
    "mw": "Malawi",
    "my": "Malaysia",
    "mv": "Maldives",
    "ml": "Mali",
    "mt": "Malta",
    "mh": "Marshall Islands",
    "mq": "Martinique",
    "mr": "Mauritania",
    "mu": "Mauritius",
    "yt": "Mayotte",
    "mx": "Mexico",
    "fm": "Micronesia, Federated States of",
    "md": "Moldova, Republic of",
    "mc": "Monaco",
    "mn": "Mongolia",
    "ms": "Montserrat",
    "ma": "Morocco",
    "mz": "Mozambique",
    "mm": "Myanmar",
    "na": "Namibia",
    "nr": "Nauru",
    "np": "Nepal",
    "nl": "Netherlands",
    "an": "Netherlands Antilles",
    "nc": "New Caledonia",
    "nz": "New Zealand",
    "ni": "Nicaragua",
    "ne": "Niger",
    "ng": "Nigeria",
    "nu": "Niue",
    "nf": "Norfolk Island",
    "mp": "Northern Mariana Islands",
    "no": "Norway",
    "om": "Oman",
    "pk": "Pakistan",
    "pw": "Palau",
    "ps": "Palestinian Territory, Occupied",
    "pa": "Panama",
    "pg": "Papua New Guinea",
    "py": "Paraguay",
    "pe": "Peru",
    "ph": "Philippines",
    "pn": "Pitcairn",
    "pl": "Poland",
    "pt": "Portugal",
    "pr": "Puerto Rico",
    "qa": "Qatar",
    "re": "Reunion",
    "ro": "Romania",
    "ru": "Russian Federation",
    "rw": "Rwanda",
    "sh": "Saint Helena",
    "kn": "Saint Kitts and Nevis",
    "lc": "Saint Lucia",
    "pm": "Saint Pierre and Miquelon",
    "vc": "Saint Vincent and the Grenadines",
    "ws": "Samoa",
    "sm": "San Marino",
    "st": "Sao Tome and Principe",
    "sa": "Saudi Arabia",
    "sn": "Senegal",
    "sc": "Seychelles",
    "sl": "Sierra Leone",
    "sg": "Singapore",
    "sk": "Slovakia",
    "si": "Slovenia",
    "sb": "Solomon Islands",
    "so": "Somalia",
    "za": "South Africa",
    "gs": "South Georgia and the South Sandwich Islands",
    "es": "Spain",
    "lk": "Sri Lanka",
    "sd": "Sudan",
    "sr": "Suriname",
    "sj": "Svalbard and Jan Mayen",
    "sz": "Swaziland",
    "se": "Sweden",
    "ch": "Switzerland",
    "sy": "Syrian Arab Republic",
    "tw": "Taiwan, Province of China",
    "tj": "Tajikistan",
    "tz": "Tanzania, United Republic of",
    "th": "Thailand",
    "tg": "Togo",
    "tk": "Tokelau",
    "to": "Tonga",
    "tt": "Trinidad and Tobago",
    "tn": "Tunisia",
    "tr": "Turkey",
    "tm": "Turkmenistan",
    "tc": "Turks and Caicos Islands",
    "tv": "Tuvalu",
    "ug": "Uganda",
    "ua": "Ukraine",
    "ae": "United Arab Emirates",
    "gb": "United Kingdom",
    "us": "United States",
    "um": "United States Minor Outlying Islands",
    "uy": "Uruguay",
    "uz": "Uzbekistan",
    "vu": "Vanuatu",
    "ve": "Venezuela",
    "vn": "Viet Nam",
    "vg": "Virgin Islands, British",
    "vi": "Virgin Islands, U.S.",
    "wf": "Wallis and Futuna",
    "eh": "Western Sahara",
    "ye": "Yemen",
    "yu": "Yugoslavia",
    "zm": "Zambia",
    "zw": "Zimbabwe",
}
1994-07-25 18:52:13 -03:00
1998-03-11 12:46:04 -04:00
# Combined lookup table used for reverse (name -> code) searches: start from
# the non-geographic domains, then lay the country codes over them.
# NOTE: this module-level name hides the `all' builtin within this file.
all = dict(nameorgs)
all.update(countries)


if __name__ == '__main__':
    main()