mirror of https://github.com/python/cpython
552 lines
15 KiB
Python
Executable File
552 lines
15 KiB
Python
Executable File
#! /usr/bin/env python
|
||
|
||
"""world -- Print mappings between country names and DNS country codes.
|
||
|
||
Contact: Barry Warsaw
|
||
Email: barry@python.org
|
||
Version: %(__version__)s
|
||
|
||
This script will take a list of Internet addresses and print out where in the
|
||
world those addresses originate from, based on the top-level domain country
|
||
code found in the address. Addresses can be in any of the following forms:
|
||
|
||
xx -- just the country code or top-level domain identifier
|
||
host.domain.xx -- any Internet host or network name
|
||
somebody@where.xx -- an Internet email address
|
||
|
||
If no match is found, the address is interpreted as a regular expression and a
|
||
reverse lookup is attempted. This script will search the country names and
|
||
print a list of matching entries. You can force reverse mappings with the
|
||
`-r' flag (see below).
|
||
|
||
For example:
|
||
|
||
%% world tz us
|
||
tz originated from Tanzania, United Republic of
|
||
us originated from United States
|
||
|
||
%% world united
|
||
united matches 6 countries:
|
||
ae: United Arab Emirates
|
||
uk: United Kingdom (common practice)
|
||
um: United States Minor Outlying Islands
|
||
us: United States
|
||
tz: Tanzania, United Republic of
|
||
gb: United Kingdom
|
||
|
||
Country codes are maintained by the RIPE Network Coordination Centre,
|
||
in coordination with the ISO 3166 Maintenance Agency at DIN Berlin. The
|
||
authoritative source of country code mappings is:
|
||
|
||
<url:ftp://ftp.ripe.net/iso3166-countrycodes.txt>
|
||
|
||
The latest known change to this information was:
|
||
|
||
Friday, 5 April 2002, 12.00 CET 2002
|
||
|
||
This script also knows about non-geographic top-level domains, and the
|
||
additional ccTLDs reserved by IANA.
|
||
|
||
Usage: %(PROGRAM)s [-d] [-p file] [-o] [-r] [-h] addr [addr ...]
|
||
|
||
--dump
|
||
-d
|
||
Print mapping of all top-level domains.
|
||
|
||
--parse file
|
||
-p file
|
||
Parse an iso3166-countrycodes file extracting the two letter country
|
||
code followed by the country name. Note that the three letter country
|
||
codes and numbers, which are also provided in the standard format
|
||
file, are ignored.
|
||
|
||
--outputdict
|
||
-o
|
||
When used in conjunction with the `-p' option, output is in the form
|
||
of a Python dictionary, and country names are normalized
|
||
w.r.t. capitalization. This makes it appropriate for cutting and
|
||
pasting back into this file. Output is always to standard out.
|
||
|
||
--reverse
|
||
-r
|
||
Force reverse lookup. In this mode the address can be any Python
|
||
regular expression; this is matched against all country names and a
|
||
list of matching mappings is printed. In normal mode (e.g. without
|
||
this flag), reverse lookup is performed on addresses if no matching
|
||
country code is found.
|
||
|
||
-h
|
||
--help
|
||
Print this message.
|
||
"""
|
||
# Interpolated into the help text through the %(__version__)s placeholder.
# NOTE(review): '$Revision$' looks like an unexpanded version-control
# keyword -- confirm whether anything still expands it.
__version__ = '$Revision$'
|
||
|
||
|
||
import sys
|
||
import getopt
|
||
import re
|
||
|
||
# Script name as invoked; fills the %(PROGRAM)s placeholder in the help text.
PROGRAM = sys.argv[0]
|
||
|
||
|
||
|
||
def usage(code, msg=''):
    """Print the help text, then an optional message, and exit.

    The module docstring doubles as the help text.  It is %-interpolated
    against the module globals, which is how the %(PROGRAM)s and
    %(__version__)s placeholders get filled in.

    Args:
        code: Exit status handed to sys.exit().
        msg: Optional extra text (for example a getopt error) printed after
            the help text.  Nothing extra is printed when it is false.

    Raises:
        SystemExit: Always, through sys.exit(code).
    """
    # A parenthesised single-argument print behaves the same as a Python 2
    # statement and as a Python 3 function call.
    print(__doc__ % globals())
    if msg:
        print(msg)
    sys.exit(code)
|
||
|
||
|
||
|
||
def resolve(rawaddr):
    """Report the top-level domain of an address if it is a known one.

    Args:
        rawaddr: A bare top-level domain or country code, a dotted host
            name, or an email address.  Only the text after the last '.'
            is looked up.

    Returns:
        None when the top-level domain was found in nameorgs or countries
        (a line describing it has been printed); otherwise rawaddr
        unchanged, so the caller can pass it on to the next step.
    """
    # str.split() with a separator always yields at least one element, so
    # there is no "no parts" case to guard against.  A fully qualified name
    # may end with the root dot ("example.com."); drop it so the label in
    # front of it is taken as the TLD.  DNS labels are case-insensitive and
    # both tables are keyed in lower case, hence the lower().
    addr = rawaddr.rstrip('.').split('.')[-1].lower()
    if addr in nameorgs:
        print('%s is in the %s top level domain' % (rawaddr, nameorgs[addr]))
        return None
    if addr in countries:
        print('%s originated from %s' % (rawaddr, countries[addr]))
        return None
    # Not resolved, bounce it to the next step
    return rawaddr
|
||
|
||
|
||
|
||
def reverse(regexp):
    """Search every known description for a regular expression.

    Args:
        regexp: A Python regular expression.  It is applied with
            re.search(), ignoring case, to each value of the combined
            `all` table.

    Returns:
        None when at least one entry matched (the match or matches have
        been printed); otherwise regexp unchanged so the caller can report
        it as unknown.  Text that is not a valid regular expression is
        treated as matching nothing.
    """
    try:
        cre = re.compile(regexp, re.IGNORECASE)
    except re.error:
        # Addresses that resolve() could not place end up here as well, so
        # the text may not be a valid pattern at all (an unbalanced
        # parenthesis, say).  Treat that as "no match" instead of crashing.
        return regexp

    matches = [code for code, country in all.items() if cre.search(country)]
    if not matches:
        # not resolved, bounce it to the next step
        return regexp

    # print results
    if len(matches) == 1:
        code = matches[0]
        print("%s matches code `%s', %s" % (regexp, code, all[code]))
    else:
        print('%s matches %d countries:' % (regexp, len(matches)))
        for code in matches:
            print(" %s: %s" % (code, all[code]))
    return None
|
||
|
||
|
||
|
||
def _normalize_country(country):
    """Return an upper-case country name with normalized capitalization."""
    words = country.split()
    for i, w in enumerate(words):
        # XXX special cases
        if w in ('AND', 'OF', 'OF)', 'name:', 'METROPOLITAN'):
            words[i] = w.lower()
        elif w == 'THE' and i != 1:
            # A 'THE' in second position skips this branch and is
            # capitalized by the final else, e.g. "Congo, The ...".
            words[i] = w.lower()
        elif len(w) > 3 and w[1] == "'":
            # Keep the letter after the apostrophe upper case: D'IVOIRE
            # becomes D'Ivoire.
            words[i] = w[0:3].upper() + w[3:].lower()
        elif w in ('(U.S.)', 'U.S.'):
            pass
        elif w[0] == '(' and w != '(local':
            words[i] = '(' + w[1:].capitalize()
        elif '-' in w:
            words[i] = '-'.join([s.capitalize() for s in w.split('-')])
        else:
            words[i] = w.capitalize()
    return ' '.join(words)


def parse(file, normalize):
    """Print the two-letter codes and country names found in a codes file.

    Lines are ignored until the first one that starts with '-'.  After
    that, every line of the form "NAME  XX  XXX  NNN" (name, two capital
    letters, three capital letters, three digits) yields one entry.  Blank
    lines are skipped, a second line starting with '-' ends the table, and
    any other line is reported as unparsable.

    Args:
        file: Path of the file to read.
        normalize: When true, print the entries as the body of a
            ``countries = {...}`` dictionary with lower-case codes and
            normalized name capitalization.  Otherwise print plain
            "CODE NAME" lines exactly as found.

    Returns:
        None.  All output goes to standard out.  If the file cannot be
        opened, the error is reported and nothing else is printed.
    """
    try:
        fp = open(file)
    except IOError as e:
        print('%s : %s' % (e.strerror, file))
        # There is nothing to read, so stop here.
        return

    cre = re.compile('(.*?)[ \t]+([A-Z]{2})[ \t]+[A-Z]{3}[ \t]+[0-9]{3}')
    scanning = 0

    if normalize:
        print('countries = {')

    # The with-block closes the file on every exit path, including the
    # early break at the closing rule.
    with fp:
        for line in fp:
            if not scanning:
                # Still in the preamble; the first rule line starts the table.
                if line[0] == '-':
                    scanning = 1
                continue

            mo = cre.match(line)
            if not mo:
                line = line.strip()
                if not line:
                    continue
                if line[0] == '-':
                    break
                print('Could not parse line: %s' % line)
                continue

            country, code = mo.group(1, 2)
            if normalize:
                print(' "%s": "%s",' % (code.lower(),
                                        _normalize_country(country)))
            else:
                print('%s %s' % (code, country))

    if normalize:
        print(' }')
|
||
|
||
|
||
def main():
    """Parse the command line and run the selected mode.

    Options are read with getopt from sys.argv[1:].  Exactly one mode
    runs, chosen in this order of precedence:

    1. -h/--help: print the help text and exit with status 0.
    2. -d/--dump: print both tables, sorted by code.
    3. -p/--parse FILE: hand FILE to parse(); -o/--outputdict selects the
       dictionary output format.
    4. Otherwise look up each remaining argument: first with resolve()
       (skipped when -r/--reverse is given), then with reverse().
       Anything still unresolved is reported as unknown.

    An invalid option prints the help text plus the getopt error and exits
    with status 1.
    """
    show_help = 0
    dump = 0
    parsefile = None
    normalize = 0
    forcerev = 0

    try:
        opts, args = getopt.getopt(
            sys.argv[1:],
            'p:rohd',
            ['parse=', 'reverse', 'outputdict', 'help', 'dump'])
    except getopt.error as msg:
        # usage() does not return; it exits the process.
        usage(1, msg)

    for opt, arg in opts:
        if opt in ('-h', '--help'):
            show_help = 1
        elif opt in ('-d', '--dump'):
            dump = 1
        elif opt in ('-p', '--parse'):
            parsefile = arg
        elif opt in ('-o', '--outputdict'):
            normalize = 1
        elif opt in ('-r', '--reverse'):
            forcerev = 1

    if show_help:
        usage(0)

    if dump:
        print('Non-geographic domains:')
        for code in sorted(nameorgs):
            print(' %4s: %s' % (code, nameorgs[code]))

        print('\nCountry coded domains:')
        for code in sorted(countries):
            print(' %2s: %s' % (code, countries[code]))
    elif parsefile:
        parse(parsefile, normalize)
    else:
        # Each pass is built eagerly so that all of its output appears
        # before the next pass starts.  resolve() and reverse() return None
        # for addresses they handled, and those are dropped.
        if not forcerev:
            args = [addr for addr in [resolve(a) for a in args] if addr]
        args = [addr for addr in [reverse(a) for a in args] if addr]
        for arg in args:
            print('Where in the world is %s?' % arg)
|
||
|
||
|
||
|
||
# The mappings
|
||
# Maps a top-level domain to a short description.  resolve() reports a hit
# in this table as "<addr> is in the <description> top level domain", and
# the --dump output lists these entries under "Non-geographic domains:".
nameorgs = {
    # New top level domains as described by ICANN
    # http://www.icann.org/tlds/
    "aero": "air-transport industry",
    "arpa": "Arpanet",
    "biz": "business",
    "com": "commercial",
    "coop": "cooperatives",
    "edu": "educational",
    "gov": "government",
    "info": "unrestricted `info'",
    "int": "international",
    "mil": "military",
    "museum": "museums",
    "name": "`name' (for registration by individuals)",
    "net": "networking",
    "org": "non-commercial",
    "pro": "professionals",
    # These additional ccTLDs are included here even though they are not part
    # of ISO 3166.  IANA has 5 reserved ccTLDs as described here:
    #
    # http://www.iso.org/iso/en/prods-services/iso3166ma/04background-on-iso-3166/iso3166-1-and-ccTLDs.html
    #
    # but I can't find an official list anywhere.
    #
    # Because they live in this table rather than in `countries', they are
    # reported with the "top level domain" wording and dumped together with
    # the non-geographic entries.
    #
    # Note that `uk' is the common practice country code for the United
    # Kingdom.  AFAICT, the official `gb' code is routinely ignored!
    #
    # <D.M.Pick@qmw.ac.uk> tells me that `uk' was long in use before ISO3166
    # was adopted for top-level DNS zone names (although in the reverse order
    # like uk.ac.qmw) and was carried forward (with the reversal) to avoid a
    # large-scale renaming process as the UK switched from their old `Coloured
    # Book' protocols over X.25 to Internet protocols over IP.
    #
    # See <url:ftp://ftp.ripe.net/ripe/docs/ripe-159.txt>
    #
    # Also, `su', while obsolete, is still in limited use.
    "ac": "Ascension Island",
    "gg": "Guernsey",
    "im": "Isle of Man",
    "je": "Jersey",
    "uk": "United Kingdom (common practice)",
    "su": "Soviet Union (still in limited use)",
    }
|
||
|
||
|
||
|
||
# Maps a lower-case two-letter country code to the country name.  resolve()
# reports a hit in this table as "<addr> originated from <name>", and the
# --dump output lists these entries under "Country coded domains:".
countries = {
    "af": "Afghanistan",
    "al": "Albania",
    "dz": "Algeria",
    "as": "American Samoa",
    "ad": "Andorra",
    "ao": "Angola",
    "ai": "Anguilla",
    "aq": "Antarctica",
    "ag": "Antigua and Barbuda",
    "ar": "Argentina",
    "am": "Armenia",
    "aw": "Aruba",
    "au": "Australia",
    "at": "Austria",
    "az": "Azerbaijan",
    "bs": "Bahamas",
    "bh": "Bahrain",
    "bd": "Bangladesh",
    "bb": "Barbados",
    "by": "Belarus",
    "be": "Belgium",
    "bz": "Belize",
    "bj": "Benin",
    "bm": "Bermuda",
    "bt": "Bhutan",
    "bo": "Bolivia",
    "ba": "Bosnia and Herzegowina",
    "bw": "Botswana",
    "bv": "Bouvet Island",
    "br": "Brazil",
    "io": "British Indian Ocean Territory",
    "bn": "Brunei Darussalam",
    "bg": "Bulgaria",
    "bf": "Burkina Faso",
    "bi": "Burundi",
    "kh": "Cambodia",
    "cm": "Cameroon",
    "ca": "Canada",
    "cv": "Cape Verde",
    "ky": "Cayman Islands",
    "cf": "Central African Republic",
    "td": "Chad",
    "cl": "Chile",
    "cn": "China",
    "cx": "Christmas Island",
    "cc": "Cocos (Keeling) Islands",
    "co": "Colombia",
    "km": "Comoros",
    "cg": "Congo",
    "cd": "Congo, The Democratic Republic of the",
    "ck": "Cook Islands",
    "cr": "Costa Rica",
    "ci": "Cote D'Ivoire",
    "hr": "Croatia",
    "cu": "Cuba",
    "cy": "Cyprus",
    "cz": "Czech Republic",
    "dk": "Denmark",
    "dj": "Djibouti",
    "dm": "Dominica",
    "do": "Dominican Republic",
    "tp": "East Timor",
    "ec": "Ecuador",
    "eg": "Egypt",
    "sv": "El Salvador",
    "gq": "Equatorial Guinea",
    "er": "Eritrea",
    "ee": "Estonia",
    "et": "Ethiopia",
    "fk": "Falkland Islands (Malvinas)",
    "fo": "Faroe Islands",
    "fj": "Fiji",
    "fi": "Finland",
    "fr": "France",
    "gf": "French Guiana",
    "pf": "French Polynesia",
    "tf": "French Southern Territories",
    "ga": "Gabon",
    "gm": "Gambia",
    "ge": "Georgia",
    "de": "Germany",
    "gh": "Ghana",
    "gi": "Gibraltar",
    "gr": "Greece",
    "gl": "Greenland",
    "gd": "Grenada",
    "gp": "Guadeloupe",
    "gu": "Guam",
    "gt": "Guatemala",
    "gn": "Guinea",
    "gw": "Guinea-Bissau",
    "gy": "Guyana",
    "ht": "Haiti",
    # Hand-corrected: plain capitalization of the upper-case source name
    # yields "Mcdonald".
    "hm": "Heard Island and McDonald Islands",
    "va": "Holy See (Vatican City State)",
    "hn": "Honduras",
    "hk": "Hong Kong",
    "hu": "Hungary",
    "is": "Iceland",
    "in": "India",
    "id": "Indonesia",
    "ir": "Iran, Islamic Republic of",
    "iq": "Iraq",
    "ie": "Ireland",
    "il": "Israel",
    "it": "Italy",
    "jm": "Jamaica",
    "jp": "Japan",
    "jo": "Jordan",
    "kz": "Kazakstan",
    "ke": "Kenya",
    "ki": "Kiribati",
    "kp": "Korea, Democratic People's Republic of",
    "kr": "Korea, Republic of",
    "kw": "Kuwait",
    "kg": "Kyrgyzstan",
    "la": "Lao People's Democratic Republic",
    "lv": "Latvia",
    "lb": "Lebanon",
    "ls": "Lesotho",
    "lr": "Liberia",
    "ly": "Libyan Arab Jamahiriya",
    "li": "Liechtenstein",
    "lt": "Lithuania",
    "lu": "Luxembourg",
    "mo": "Macau",
    "mk": "Macedonia, The Former Yugoslav Republic of",
    "mg": "Madagascar",
    "mw": "Malawi",
    "my": "Malaysia",
    "mv": "Maldives",
    "ml": "Mali",
    "mt": "Malta",
    "mh": "Marshall Islands",
    "mq": "Martinique",
    "mr": "Mauritania",
    "mu": "Mauritius",
    "yt": "Mayotte",
    "mx": "Mexico",
    "fm": "Micronesia, Federated States of",
    "md": "Moldova, Republic of",
    "mc": "Monaco",
    "mn": "Mongolia",
    "ms": "Montserrat",
    "ma": "Morocco",
    "mz": "Mozambique",
    "mm": "Myanmar",
    "na": "Namibia",
    "nr": "Nauru",
    "np": "Nepal",
    "nl": "Netherlands",
    "an": "Netherlands Antilles",
    "nc": "New Caledonia",
    "nz": "New Zealand",
    "ni": "Nicaragua",
    "ne": "Niger",
    "ng": "Nigeria",
    "nu": "Niue",
    "nf": "Norfolk Island",
    "mp": "Northern Mariana Islands",
    "no": "Norway",
    "om": "Oman",
    "pk": "Pakistan",
    "pw": "Palau",
    "ps": "Palestinian Territory, Occupied",
    "pa": "Panama",
    "pg": "Papua New Guinea",
    "py": "Paraguay",
    "pe": "Peru",
    "ph": "Philippines",
    "pn": "Pitcairn",
    "pl": "Poland",
    "pt": "Portugal",
    "pr": "Puerto Rico",
    "qa": "Qatar",
    "re": "Reunion",
    "ro": "Romania",
    "ru": "Russian Federation",
    "rw": "Rwanda",
    "sh": "Saint Helena",
    "kn": "Saint Kitts and Nevis",
    "lc": "Saint Lucia",
    "pm": "Saint Pierre and Miquelon",
    "vc": "Saint Vincent and the Grenadines",
    "ws": "Samoa",
    "sm": "San Marino",
    "st": "Sao Tome and Principe",
    "sa": "Saudi Arabia",
    "sn": "Senegal",
    "sc": "Seychelles",
    "sl": "Sierra Leone",
    "sg": "Singapore",
    "sk": "Slovakia",
    "si": "Slovenia",
    "sb": "Solomon Islands",
    "so": "Somalia",
    "za": "South Africa",
    "gs": "South Georgia and the South Sandwich Islands",
    "es": "Spain",
    "lk": "Sri Lanka",
    "sd": "Sudan",
    "sr": "Suriname",
    "sj": "Svalbard and Jan Mayen",
    "sz": "Swaziland",
    "se": "Sweden",
    "ch": "Switzerland",
    "sy": "Syrian Arab Republic",
    "tw": "Taiwan, Province of China",
    "tj": "Tajikistan",
    "tz": "Tanzania, United Republic of",
    "th": "Thailand",
    "tg": "Togo",
    "tk": "Tokelau",
    "to": "Tonga",
    "tt": "Trinidad and Tobago",
    "tn": "Tunisia",
    "tr": "Turkey",
    "tm": "Turkmenistan",
    "tc": "Turks and Caicos Islands",
    "tv": "Tuvalu",
    "ug": "Uganda",
    "ua": "Ukraine",
    "ae": "United Arab Emirates",
    "gb": "United Kingdom",
    "us": "United States",
    "um": "United States Minor Outlying Islands",
    "uy": "Uruguay",
    "uz": "Uzbekistan",
    "vu": "Vanuatu",
    "ve": "Venezuela",
    "vn": "Viet Nam",
    "vg": "Virgin Islands, British",
    "vi": "Virgin Islands, U.S.",
    "wf": "Wallis and Futuna",
    "eh": "Western Sahara",
    "ye": "Yemen",
    "yu": "Yugoslavia",
    "zm": "Zambia",
    "zw": "Zimbabwe",
    }
|
||
|
||
# Combined lookup table: every nameorgs entry plus every countries entry.
# reverse() searches the values of this table.
# NOTE: the name shadows the builtin all(); it is kept because reverse()
# refers to the table by this name.
all = dict(nameorgs)
all.update(countries)


# Run the command-line interface only when executed as a script.
if __name__ == '__main__':
    main()
|