Search for Unicode character names using regular expressions.

This commit is contained in:
Skip Montanaro 2003-01-01 20:26:47 +00:00
parent decc6a47df
commit f2c47114e7
1 changed files with 40 additions and 0 deletions

View File

@ -0,0 +1,40 @@
#!/usr/bin/env python
"""
For each argument on the command line, look for it in the set of all Unicode
names. Arguments are treated as case-insensitive regular expressions, e.g.:
% find-uname 'small letter a$' 'horizontal line'
*** small letter a$ matches ***
LATIN SMALL LETTER A (97)
COMBINING LATIN SMALL LETTER A (867)
CYRILLIC SMALL LETTER A (1072)
PARENTHESIZED LATIN SMALL LETTER A (9372)
CIRCLED LATIN SMALL LETTER A (9424)
FULLWIDTH LATIN SMALL LETTER A (65345)
*** horizontal line matches ***
HORIZONTAL LINE EXTENSION (9135)
"""
import unicodedata
import sys
import re
def main(args):
    """Print every Unicode character name matching each regex in *args*.

    Each element of ``args`` is compiled as a case-insensitive regular
    expression and searched for (``re.search``, so unanchored unless the
    pattern anchors itself) in the name of every code point from 0 through
    ``sys.maxunicode``.  For a pattern with at least one hit, a
    ``*** <pattern> matches ***`` header is printed, followed by one
    ``NAME (codepoint)`` line per hit in ascending code point order.
    Patterns with no hits print nothing.

    Raises ``re.error`` if an argument is not a valid regular expression.
    """
    # ``unichr`` only exists on Python 2; on Python 3 ``chr`` covers the full
    # code point range.  Resolving it here keeps the script runnable on both.
    try:
        to_char = unichr
    except NameError:
        to_char = chr

    # Build the (codepoint, name) table once and reuse it for every pattern.
    # Passing a default to unicodedata.name() avoids raising and catching a
    # ValueError for each of the many code points that have no name.
    unicode_names = []
    for codepoint in range(sys.maxunicode + 1):
        name = unicodedata.name(to_char(codepoint), None)
        if name is not None:
            unicode_names.append((codepoint, name))

    for arg in args:
        pat = re.compile(arg, re.I)
        matches = [(codepoint, name) for (codepoint, name) in unicode_names
                   if pat.search(name) is not None]
        if matches:
            # A single parenthesised argument prints identically whether
            # ``print`` is the Python 2 statement or the Python 3 function.
            print("*** %s matches ***" % (arg,))
            for (codepoint, name) in matches:
                print("%s (%d)" % (name, codepoint))
# Script entry point: every command-line argument after the program name is
# handed to main() as a search pattern.
if __name__ == "__main__":
    main(sys.argv[1:])