mirror of https://github.com/python/cpython
Search for Unicode character names using regular expressions.
This commit is contained in:
parent
decc6a47df
commit
f2c47114e7
|
@ -0,0 +1,40 @@
|
|||
#!/usr/bin/env python
|
||||
|
||||
"""
|
||||
For each argument on the command line, look for it in the set of all Unicode
|
||||
names. Arguments are treated as case-insensitive regular expressions, e.g.:
|
||||
|
||||
% find-uname 'small letter a$' 'horizontal line'
|
||||
*** small letter a$ matches ***
|
||||
LATIN SMALL LETTER A (97)
|
||||
COMBINING LATIN SMALL LETTER A (867)
|
||||
CYRILLIC SMALL LETTER A (1072)
|
||||
PARENTHESIZED LATIN SMALL LETTER A (9372)
|
||||
CIRCLED LATIN SMALL LETTER A (9424)
|
||||
FULLWIDTH LATIN SMALL LETTER A (65345)
|
||||
*** horizontal line matches ***
|
||||
HORIZONTAL LINE EXTENSION (9135)
|
||||
"""
|
||||
|
||||
import unicodedata
|
||||
import sys
|
||||
import re
|
||||
|
||||
def main(args):
|
||||
unicode_names= []
|
||||
for ix in range(sys.maxunicode+1):
|
||||
try:
|
||||
unicode_names.append( (ix, unicodedata.name(unichr(ix))) )
|
||||
except ValueError: # no name for the character
|
||||
pass
|
||||
for arg in args:
|
||||
pat = re.compile(arg, re.I)
|
||||
matches = [(x,y) for (x,y) in unicode_names
|
||||
if pat.search(y) is not None]
|
||||
if matches:
|
||||
print "***", arg, "matches", "***"
|
||||
for (x,y) in matches:
|
||||
print "%s (%d)" % (y,x)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main(sys.argv[1:])
|
Loading…
Reference in New Issue