diff --git a/Lib/idlelib/IOBinding.py b/Lib/idlelib/IOBinding.py index 496bc431832..58d1913c7e8 100644 --- a/Lib/idlelib/IOBinding.py +++ b/Lib/idlelib/IOBinding.py @@ -6,6 +6,9 @@ # which will only understand the local convention. import os +import types +import sys +import codecs import tempfile import tkFileDialog import tkMessageBox @@ -32,6 +35,71 @@ from configHandler import idleConf #$ win #$ unix +try: + from codecs import BOM_UTF8 +except ImportError: + # only available since Python 2.3 + BOM_UTF8 = '\xef\xbb\xbf' + +# Try setting the locale, so that we can find out +# what encoding to use +try: + import locale + locale.setlocale(locale.LC_CTYPE, "") +except ImportError: + pass + +encoding = "ascii" +if sys.platform == 'win32': + # On Windows, we could use "mbcs". However, to give the user + # a portable encoding name, we need to find the code page + try: + encoding = locale.getdefaultlocale()[1] + codecs.lookup(encoding) + except LookupError: + pass +else: + try: + # Different things can fail here: the locale module may not be + # loaded, it may not offer nl_langinfo, or CODESET, or the + # resulting codeset may be unknown to Python. We ignore all + # these problems, falling back to ASCII + encoding = locale.nl_langinfo(locale.CODESET) + codecs.lookup(encoding) + except (NameError, AttributeError, LookupError): + # Try getdefaultlocale well: it parses environment variables, + # which may give a clue. Unfortunately, getdefaultlocale has + # bugs that can cause ValueError. + try: + encoding = locale.getdefaultlocale()[1] + codecs.lookup(encoding) + except (ValueError, LookupError): + pass + +encoding = encoding.lower() + +coding_re = re.compile("coding[:=]\s*([-\w_.]+)") +def coding_spec(str): + + """Return the encoding declaration according to PEP 263. + Raise LookupError if the encoding is declared but unknown.""" + + # Only consider the first two lines + str = str.split("\n")[:2] + str = "\n".join(str) + + match = coding_re.search(str) + if not match: + return None + name = match.group(1) + # Check whether the encoding is known + import codecs + try: + codecs.lookup(name) + except LookupError: + # The standard encoding error does not indicate the encoding + raise LookupError, "Unknown encoding "+name + return name class IOBinding: @@ -44,6 +112,7 @@ class IOBinding: self.save_as) self.__id_savecopy = self.text.bind("<>", self.save_a_copy) + self.fileencoding = None self.__id_print = self.text.bind("<>", self.print_window) def close(self): @@ -129,8 +198,9 @@ class IOBinding: f.close() except IOError, msg: tkMessageBox.showerror("I/O Error", str(msg), master=self.text) - return 0 + return False + chars = self.decode(chars) # We now convert all end-of-lines to '\n's eol = r"(\r\n)|\n|\r" # \r\n (Windows), \n (UNIX), or \r (Mac) chars = re.compile( eol ).sub( r"\n", chars ) @@ -142,7 +212,55 @@ class IOBinding: self.set_filename(filename) self.text.mark_set("insert", "1.0") self.text.see("insert") - return 1 + return True + + def decode(self, chars): + # Try to create a Unicode string. If that fails, let Tcl try + # its best + + # Check presence of a UTF-8 signature first + if chars.startswith(BOM_UTF8): + try: + chars = chars[3:].decode("utf-8") + except UnicodeError: + # has UTF-8 signature, but fails to decode... + return chars + else: + # Indicates that this file originally had a BOM + self.fileencoding = BOM_UTF8 + return chars + + # Next look for coding specification + try: + enc = coding_spec(chars) + except LookupError, name: + tkMessageBox.showerror( + title="Error loading the file", + message="The encoding '%s' is not known to this Python "\ + "installation. The file may not display correctly" % name, + master = self.text) + enc = None + + if enc: + try: + return unicode(chars, enc) + except UnicodeError: + pass + + # If it is ASCII, we need not to record anything + try: + return unicode(chars, 'ascii') + except UnicodeError: + pass + + # Finally, try the locale's encoding. This is deprecated; + # the user should declare a non-ASCII encoding + try: + chars = unicode(chars, encoding) + self.fileencoding = encoding + except UnicodeError: + pass + return chars def maybesave(self): if self.get_saved(): @@ -190,18 +308,86 @@ class IOBinding: def writefile(self, filename): self.fixlastline() + chars = self.encode(self.text.get("1.0", "end-1c")) try: f = open(filename, "w") - chars = str(self.text.get("1.0", "end-1c")) f.write(chars) f.close() ## print "saved to", `filename` - return 1 + return True except IOError, msg: tkMessageBox.showerror("I/O Error", str(msg), master=self.text) - return 0 + return False + + def encode(self, chars): + if isinstance(chars, types.StringType): + # This is either plain ASCII, or Tk was returning mixed-encoding + # text to us. Don't try to guess further. + return chars + + # See whether there is anything non-ASCII in it. + # If not, no need to figure out the encoding. + try: + return chars.encode('ascii') + except UnicodeError: + pass + + # If there is an encoding declared, try this first. + try: + enc = coding_spec(chars) + failed = None + except LookupError, msg: + failed = msg + enc = None + if enc: + try: + return chars.encode(enc) + except UnicodeError: + failed = "Invalid encoding '%s'" % enc + + if failed: + tkMessageBox.showerror( + "I/O Error", + "%s. Saving as UTF-8" % failed, + master = self.text) + + # If there was a UTF-8 signature, use that. This should not fail + if self.fileencoding == BOM_UTF8 or failed: + return BOM_UTF8 + chars.encode("utf-8") + + # Try the original file encoding next, if any + if self.fileencoding: + try: + return chars.encode(self.fileencoding) + except UnicodeError: + tkMessageBox.showerror( + "I/O Error", + "Cannot save this as '%s' anymore. Saving as UTF-8" \ + % self.fileencoding, + master = self.text) + return BOM_UTF8 + chars.encode("utf-8") + + # Nothing was declared, and we had not determined an encoding + # on loading. Recommend an encoding line. + try: + chars = chars.encode(encoding) + enc = encoding + except UnicodeError: + chars = BOM_UTF8 + chars.encode("utf-8") + enc = "utf-8" + tkMessageBox.showerror( + "I/O Error", + "Non-ASCII found, yet no encoding declared. Add a line like\n" + "# -*- coding: %s -*- \nto your file" % enc, + master = self.text) + return chars + def fixlastline(self): + c = self.text.get("end-2c") + if c != '\n': + self.text.insert("end-1c", "\n") + def print_window(self, event): tempfilename = None if self.get_saved(): @@ -214,7 +400,8 @@ class IOBinding: platform=os.name printPlatform=1 if platform == 'posix': #posix platform - command = idleConf.GetOption('main','General','print-command-posix') + command = idleConf.GetOption('main','General', + 'print-command-posix') command = command + " 2>&1" elif platform == 'nt': #win32 platform command = idleConf.GetOption('main','General','print-command-win') @@ -226,7 +413,8 @@ class IOBinding: output = pipe.read().strip() status = pipe.close() if status: - output = "Printing failed (exit status 0x%x)\n" % status + output + output = "Printing failed (exit status 0x%x)\n" % \ + status + output if output: output = "Printing command: %s\n" % repr(command) + output tkMessageBox.showerror("Print status", output, master=self.text) @@ -235,11 +423,6 @@ class IOBinding: tkMessageBox.showinfo("Print status", message, master=self.text) return "break" - def fixlastline(self): - c = self.text.get("end-2c") - if c != '\n': - self.text.insert("end-1c", "\n") - opendialog = None savedialog = None