Implement PEP 3120.
This commit is contained in:
parent
5de17db361
commit
447d33ead6
|
@ -0,0 +1 @@
|
||||||
|
print("böse")
|
|
@ -0,0 +1,30 @@
|
||||||
|
# This file is marked as binary in the CVS, to prevent MacCVS from recoding it.
|
||||||
|
|
||||||
|
import unittest
|
||||||
|
from test import test_support
|
||||||
|
|
||||||
|
class PEP3120Test(unittest.TestCase):
|
||||||
|
|
||||||
|
def test_pep3120(self):
|
||||||
|
self.assertEqual(
|
||||||
|
"Питон".encode("utf-8"),
|
||||||
|
b'\xd0\x9f\xd0\xb8\xd1\x82\xd0\xbe\xd0\xbd'
|
||||||
|
)
|
||||||
|
self.assertEqual(
|
||||||
|
"\П".encode("utf-8"),
|
||||||
|
b'\\\xd0\x9f'
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_badsyntax(self):
|
||||||
|
try:
|
||||||
|
import test.badsyntax_pep3120
|
||||||
|
except SyntaxError as msg:
|
||||||
|
self.assert_(str(msg).find("Non-UTF-8 code starting with") >= 0)
|
||||||
|
else:
|
||||||
|
self.fail("expected exception didn't occur")
|
||||||
|
|
||||||
|
def test_main():
    """Run the PEP 3120 test case through test_support.run_unittest."""
    case_classes = (PEP3120Test,)
    test_support.run_unittest(*case_classes)
|
||||||
|
|
||||||
|
# Allow running this test file directly as a script.
if "__main__" == __name__:
    test_main()
|
|
@ -26,6 +26,8 @@ TO DO
|
||||||
Core and Builtins
|
Core and Builtins
|
||||||
-----------------
|
-----------------
|
||||||
|
|
||||||
|
- PEP 3120: Change default source encoding to UTF-8.
|
||||||
|
|
||||||
- PEP 3123: Use proper C inheritance for PyObject.
|
- PEP 3123: Use proper C inheritance for PyObject.
|
||||||
|
|
||||||
- Removed the __oct__ and __hex__ special methods and added a bin()
|
- Removed the __oct__ and __hex__ special methods and added a bin()
|
||||||
|
|
|
@ -444,6 +444,34 @@ static void fp_ungetc(int c, struct tok_state *tok) {
|
||||||
ungetc(c, tok->fp);
|
ungetc(c, tok->fp);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Check whether the bytes at s start a plausible UTF-8 sequence.

   s must point into a NUL-terminated buffer.  Return the number of
   bytes forming the sequence (1 to 4) if it is acceptable, 0 if not.

   Only the lead byte's range and the presence of the required number
   of continuation bytes (0x80..0xBF) are checked; second-byte range
   restrictions (surrogates, 3- and 4-byte overlong forms) are left to
   the real decoder. */
static int valid_utf8(const unsigned char* s)
{
    int expected;
    int i;

    if (*s < 0x80)
        /* single-byte code (this includes the terminating NUL) */
        return 1;
    if (*s < 0xC2)
        /* 0x80..0xBF: continuation byte without a lead byte;
           0xC0..0xC1: could only start an overlong encoding of
           U+0000..U+007F, which UTF-8 forbids */
        return 0;
    if (*s < 0xE0)
        expected = 1;
    else if (*s < 0xF0)
        expected = 2;
    else if (*s < 0xF5)
        expected = 3;
    else
        /* 0xF5..0xFF never occur in UTF-8 (would exceed U+10FFFF) */
        return 0;
    /* Scan forward, not backward: a sequence truncated by the
       terminating NUL is then rejected at the NUL itself, and no byte
       beyond the end of the string is ever read. */
    for (i = 1; i <= expected; i++)
        if (s[i] < 0x80 || s[i] >= 0xC0)
            return 0;
    return expected + 1;
}
|
||||||
|
|
||||||
/* Read a line of input from TOK. Determine encoding
|
/* Read a line of input from TOK. Determine encoding
|
||||||
if necessary. */
|
if necessary. */
|
||||||
|
|
||||||
|
@ -478,12 +506,13 @@ decoding_fgets(char *s, int size, struct tok_state *tok)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#ifndef PGEN
|
#ifndef PGEN
|
||||||
/* The default encoding is ASCII, so make sure we don't have any
|
/* The default encoding is UTF-8, so make sure we don't have any
|
||||||
non-ASCII bytes in it. */
|
non-UTF-8 sequences in it. */
|
||||||
if (line && !tok->encoding) {
|
if (line && !tok->encoding) {
|
||||||
unsigned char *c;
|
unsigned char *c;
|
||||||
for (c = (unsigned char *)line; *c; c++)
|
int length;
|
||||||
if (*c > 127) {
|
for (c = (unsigned char *)line; *c; c += length)
|
||||||
|
if (!(length = valid_utf8(c))) {
|
||||||
badchar = *c;
|
badchar = *c;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -493,7 +522,7 @@ decoding_fgets(char *s, int size, struct tok_state *tok)
|
||||||
/* Need to add 1 to the line number, since this line
|
/* Need to add 1 to the line number, since this line
|
||||||
has not been counted, yet. */
|
has not been counted, yet. */
|
||||||
sprintf(buf,
|
sprintf(buf,
|
||||||
"Non-ASCII character '\\x%.2x' "
|
"Non-UTF-8 code starting with '\\x%.2x' "
|
||||||
"in file %.200s on line %i, "
|
"in file %.200s on line %i, "
|
||||||
"but no encoding declared; "
|
"but no encoding declared; "
|
||||||
"see http://www.python.org/peps/pep-0263.html for details",
|
"see http://www.python.org/peps/pep-0263.html for details",
|
||||||
|
|
|
@ -203,7 +203,8 @@ PyAST_FromNode(const node *n, PyCompilerFlags *flags, const char *filename,
|
||||||
c.c_encoding = STR(n);
|
c.c_encoding = STR(n);
|
||||||
n = CHILD(n, 0);
|
n = CHILD(n, 0);
|
||||||
} else {
|
} else {
|
||||||
c.c_encoding = NULL;
|
/* PEP 3120 */
|
||||||
|
c.c_encoding = "utf-8";
|
||||||
}
|
}
|
||||||
c.c_arena = arena;
|
c.c_arena = arena;
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue