From ec014a101a7f6243b95dfc08acfe1542b9fa5d39 Mon Sep 17 00:00:00 2001 From: Sergey Fedoseev Date: Wed, 12 Sep 2018 03:47:59 +0500 Subject: [PATCH] bpo-34636: Use fast path for more chars in SRE category macros. (GH-9170) When handling \s, \d, or \w (and their inverses) escapes in bytes regexes, this is a small but measurable performance improvement. https://bugs.python.org/issue34636 --- .../next/Library/2018-09-11-15-04-05.bpo-34636.capCmt.rst | 2 ++ Modules/_sre.c | 6 +++--- 2 files changed, 5 insertions(+), 3 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2018-09-11-15-04-05.bpo-34636.capCmt.rst diff --git a/Misc/NEWS.d/next/Library/2018-09-11-15-04-05.bpo-34636.capCmt.rst b/Misc/NEWS.d/next/Library/2018-09-11-15-04-05.bpo-34636.capCmt.rst new file mode 100644 index 00000000000..c982b0a4cda --- /dev/null +++ b/Misc/NEWS.d/next/Library/2018-09-11-15-04-05.bpo-34636.capCmt.rst @@ -0,0 +1,2 @@ +Speed up re scanning of many non-matching characters for \s \w and \d within +bytes objects. (microoptimization) diff --git a/Modules/_sre.c b/Modules/_sre.c index d67083037e5..483cf5e9ff9 100644 --- a/Modules/_sre.c +++ b/Modules/_sre.c @@ -87,13 +87,13 @@ static const char copyright[] = /* search engine state */ #define SRE_IS_DIGIT(ch)\ - ((ch) < 128 && Py_ISDIGIT(ch)) + ((ch) <= '9' && Py_ISDIGIT(ch)) #define SRE_IS_SPACE(ch)\ - ((ch) < 128 && Py_ISSPACE(ch)) + ((ch) <= ' ' && Py_ISSPACE(ch)) #define SRE_IS_LINEBREAK(ch)\ ((ch) == '\n') #define SRE_IS_WORD(ch)\ - ((ch) < 128 && (Py_ISALNUM(ch) || (ch) == '_')) + ((ch) <= 'z' && (Py_ISALNUM(ch) || (ch) == '_')) static unsigned int sre_lower_ascii(unsigned int ch) {