GH-91719: Make MSVC generate somewhat faster switch code (#91718)

Apparently a switch on an 8-bit quantity where all cases are
present generates a more efficient jump (doing only one indexed
memory load instead of two).

So we make opcode and use_tracing uint8_t, and generate a macro
full of extra `case NNN:` lines for all unused opcodes.

See https://github.com/faster-cpython/ideas/issues/321#issuecomment-1103263673
This commit is contained in:
Guido van Rossum 2022-04-21 11:53:57 -07:00 committed by GitHub
parent d44815cabc
commit f8dc6186d1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 90 additions and 3 deletions

View File

@ -46,7 +46,7 @@ typedef struct _PyCFrame {
* discipline and make sure that instances of this struct cannot
* be accessed outside of their lifetime.
*/
int use_tracing;
uint8_t use_tracing; // 0 or 255 (or'ed into opcode, hence 8-bit type)
/* Pointer to the currently executing frame (it can be NULL) */
struct _PyInterpreterFrame *current_frame;
struct _PyCFrame *previous;

78
Include/opcode.h generated
View File

@ -707,6 +707,84 @@ static const char *const _PyOpcode_OpName[256] = {
};
#endif
/* EXTRA_CASES expands to one `case N:` label for every opcode number the
 * opcode.h generator found unused (here 180 through 254), followed by a
 * lone `;` so the run of labels always has a statement to attach to.
 *
 * The interpreter's big opcode switch uses it where a `default:` label
 * would otherwise go, so that the switch spells out a case for these
 * otherwise-unhandled 8-bit values; per the commit rationale this lets
 * MSVC emit a more efficient jump for a switch on a uint8_t.
 *
 * This list is generated output: it is written by the generator's loop
 * over its `used` table, so regenerate the header rather than editing
 * the labels by hand. */
#define EXTRA_CASES \
case 180: \
case 181: \
case 182: \
case 183: \
case 184: \
case 185: \
case 186: \
case 187: \
case 188: \
case 189: \
case 190: \
case 191: \
case 192: \
case 193: \
case 194: \
case 195: \
case 196: \
case 197: \
case 198: \
case 199: \
case 200: \
case 201: \
case 202: \
case 203: \
case 204: \
case 205: \
case 206: \
case 207: \
case 208: \
case 209: \
case 210: \
case 211: \
case 212: \
case 213: \
case 214: \
case 215: \
case 216: \
case 217: \
case 218: \
case 219: \
case 220: \
case 221: \
case 222: \
case 223: \
case 224: \
case 225: \
case 226: \
case 227: \
case 228: \
case 229: \
case 230: \
case 231: \
case 232: \
case 233: \
case 234: \
case 235: \
case 236: \
case 237: \
case 238: \
case 239: \
case 240: \
case 241: \
case 242: \
case 243: \
case 244: \
case 245: \
case 246: \
case 247: \
case 248: \
case 249: \
case 250: \
case 251: \
case 252: \
case 253: \
case 254: \
;
#define HAS_ARG(op) ((op) >= HAVE_ARGUMENT)
/* Reserve some bytecodes for internal use in the compiler.

View File

@ -1662,7 +1662,9 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
#ifdef Py_STATS
int lastopcode = 0;
#endif
int opcode; /* Current opcode */
// opcode is an 8-bit value to improve the code generated by MSVC
// for the big switch below (in combination with the EXTRA_CASES macro).
uint8_t opcode; /* Current opcode */
int oparg; /* Current opcode argument, if any */
_Py_atomic_int * const eval_breaker = &tstate->interp->ceval.eval_breaker;
@ -5645,7 +5647,7 @@ handle_eval_breaker:
#if USE_COMPUTED_GOTOS
_unknown_opcode:
#else
default:
EXTRA_CASES // From opcode.h, a 'case' for each unused opcode
#endif
fprintf(stderr, "XXX lineno: %d, opcode: %d\n",
_PyInterpreterFrame_GetLine(frame), opcode);

View File

@ -129,6 +129,13 @@ def main(opcode_py, outfile='Include/opcode.h'):
fobj.write("};\n")
fobj.write("#endif\n")
fobj.write("\n")
fobj.write("#define EXTRA_CASES \\\n")
for i, flag in enumerate(used):
if not flag:
fobj.write(f" case {i}: \\\n")
fobj.write(" ;\n")
fobj.write(footer)