diff --git a/Makefile.pre.in b/Makefile.pre.in index 1f8bd561f61dcd..6ceab34e29d52d 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -1455,6 +1455,15 @@ regen-cases: -i $(srcdir)/Python/bytecodes.c \ -o $(srcdir)/Python/generated_cases.c.h.new $(UPDATE_FILE) $(srcdir)/Python/generated_cases.c.h $(srcdir)/Python/generated_cases.c.h.new + # Regenerate Python/opcode_metadata.h from Python/bytecodes.c + # using Tools/cases_generator/generate_cases.py --metadata + PYTHONPATH=$(srcdir)/Tools/cases_generator \ + $(PYTHON_FOR_REGEN) \ + $(srcdir)/Tools/cases_generator/generate_cases.py \ + --metadata \ + -i $(srcdir)/Python/bytecodes.c \ + -o $(srcdir)/Python/opcode_metadata.h.new + $(UPDATE_FILE) $(srcdir)/Python/opcode_metadata.h $(srcdir)/Python/opcode_metadata.h.new Python/ceval.o: $(srcdir)/Python/opcode_targets.h $(srcdir)/Python/condvar.h $(srcdir)/Python/generated_cases.c.h diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 839fac3fcd1176..4d760bcf8ab359 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -772,7 +772,7 @@ dummy_func( ERROR_IF(w == NULL, error); } - inst(YIELD_VALUE, (retval --)) { + inst(YIELD_VALUE, (retval -- unused)) { // NOTE: It's important that YIELD_VALUE never raises an exception! // The compiler treats any exception raised here as a failed close() // or throw() call. diff --git a/Python/compile.c b/Python/compile.c index cbbdfb9e946772..dc1c49557fd51c 100644 --- a/Python/compile.c +++ b/Python/compile.c @@ -35,6 +35,8 @@ #include "pycore_pymem.h" // _PyMem_IsPtrFreed() #include "pycore_symtable.h" // PySTEntryObject +#include "opcode_metadata.h" // _PyOpcode_opcode_metadata + #define DEFAULT_BLOCK_SIZE 16 #define DEFAULT_CODE_SIZE 128 @@ -8668,6 +8670,31 @@ no_redundant_jumps(cfg_builder *g) { return true; } +static bool +opcode_metadata_is_sane(cfg_builder *g) { + for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) { + for (int i = 0; i < b->b_iused; i++) { + struct instr *instr = &b->b_instr[i]; + int opcode = instr->i_opcode; + assert(opcode <= MAX_REAL_OPCODE); + int pushed = _PyOpcode_opcode_metadata[opcode].n_pushed; + int popped = _PyOpcode_opcode_metadata[opcode].n_popped; + assert((pushed < 0) == (popped < 0)); + if (pushed >= 0) { + assert(_PyOpcode_opcode_metadata[opcode].valid_entry); + int effect = stack_effect(opcode, instr->i_oparg, -1); + if (effect != pushed - popped) { + fprintf(stderr, + "op=%d: stack_effect (%d) != pushed (%d) - popped (%d)\n", + opcode, effect, pushed, popped); + return false; + } + } + } + } + return true; +} + static bool no_empty_basic_blocks(cfg_builder *g) { for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) { @@ -8851,6 +8878,7 @@ assemble(struct compiler *c, int addNone) } assert(no_redundant_jumps(g)); + assert(opcode_metadata_is_sane(g)); /* Can't modify the bytecode after computing jump offsets. */ assemble_jump_offsets(g->g_entryblock); diff --git a/Python/opcode_metadata.h b/Python/opcode_metadata.h new file mode 100644 index 00000000000000..2d539896844d80 --- /dev/null +++ b/Python/opcode_metadata.h @@ -0,0 +1,187 @@ +// This file is generated by Tools/cases_generator/generate_cases.py --metadata +// from Python/bytecodes.c +// Do not edit! +enum Direction { DIR_NONE, DIR_READ, DIR_WRITE }; +static const struct { + short n_popped; + short n_pushed; + enum Direction dir_op1; + enum Direction dir_op2; + enum Direction dir_op3; + bool valid_entry; + char instr_format[10]; +} _PyOpcode_opcode_metadata[256] = { + [NOP] = { 0, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [RESUME] = { 0, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [LOAD_CLOSURE] = { 0, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [LOAD_FAST_CHECK] = { 0, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [LOAD_FAST] = { 0, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [LOAD_CONST] = { 0, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [STORE_FAST] = { 1, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [LOAD_FAST__LOAD_FAST] = { 0, 2, DIR_NONE, DIR_NONE, DIR_NONE, true, "IBIB" }, + [LOAD_FAST__LOAD_CONST] = { 0, 2, DIR_NONE, DIR_NONE, DIR_NONE, true, "IBIB" }, + [STORE_FAST__LOAD_FAST] = { 1, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IBIB" }, + [STORE_FAST__STORE_FAST] = { 2, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IBIB" }, + [LOAD_CONST__LOAD_FAST] = { 0, 2, DIR_NONE, DIR_NONE, DIR_NONE, true, "IBIB" }, + [POP_TOP] = { 1, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [PUSH_NULL] = { 0, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [END_FOR] = { 2, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [UNARY_POSITIVE] = { 1, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [UNARY_NEGATIVE] = { 1, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [UNARY_NOT] = { 1, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [UNARY_INVERT] = { 1, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [BINARY_OP_MULTIPLY_INT] = { 2, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IBC" }, + [BINARY_OP_MULTIPLY_FLOAT] = { 2, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IBC" }, + [BINARY_OP_SUBTRACT_INT] = { 2, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IBC" }, + [BINARY_OP_SUBTRACT_FLOAT] = { 2, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IBC" }, + [BINARY_OP_ADD_UNICODE] = { 2, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IBC" }, + [BINARY_OP_INPLACE_ADD_UNICODE] = { 2, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [BINARY_OP_ADD_FLOAT] = { 2, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IBC" }, + [BINARY_OP_ADD_INT] = { 2, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IBC" }, + [BINARY_SUBSCR] = { 2, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IBC000" }, + [BINARY_SLICE] = { 3, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [STORE_SLICE] = { 4, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [BINARY_SUBSCR_LIST_INT] = { 2, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IBC000" }, + [BINARY_SUBSCR_TUPLE_INT] = { 2, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IBC000" }, + [BINARY_SUBSCR_DICT] = { 2, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IBC000" }, + [BINARY_SUBSCR_GETITEM] = { 2, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IBC000" }, + [LIST_APPEND] = { 1, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [SET_ADD] = { 1, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [STORE_SUBSCR] = { 3, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IBC" }, + [STORE_SUBSCR_LIST_INT] = { 3, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IBC" }, + [STORE_SUBSCR_DICT] = { 3, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IBC" }, + [DELETE_SUBSCR] = { 2, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [PRINT_EXPR] = { 1, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [RAISE_VARARGS] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [INTERPRETER_EXIT] = { 1, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [RETURN_VALUE] = { 1, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [GET_AITER] = { 1, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [GET_ANEXT] = { 1, 2, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [GET_AWAITABLE] = { 1, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [SEND] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [ASYNC_GEN_WRAP] = { 1, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [YIELD_VALUE] = { 1, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [POP_EXCEPT] = { 1, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [RERAISE] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [PREP_RERAISE_STAR] = { 2, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [END_ASYNC_FOR] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [CLEANUP_THROW] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [STOPITERATION_ERROR] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [LOAD_ASSERTION_ERROR] = { 0, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [LOAD_BUILD_CLASS] = { 0, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [STORE_NAME] = { 1, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [DELETE_NAME] = { 0, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [UNPACK_SEQUENCE] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [UNPACK_SEQUENCE_TWO_TUPLE] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [UNPACK_SEQUENCE_TUPLE] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [UNPACK_SEQUENCE_LIST] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [UNPACK_EX] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [STORE_ATTR] = { 2, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IBC000" }, + [DELETE_ATTR] = { 1, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [STORE_GLOBAL] = { 1, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [DELETE_GLOBAL] = { 0, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [LOAD_NAME] = { 0, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [LOAD_GLOBAL] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [LOAD_GLOBAL_MODULE] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [LOAD_GLOBAL_BUILTIN] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [DELETE_FAST] = { 0, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [MAKE_CELL] = { 0, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [DELETE_DEREF] = { 0, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [LOAD_CLASSDEREF] = { 0, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [LOAD_DEREF] = { 0, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [STORE_DEREF] = { 1, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [COPY_FREE_VARS] = { 0, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [BUILD_STRING] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [BUILD_TUPLE] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [BUILD_LIST] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [LIST_TO_TUPLE] = { 1, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [LIST_EXTEND] = { 1, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [SET_UPDATE] = { 1, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [BUILD_SET] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [BUILD_MAP] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [SETUP_ANNOTATIONS] = { 0, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [BUILD_CONST_KEY_MAP] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [DICT_UPDATE] = { 1, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [DICT_MERGE] = { 1, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [MAP_ADD] = { 2, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [LOAD_ATTR] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [LOAD_ATTR_INSTANCE_VALUE] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [LOAD_ATTR_MODULE] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [LOAD_ATTR_WITH_HINT] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [LOAD_ATTR_SLOT] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [LOAD_ATTR_CLASS] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [LOAD_ATTR_PROPERTY] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [STORE_ATTR_INSTANCE_VALUE] = { 2, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IBC000" }, + [STORE_ATTR_WITH_HINT] = { 2, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IBC000" }, + [STORE_ATTR_SLOT] = { 2, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IBC000" }, + [COMPARE_OP] = { 2, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IBC0" }, + [COMPARE_OP_FLOAT_JUMP] = { 3, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IBC0IB" }, + [COMPARE_OP_INT_JUMP] = { 3, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IBC0IB" }, + [COMPARE_OP_STR_JUMP] = { 3, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IBC0IB" }, + [IS_OP] = { 2, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [CONTAINS_OP] = { 2, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [CHECK_EG_MATCH] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [CHECK_EXC_MATCH] = { 2, 2, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [IMPORT_NAME] = { 2, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [IMPORT_STAR] = { 1, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [IMPORT_FROM] = { 1, 2, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [JUMP_FORWARD] = { 0, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [JUMP_BACKWARD] = { 0, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [POP_JUMP_IF_FALSE] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [POP_JUMP_IF_TRUE] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [POP_JUMP_IF_NOT_NONE] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [POP_JUMP_IF_NONE] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [JUMP_IF_FALSE_OR_POP] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [JUMP_IF_TRUE_OR_POP] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [JUMP_BACKWARD_NO_INTERRUPT] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [GET_LEN] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [MATCH_CLASS] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [MATCH_MAPPING] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [MATCH_SEQUENCE] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [MATCH_KEYS] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [GET_ITER] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [GET_YIELD_FROM_ITER] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [FOR_ITER] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [FOR_ITER_LIST] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [FOR_ITER_TUPLE] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [FOR_ITER_RANGE] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [FOR_ITER_GEN] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [BEFORE_ASYNC_WITH] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [BEFORE_WITH] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [WITH_EXCEPT_START] = { 4, 5, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [PUSH_EXC_INFO] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [LOAD_ATTR_METHOD_WITH_VALUES] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [LOAD_ATTR_METHOD_WITH_DICT] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [LOAD_ATTR_METHOD_NO_DICT] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [LOAD_ATTR_METHOD_LAZY_DICT] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [CALL_BOUND_METHOD_EXACT_ARGS] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [KW_NAMES] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [CALL] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [CALL_PY_EXACT_ARGS] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [CALL_PY_WITH_DEFAULTS] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [CALL_NO_KW_TYPE_1] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [CALL_NO_KW_STR_1] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [CALL_NO_KW_TUPLE_1] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [CALL_BUILTIN_CLASS] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [CALL_NO_KW_BUILTIN_O] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [CALL_NO_KW_BUILTIN_FAST] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [CALL_BUILTIN_FAST_WITH_KEYWORDS] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [CALL_NO_KW_LEN] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [CALL_NO_KW_ISINSTANCE] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [CALL_NO_KW_LIST_APPEND] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [CALL_NO_KW_METHOD_DESCRIPTOR_O] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [CALL_NO_KW_METHOD_DESCRIPTOR_NOARGS] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [CALL_NO_KW_METHOD_DESCRIPTOR_FAST] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [CALL_FUNCTION_EX] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [MAKE_FUNCTION] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [RETURN_GENERATOR] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [BUILD_SLICE] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [FORMAT_VALUE] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [COPY] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [BINARY_OP] = { 2, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IBC" }, + [SWAP] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [EXTENDED_ARG] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, + [CACHE] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" }, +}; diff --git a/Tools/cases_generator/generate_cases.py b/Tools/cases_generator/generate_cases.py index 5eed74c5e1472b..7452b2ced05254 100644 --- a/Tools/cases_generator/generate_cases.py +++ b/Tools/cases_generator/generate_cases.py @@ -21,6 +21,9 @@ DEFAULT_OUTPUT = os.path.relpath( os.path.join(os.path.dirname(__file__), "../../Python/generated_cases.c.h") ) +DEFAULT_METADATA_OUTPUT = os.path.relpath( + os.path.join(os.path.dirname(__file__), "../../Python/opcode_metadata.h") +) BEGIN_MARKER = "// BEGIN BYTECODES //" END_MARKER = "// END BYTECODES //" RE_PREDICTED = r"^\s*(?:PREDICT\(|GO_TO_INSTRUCTION\(|DEOPT_IF\(.*?,\s*)(\w+)\);\s*$" @@ -37,6 +40,12 @@ arg_parser.add_argument( "-o", "--output", type=str, help="Generated code", default=DEFAULT_OUTPUT ) +arg_parser.add_argument( + "-m", + "--metadata", + action="store_true", + help=f"Generate metadata instead, changes output default to {DEFAULT_METADATA_OUTPUT}", +) class Formatter: @@ -96,6 +105,8 @@ def assign(self, dst: StackEffect, src: StackEffect): cast = self.cast(dst, src) if m := re.match(r"^PEEK\((\d+)\)$", dst.name): self.emit(f"POKE({m.group(1)}, {cast}{src.name});") + elif m := re.match(r"^REG\(oparg(\d+)\)$", dst.name): + self.emit(f"Py_XSETREF({dst.name}, {cast}{src.name});") else: self.emit(f"{dst.name} = {cast}{src.name};") @@ -109,7 +120,8 @@ class Instruction: # Parts of the underlying instruction definition inst: parser.InstDef - kind: typing.Literal["inst", "op"] + register: bool + kind: typing.Literal["inst", "op", "legacy"] # Legacy means no (input -- output) name: str block: parser.Block block_text: list[str] # Block.text, less curlies, less PREDICT() calls @@ -121,6 +133,9 @@ class Instruction: cache_effects: list[parser.CacheEffect] input_effects: list[StackEffect] output_effects: list[StackEffect] + # Parallel to input_effects + input_registers: list[str] = dataclasses.field(repr=False) + output_registers: list[str] = dataclasses.field(repr=False) # Set later family: parser.Family | None = None @@ -129,6 +144,7 @@ class Instruction: def __init__(self, inst: parser.InstDef): self.inst = inst + self.register = inst.register self.kind = inst.kind self.name = inst.name self.block = inst.block @@ -150,9 +166,24 @@ def __init__(self, inst: parser.InstDef): break self.unmoved_names = frozenset(unmoved_names) + def analyze_registers(self, a: "Analyzer") -> None: + regs = iter(("REG(oparg1)", "REG(oparg2)", "REG(oparg3)")) + try: + self.input_registers = [ + next(regs) for ieff in self.input_effects if ieff.name != UNUSED + ] + self.output_registers = [ + next(regs) for oeff in self.output_effects if oeff.name != UNUSED + ] + except StopIteration: # Running out of registers + a.error( + f"Instruction {self.name} has too many register effects", node=self.inst + ) + def write(self, out: Formatter) -> None: """Write one instruction, sans prologue and epilogue.""" # Write a static assertion that a family's cache size is correct + if family := self.family: if self.name == family.members[0]: if cache_size := family.size: @@ -161,10 +192,16 @@ def write(self, out: Formatter) -> None: f'{self.cache_offset}, "incorrect cache size");' ) - # Write input stack effect variable declarations and initializations - for i, ieffect in enumerate(reversed(self.input_effects), 1): - src = StackEffect(f"PEEK({i})", "") - out.declare(ieffect, src) + if not self.register: + # Write input stack effect variable declarations and initializations + for i, ieffect in enumerate(reversed(self.input_effects), 1): + src = StackEffect(f"PEEK({i})", "") + out.declare(ieffect, src) + else: + # Write input register variable declarations and initializations + for ieffect, reg in zip(self.input_effects, self.input_registers): + src = StackEffect(reg, "") + out.declare(ieffect, src) # Write output stack effect variable declarations input_names = {ieffect.name for ieffect in self.input_effects} @@ -172,20 +209,28 @@ def write(self, out: Formatter) -> None: if oeffect.name not in input_names: out.declare(oeffect, None) + # out.emit(f"JUMPBY(OPSIZE({self.inst.name}) - 1);") + self.write_body(out, 0) # Skip the rest if the block always exits if self.always_exits: return - # Write net stack growth/shrinkage - diff = len(self.output_effects) - len(self.input_effects) - out.stack_adjust(diff) + if not self.register: + # Write net stack growth/shrinkage + diff = len(self.output_effects) - len(self.input_effects) + out.stack_adjust(diff) - # Write output stack effect assignments - for i, oeffect in enumerate(reversed(self.output_effects), 1): - if oeffect.name not in self.unmoved_names: - dst = StackEffect(f"PEEK({i})", "") + # Write output stack effect assignments + for i, oeffect in enumerate(reversed(self.output_effects), 1): + if oeffect.name not in self.unmoved_names: + dst = StackEffect(f"PEEK({i})", "") + out.assign(dst, oeffect) + else: + # Write output register assignments + for oeffect, reg in zip(self.output_effects, self.output_registers): + dst = StackEffect(reg, "") out.assign(dst, oeffect) # Write cache effect @@ -209,7 +254,9 @@ def write_body(self, out: Formatter, dedent: int, cache_adjust: int = 0) -> None else: typ = f"uint{bits}_t " func = f"read_u{bits}" - out.emit(f"{typ}{ceffect.name} = {func}(&next_instr[{cache_offset}].cache);") + out.emit( + f"{typ}{ceffect.name} = {func}(&next_instr[{cache_offset}].cache);" + ) cache_offset += ceffect.size assert cache_offset == self.cache_offset + cache_adjust @@ -222,14 +269,17 @@ def write_body(self, out: Formatter, dedent: int, cache_adjust: int = 0) -> None # ERROR_IF() must pop the inputs from the stack. # The code block is responsible for DECREF()ing them. # NOTE: If the label doesn't exist, just add it to ceval.c. - ninputs = len(self.input_effects) - # Don't pop common input/output effects at the bottom! - # These aren't DECREF'ed so they can stay. - for ieff, oeff in zip(self.input_effects, self.output_effects): - if ieff.name == oeff.name: - ninputs -= 1 - else: - break + if not self.register: + ninputs = len(self.input_effects) + # Don't pop common input/output effects at the bottom! + # These aren't DECREF'ed so they can stay. + for ieff, oeff in zip(self.input_effects, self.output_effects): + if ieff.name == oeff.name: + ninputs -= 1 + else: + break + else: + ninputs = 0 if ninputs: out.write_raw( f"{extra}{space}if ({cond}) goto pop_{ninputs}_{label};\n" @@ -237,10 +287,11 @@ def write_body(self, out: Formatter, dedent: int, cache_adjust: int = 0) -> None else: out.write_raw(f"{extra}{space}if ({cond}) goto {label};\n") elif m := re.match(r"(\s*)DECREF_INPUTS\(\);\s*$", line): - space = m.group(1) - for ieff in self.input_effects: - if ieff.name not in self.unmoved_names: - out.write_raw(f"{extra}{space}Py_DECREF({ieff.name});\n") + if not self.register: + space = m.group(1) + for ieff in self.input_effects: + if ieff.name not in self.unmoved_names: + out.write_raw(f"{extra}{space}Py_DECREF({ieff.name});\n") else: out.write_raw(extra + line) @@ -392,6 +443,7 @@ def analyze(self) -> None: self.find_predictions() self.map_families() self.check_families() + self.analyze_register_instrs() self.analyze_supers_and_macros() def find_predictions(self) -> None: @@ -458,6 +510,11 @@ def check_families(self) -> None: family, ) + def analyze_register_instrs(self) -> None: + for instr in self.instrs.values(): + if instr.register: + instr.analyze_registers(self) + def analyze_supers_and_macros(self) -> None: """Analyze each super- and macro instruction.""" self.super_instrs = {} @@ -563,6 +620,129 @@ def stack_analysis( ] return stack, -lowest + def write_metadata(self) -> None: + """Write instruction metadata to output file.""" + with open(self.output_filename, "w") as f: + # Write provenance header + f.write( + f"// This file is generated by {os.path.relpath(__file__)} --metadata\n" + ) + f.write(f"// from {os.path.relpath(self.filename)}\n") + f.write(f"// Do not edit!\n") + + # Create formatter; the rest of the code uses this + self.out = Formatter(f, 0) + + # Write variable definition + self.out.emit("enum Direction { DIR_NONE, DIR_READ, DIR_WRITE };") + self.out.emit("static const struct {") + with self.out.indent(): + self.out.emit("short n_popped;") + self.out.emit("short n_pushed;") + self.out.emit("enum Direction dir_op1;") + self.out.emit("enum Direction dir_op2;") + self.out.emit("enum Direction dir_op3;") + self.out.emit("bool valid_entry;") + self.out.emit("char instr_format[10];") + self.out.emit("} _PyOpcode_opcode_metadata[256] = {") + + # Write metadata for each instruction + for thing in self.everything: + match thing: + case parser.InstDef(): + if thing.kind != "op": + self.write_metadata_for_inst(self.instrs[thing.name]) + case parser.Super(): + self.write_metadata_for_super(self.super_instrs[thing.name]) + case parser.Macro(): + self.write_metadata_for_macro(self.macro_instrs[thing.name]) + case _: + typing.assert_never(thing) + + # Write end of array + self.out.emit("};") + + def get_format(self, thing: Instruction | SuperInstruction | MacroInstruction) -> str: + """Get the format string for a single instruction.""" + def instr_format(instr: Instruction) -> str: + if instr.register: + fmt = "IBBB" + else: + fmt = "IB" + cache = "C" + for ce in instr.cache_effects: + for _ in range(ce.size): + fmt += cache + cache = "0" + return fmt + match thing: + case Instruction(): + format = instr_format(thing) + case SuperInstruction(): + format = "" + for part in thing.parts: + format += instr_format(part.instr) + case MacroInstruction(): + # Macros don't support register instructions yet + format = "IB" + cache = "C" + for part in thing.parts: + if isinstance(part, parser.CacheEffect): + for _ in range(part.size): + format += cache + cache = "0" + else: + assert isinstance(part, Component) + for ce in part.instr.cache_effects: + for _ in range(ce.size): + format += cache + cache = "0" + case _: + typing.assert_never(thing) + assert len(format) < 10 # Else update the size of instr_format above + return format + + def write_metadata_for_inst(self, instr: Instruction) -> None: + """Write metadata for a single instruction.""" + dir_op1 = dir_op2 = dir_op3 = "DIR_NONE" + if instr.kind == "legacy": + n_popped = n_pushed = -1 + assert not instr.register + else: + n_popped = len(instr.input_effects) + n_pushed = len(instr.output_effects) + if instr.register: + directions: list[str] = [] + directions.extend("DIR_READ" for _ in instr.input_effects) + directions.extend("DIR_WRITE" for _ in instr.output_effects) + directions.extend("DIR_NONE" for _ in range(3)) + dir_op1, dir_op2, dir_op3 = directions[:3] + format = self.get_format(instr) + self.out.emit( + f' [{instr.name}] = {{ {n_popped}, {n_pushed}, {dir_op1}, {dir_op2}, {dir_op3}, true, "{format}" }},' + ) + + def write_metadata_for_super(self, sup: SuperInstruction) -> None: + """Write metadata for a super-instruction.""" + n_popped = sum(len(comp.instr.input_effects) for comp in sup.parts) + n_pushed = sum(len(comp.instr.output_effects) for comp in sup.parts) + dir_op1 = dir_op2 = dir_op3 = "DIR_NONE" + format = self.get_format(sup) + self.out.emit( + f' [{sup.name}] = {{ {n_popped}, {n_pushed}, {dir_op1}, {dir_op2}, {dir_op3}, true, "{format}" }},' + ) + + def write_metadata_for_macro(self, mac: MacroInstruction) -> None: + """Write metadata for a macro-instruction.""" + parts = [comp for comp in mac.parts if isinstance(comp, Component)] + n_popped = sum(len(comp.instr.input_effects) for comp in parts) + n_pushed = sum(len(comp.instr.output_effects) for comp in parts) + dir_op1 = dir_op2 = dir_op3 = "DIR_NONE" + format = self.get_format(mac) + self.out.emit( + f' [{mac.name}] = {{ {n_popped}, {n_pushed}, {dir_op1}, {dir_op2}, {dir_op3}, true, "{format}" }},' + ) + def write_instructions(self) -> None: """Write instructions to output file.""" with open(self.output_filename, "w") as f: @@ -571,7 +751,7 @@ def write_instructions(self) -> None: f.write(f"// from {os.path.relpath(self.filename)}\n") f.write(f"// Do not edit!\n") - # Create formatter; the rest of the code uses this. + # Create formatter; the rest of the code uses this self.out = Formatter(f, 8) # Write and count instructions of all kinds @@ -581,7 +761,7 @@ def write_instructions(self) -> None: for thing in self.everything: match thing: case parser.InstDef(): - if thing.kind == "inst": + if thing.kind != "op": n_instrs += 1 self.write_instr(self.instrs[thing.name]) case parser.Super(): @@ -616,9 +796,13 @@ def write_super(self, sup: SuperInstruction) -> None: with self.wrap_super_or_macro(sup): first = True for comp in sup.parts: - if not first: + if first: + pass + # self.out.emit("JUMPBY(OPSIZE(opcode) - 1);") + else: self.out.emit("NEXTOPARG();") self.out.emit("JUMPBY(1);") + # self.out.emit("JUMPBY(OPSIZE(opcode));") first = False comp.write_body(self.out, 0) if comp.instr.cache_offset: @@ -711,12 +895,18 @@ def always_exits(lines: list[str]) -> bool: def main(): """Parse command line, parse input, analyze, write output.""" args = arg_parser.parse_args() # Prints message and sys.exit(2) on error + if args.metadata: + if args.output == DEFAULT_OUTPUT: + args.output = DEFAULT_METADATA_OUTPUT a = Analyzer(args.input, args.output) # Raises OSError if input unreadable a.parse() # Raises SyntaxError on failure a.analyze() # Prints messages and sets a.errors on failure if a.errors: sys.exit(f"Found {a.errors} errors") - a.write_instructions() # Raises OSError if output can't be written + if args.metadata: + a.write_metadata() + else: + a.write_instructions() # Raises OSError if output can't be written if __name__ == "__main__": diff --git a/Tools/cases_generator/parser.py b/Tools/cases_generator/parser.py index d802c733dfd10c..4885394bf6b1e5 100644 --- a/Tools/cases_generator/parser.py +++ b/Tools/cases_generator/parser.py @@ -84,7 +84,8 @@ class OpName(Node): @dataclass class InstHeader(Node): - kind: Literal["inst", "op"] + register: bool + kind: Literal["inst", "op", "legacy"] # Legacy means no (inputs -- outputs) name: str inputs: list[InputEffect] outputs: list[OutputEffect] @@ -92,7 +93,8 @@ class InstHeader(Node): @dataclass class InstDef(Node): - kind: Literal["inst", "op"] + register: bool + kind: Literal["inst", "op", "legacy"] name: str inputs: list[InputEffect] outputs: list[OutputEffect] @@ -134,16 +136,19 @@ def definition(self) -> InstDef | Super | Macro | Family | None: def inst_def(self) -> InstDef | None: if hdr := self.inst_header(): if block := self.block(): - return InstDef(hdr.kind, hdr.name, hdr.inputs, hdr.outputs, block) + return InstDef( + hdr.register, hdr.kind, hdr.name, hdr.inputs, hdr.outputs, block + ) raise self.make_syntax_error("Expected block") return None @contextual def inst_header(self) -> InstHeader | None: # inst(NAME) - # | inst(NAME, (inputs -- outputs)) - # | op(NAME, (inputs -- outputs)) + # | [register] inst(NAME, (inputs -- outputs)) + # | [register] op(NAME, (inputs -- outputs)) # TODO: Make INST a keyword in the lexer. + register = bool(self.expect(lx.REGISTER)) if (tkn := self.expect(lx.IDENTIFIER)) and (kind := tkn.text) in ("inst", "op"): if self.expect(lx.LPAREN) and (tkn := self.expect(lx.IDENTIFIER)): name = tkn.text @@ -151,10 +156,10 @@ def inst_header(self) -> InstHeader | None: inp, outp = self.io_effect() if self.expect(lx.RPAREN): if (tkn := self.peek()) and tkn.kind == lx.LBRACE: - return InstHeader(kind, name, inp, outp) + return InstHeader(register, kind, name, inp, outp) elif self.expect(lx.RPAREN) and kind == "inst": # No legacy stack effect if kind is "op". - return InstHeader(kind, name, [], []) + return InstHeader(register, "legacy", name, [], []) return None def io_effect(self) -> tuple[list[InputEffect], list[OutputEffect]]: