From a2d9c3ced43db5f3cee06dbcc285dbf51db1af2c Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Thu, 11 Jan 2024 11:20:19 -0800 Subject: [PATCH 1/2] Get rid of _PyUOpExecutorObject --- Include/cpython/optimizer.h | 9 ++++++++- Include/internal/pycore_uops.h | 12 ------------ Python/bytecodes.c | 8 ++++---- Python/ceval.c | 3 +-- Python/executor_cases.c.h | 2 +- Python/generated_cases.c.h | 4 ++-- Python/optimizer.c | 23 +++++++++++------------ 7 files changed, 27 insertions(+), 34 deletions(-) diff --git a/Include/cpython/optimizer.h b/Include/cpython/optimizer.h index f077da7ee88456..622b3a0b26e95e 100644 --- a/Include/cpython/optimizer.h +++ b/Include/cpython/optimizer.h @@ -29,10 +29,17 @@ typedef struct { _PyExecutorLinkListNode links; } _PyVMData; +typedef struct { + uint16_t opcode; + uint16_t oparg; + uint32_t target; + uint64_t operand; // A cache entry +} _PyUOpInstruction; + typedef struct _PyExecutorObject { PyObject_VAR_HEAD _PyVMData vm_data; /* Used by the VM, but opaque to the optimizer */ - /* Data needed by the executor goes here, but is opaque to the VM */ + _PyUOpInstruction trace[1]; } _PyExecutorObject; typedef struct _PyOptimizerObject _PyOptimizerObject; diff --git a/Include/internal/pycore_uops.h b/Include/internal/pycore_uops.h index eb10002d34ce51..bd5d5a0d37b175 100644 --- a/Include/internal/pycore_uops.h +++ b/Include/internal/pycore_uops.h @@ -12,18 +12,6 @@ extern "C" { #define _Py_UOP_MAX_TRACE_LENGTH 512 -typedef struct { - uint16_t opcode; - uint16_t oparg; - uint32_t target; - uint64_t operand; // A cache entry -} _PyUOpInstruction; - -typedef struct { - _PyExecutorObject base; - _PyUOpInstruction trace[1]; -} _PyUOpExecutorObject; - #ifdef __cplusplus } #endif diff --git a/Python/bytecodes.c b/Python/bytecodes.c index f53ddae8df985a..d7270ad361f046 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -68,7 +68,7 @@ static size_t jump; static uint16_t invert, counter, index, hint; #define unused 0 // Used in a macro def, can't be static static uint32_t type_version; -static _PyUOpExecutorObject *current_executor; +static _PyExecutorObject *current_executor; static PyObject * dummy_func( @@ -2363,10 +2363,10 @@ dummy_func( CHECK_EVAL_BREAKER(); PyCodeObject *code = _PyFrame_GetCode(frame); - _PyExecutorObject *executor = (_PyExecutorObject *)code->co_executors->executors[oparg&255]; + _PyExecutorObject *executor = code->co_executors->executors[oparg & 255]; if (executor->vm_data.valid) { Py_INCREF(executor); - current_executor = (_PyUOpExecutorObject *)executor; + current_executor = executor; GOTO_TIER_TWO(); } else { @@ -4055,7 +4055,7 @@ dummy_func( op(_CHECK_VALIDITY, (--)) { TIER_TWO_ONLY - DEOPT_IF(!current_executor->base.vm_data.valid); + DEOPT_IF(!current_executor->vm_data.valid); } op(_LOAD_CONST_INLINE_BORROW, (ptr/4 -- value)) { diff --git a/Python/ceval.c b/Python/ceval.c index b3b542f8ddea37..49388cd20377c0 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -25,7 +25,6 @@ #include "pycore_tuple.h" // _PyTuple_ITEMS() #include "pycore_typeobject.h" // _PySuper_Lookup() #include "pycore_uop_ids.h" // Uops -#include "pycore_uops.h" // _PyUOpExecutorObject #include "pycore_pyerrors.h" #include "pycore_dict.h" @@ -739,7 +738,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int } /* State shared between Tier 1 and Tier 2 interpreter */ - _PyUOpExecutorObject *current_executor = NULL; + _PyExecutorObject *current_executor = NULL; /* Local "register" variables. * These are cached values from the frame and code object. */ diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index ea4caa9a97ab39..3ffe4161b0124e 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -3393,7 +3393,7 @@ case _CHECK_VALIDITY: { TIER_TWO_ONLY - if (!current_executor->base.vm_data.valid) goto deoptimize; + if (!current_executor->vm_data.valid) goto deoptimize; break; } diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index e693e3e2560e7b..9aab135b1aedc5 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -2377,10 +2377,10 @@ TIER_ONE_ONLY CHECK_EVAL_BREAKER(); PyCodeObject *code = _PyFrame_GetCode(frame); - _PyExecutorObject *executor = (_PyExecutorObject *)code->co_executors->executors[oparg&255]; + _PyExecutorObject *executor = code->co_executors->executors[oparg & 255]; if (executor->vm_data.valid) { Py_INCREF(executor); - current_executor = (_PyUOpExecutorObject *)executor; + current_executor = executor; GOTO_TIER_TWO(); } else { diff --git a/Python/optimizer.c b/Python/optimizer.c index 28e12dbbf5d78b..7526750fc002b7 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -226,8 +226,8 @@ static PyMethodDef executor_methods[] = { ///////////////////// Experimental UOp Optimizer ///////////////////// static void -uop_dealloc(_PyUOpExecutorObject *self) { - _Py_ExecutorClear((_PyExecutorObject *)self); +uop_dealloc(_PyExecutorObject *self) { + _Py_ExecutorClear(self); PyObject_Free(self); } @@ -238,13 +238,13 @@ _PyUOpName(int index) } static Py_ssize_t -uop_len(_PyUOpExecutorObject *self) +uop_len(_PyExecutorObject *self) { return Py_SIZE(self); } static PyObject * -uop_item(_PyUOpExecutorObject *self, Py_ssize_t index) +uop_item(_PyExecutorObject *self, Py_ssize_t index) { Py_ssize_t len = uop_len(self); if (index < 0 || index >= len) { @@ -282,7 +282,7 @@ PySequenceMethods uop_as_sequence = { PyTypeObject _PyUOpExecutor_Type = { PyVarObject_HEAD_INIT(&PyType_Type, 0) .tp_name = "uop_executor", - .tp_basicsize = offsetof(_PyUOpExecutorObject, trace), + .tp_basicsize = offsetof(_PyExecutorObject, trace), .tp_itemsize = sizeof(_PyUOpInstruction), .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION, .tp_dealloc = (destructor)uop_dealloc, @@ -433,8 +433,7 @@ translate_bytecode_to_trace( } if (opcode == ENTER_EXECUTOR) { - _PyExecutorObject *executor = - (_PyExecutorObject *)code->co_executors->executors[oparg&255]; + _PyExecutorObject *executor = code->co_executors->executors[oparg & 255]; opcode = executor->vm_data.opcode; DPRINTF(2, " * ENTER_EXECUTOR -> %s\n", _PyOpcode_OpName[opcode]); oparg = (oparg & 0xffffff00) | executor->vm_data.oparg; @@ -719,7 +718,7 @@ make_executor_from_uops(_PyUOpInstruction *buffer, _PyBloomFilter *dependencies) { uint32_t used[(_Py_UOP_MAX_TRACE_LENGTH + 31)/32] = { 0 }; int length = compute_used(buffer, used); - _PyUOpExecutorObject *executor = PyObject_NewVar(_PyUOpExecutorObject, &_PyUOpExecutor_Type, length); + _PyExecutorObject *executor = PyObject_NewVar(_PyExecutorObject, &_PyUOpExecutor_Type, length); if (executor == NULL) { return NULL; } @@ -744,7 +743,7 @@ make_executor_from_uops(_PyUOpInstruction *buffer, _PyBloomFilter *dependencies) dest--; } assert(dest == -1); - _Py_ExecutorInit((_PyExecutorObject *)executor, dependencies); + _Py_ExecutorInit(executor, dependencies); #ifdef Py_DEBUG char *python_lltrace = Py_GETENV("PYTHON_LLTRACE"); int lltrace = 0; @@ -763,7 +762,7 @@ make_executor_from_uops(_PyUOpInstruction *buffer, _PyBloomFilter *dependencies) } } #endif - return (_PyExecutorObject *)executor; + return executor; } static int @@ -829,7 +828,7 @@ PyUnstable_Optimizer_NewUOpOptimizer(void) } static void -counter_dealloc(_PyUOpExecutorObject *self) { +counter_dealloc(_PyExecutorObject *self) { PyObject *opt = (PyObject *)self->trace[0].operand; Py_DECREF(opt); uop_dealloc(self); @@ -838,7 +837,7 @@ counter_dealloc(_PyUOpExecutorObject *self) { PyTypeObject _PyCounterExecutor_Type = { PyVarObject_HEAD_INIT(&PyType_Type, 0) .tp_name = "counting_executor", - .tp_basicsize = offsetof(_PyUOpExecutorObject, trace), + .tp_basicsize = offsetof(_PyExecutorObject, trace), .tp_itemsize = sizeof(_PyUOpInstruction), .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION, .tp_dealloc = (destructor)counter_dealloc, From 9080d0fcde91cd6aa17ddf8a7b086af5699f7cab Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Thu, 11 Jan 2024 11:46:48 -0800 Subject: [PATCH 2/2] Get rid of pycore_uops.h --- Include/internal/pycore_optimizer.h | 2 -- Include/internal/pycore_uops.h | 18 ------------------ Makefile.pre.in | 1 - PCbuild/pythoncore.vcxproj | 1 - PCbuild/pythoncore.vcxproj.filters | 3 --- Python/optimizer.c | 15 ++++++++------- Python/optimizer_analysis.c | 1 - 7 files changed, 8 insertions(+), 33 deletions(-) delete mode 100644 Include/internal/pycore_uops.h diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index b052460b44b791..31f30c673f207a 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -8,8 +8,6 @@ extern "C" { # error "this header requires Py_BUILD_CORE define" #endif -#include "pycore_uops.h" // _PyUOpInstruction - int _Py_uop_analyze_and_optimize(PyCodeObject *code, _PyUOpInstruction *trace, int trace_len, int curr_stackentries); diff --git a/Include/internal/pycore_uops.h b/Include/internal/pycore_uops.h deleted file mode 100644 index bd5d5a0d37b175..00000000000000 --- a/Include/internal/pycore_uops.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef Py_INTERNAL_UOPS_H -#define Py_INTERNAL_UOPS_H -#ifdef __cplusplus -extern "C" { -#endif - -#ifndef Py_BUILD_CORE -# error "this header requires Py_BUILD_CORE define" -#endif - -#include "pycore_frame.h" // _PyInterpreterFrame - -#define _Py_UOP_MAX_TRACE_LENGTH 512 - -#ifdef __cplusplus -} -#endif -#endif /* !Py_INTERNAL_UOPS_H */ diff --git a/Makefile.pre.in b/Makefile.pre.in index 15d419b930c181..289ab97666e902 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -1895,7 +1895,6 @@ PYTHON_HEADERS= \ $(srcdir)/Include/internal/pycore_unionobject.h \ $(srcdir)/Include/internal/pycore_unicodeobject.h \ $(srcdir)/Include/internal/pycore_unicodeobject_generated.h \ - $(srcdir)/Include/internal/pycore_uops.h \ $(srcdir)/Include/internal/pycore_uop_metadata.h \ $(srcdir)/Include/internal/pycore_warnings.h \ $(srcdir)/Include/internal/pycore_weakref.h \ diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj index a8b753ca489ab7..64738b1bbf235d 100644 --- a/PCbuild/pythoncore.vcxproj +++ b/PCbuild/pythoncore.vcxproj @@ -295,7 +295,6 @@ - diff --git a/PCbuild/pythoncore.vcxproj.filters b/PCbuild/pythoncore.vcxproj.filters index 965efa2e3d34b9..b37ca2dfed55ab 100644 --- a/PCbuild/pythoncore.vcxproj.filters +++ b/PCbuild/pythoncore.vcxproj.filters @@ -804,9 +804,6 @@ Include\internal - - Include\internal - Include\internal\mimalloc diff --git a/Python/optimizer.c b/Python/optimizer.c index 2b6db7e20bd4fe..236ae266971d48 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -7,7 +7,6 @@ #include "pycore_optimizer.h" // _Py_uop_analyze_and_optimize() #include "pycore_pystate.h" // _PyInterpreterState_GET() #include "pycore_uop_ids.h" -#include "pycore_uops.h" #include "cpython/optimizer.h" #include #include @@ -17,6 +16,8 @@ #include "pycore_uop_metadata.h" // Uop tables #undef NEED_OPCODE_METADATA +#define UOP_MAX_TRACE_LENGTH 512 + #define MAX_EXECUTORS_SIZE 256 @@ -703,7 +704,7 @@ compute_used(_PyUOpInstruction *buffer, uint32_t *used) { int count = 0; SET_BIT(used, 0); - for (int i = 0; i < _Py_UOP_MAX_TRACE_LENGTH; i++) { + for (int i = 0; i < UOP_MAX_TRACE_LENGTH; i++) { if (!BIT_IS_SET(used, i)) { continue; } @@ -735,7 +736,7 @@ compute_used(_PyUOpInstruction *buffer, uint32_t *used) static _PyExecutorObject * make_executor_from_uops(_PyUOpInstruction *buffer, _PyBloomFilter *dependencies) { - uint32_t used[(_Py_UOP_MAX_TRACE_LENGTH + 31)/32] = { 0 }; + uint32_t used[(UOP_MAX_TRACE_LENGTH + 31)/32] = { 0 }; int length = compute_used(buffer, used); _PyExecutorObject *executor = PyObject_NewVar(_PyExecutorObject, &_PyUOpExecutor_Type, length); if (executor == NULL) { @@ -743,7 +744,7 @@ make_executor_from_uops(_PyUOpInstruction *buffer, _PyBloomFilter *dependencies) } int dest = length - 1; /* Scan backwards, so that we see the destinations of jumps before the jumps themselves. */ - for (int i = _Py_UOP_MAX_TRACE_LENGTH-1; i >= 0; i--) { + for (int i = UOP_MAX_TRACE_LENGTH-1; i >= 0; i--) { if (!BIT_IS_SET(used, i)) { continue; } @@ -794,8 +795,8 @@ uop_optimize( { _PyBloomFilter dependencies; _Py_BloomFilter_Init(&dependencies); - _PyUOpInstruction buffer[_Py_UOP_MAX_TRACE_LENGTH]; - int err = translate_bytecode_to_trace(code, instr, buffer, _Py_UOP_MAX_TRACE_LENGTH, &dependencies); + _PyUOpInstruction buffer[UOP_MAX_TRACE_LENGTH]; + int err = translate_bytecode_to_trace(code, instr, buffer, UOP_MAX_TRACE_LENGTH, &dependencies); if (err <= 0) { // Error or nothing translated return err; @@ -803,7 +804,7 @@ uop_optimize( OPT_STAT_INC(traces_created); char *uop_optimize = Py_GETENV("PYTHONUOPSOPTIMIZE"); if (uop_optimize == NULL || *uop_optimize > '0') { - err = _Py_uop_analyze_and_optimize(code, buffer, _Py_UOP_MAX_TRACE_LENGTH, curr_stackentries); + err = _Py_uop_analyze_and_optimize(code, buffer, UOP_MAX_TRACE_LENGTH, curr_stackentries); if (err < 0) { return -1; } diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 4eb2d9711f5e56..7db51f0d90a453 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -5,7 +5,6 @@ #include "pycore_opcode_utils.h" #include "pycore_pystate.h" // _PyInterpreterState_GET() #include "pycore_uop_metadata.h" -#include "pycore_uops.h" #include "pycore_long.h" #include "cpython/optimizer.h" #include