Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

gh-122417: Implement per-thread heap type refcounts #122418

Merged
merged 6 commits into from
Aug 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions Include/cpython/object.h
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,9 @@ typedef struct _heaptypeobject {
PyObject *ht_module;
char *_ht_tpname; // Storage for "tp_name"; see PyType_FromModuleAndSpec
struct _specialization_cache _spec_cache; // For use by the specializer.
#ifdef Py_GIL_DISABLED
Py_ssize_t unique_id; // ID used for thread-local refcounting
#endif
/* here are optional user slots, followed by the members. */
} PyHeapTypeObject;

Expand Down
4 changes: 0 additions & 4 deletions Include/internal/pycore_gc.h
Original file line number Diff line number Diff line change
Expand Up @@ -381,10 +381,6 @@ extern void _PyGC_ClearAllFreeLists(PyInterpreterState *interp);
extern void _Py_ScheduleGC(PyThreadState *tstate);
extern void _Py_RunGC(PyThreadState *tstate);

#ifdef Py_GIL_DISABLED
// gh-117783: Immortalize objects that use deferred reference counting
extern void _PyGC_ImmortalizeDeferredObjects(PyInterpreterState *interp);
#endif

#ifdef __cplusplus
}
Expand Down
2 changes: 2 additions & 0 deletions Include/internal/pycore_interp.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ extern "C" {
#include "pycore_qsbr.h" // struct _qsbr_state
#include "pycore_tstate.h" // _PyThreadStateImpl
#include "pycore_tuple.h" // struct _Py_tuple_state
#include "pycore_typeid.h" // struct _Py_type_id_pool
#include "pycore_typeobject.h" // struct types_state
#include "pycore_unicodeobject.h" // struct _Py_unicode_state
#include "pycore_warnings.h" // struct _warnings_runtime_state
Expand Down Expand Up @@ -220,6 +221,7 @@ struct _is {
#if defined(Py_GIL_DISABLED)
struct _mimalloc_interp_state mimalloc;
struct _brc_state brc; // biased reference counting state
struct _Py_type_id_pool type_ids;
PyMutex weakref_locks[NUM_WEAKREF_LIST_LOCKS];
#endif

Expand Down
72 changes: 71 additions & 1 deletion Include/internal/pycore_object.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,19 @@ extern "C" {
#include "pycore_interp.h" // PyInterpreterState.gc
#include "pycore_pyatomic_ft_wrappers.h" // FT_ATOMIC_STORE_PTR_RELAXED
#include "pycore_pystate.h" // _PyInterpreterState_GET()
#include "pycore_typeid.h" // _PyType_IncrefSlow


#define _Py_IMMORTAL_REFCNT_LOOSE ((_Py_IMMORTAL_REFCNT >> 1) + 1)

// This value is added to `ob_ref_shared` for objects that use deferred
// reference counting so that they are not immediately deallocated when the
// non-deferred reference count drops to zero.
//
// The value is half the maximum shared refcount because the low two bits of
// `ob_ref_shared` are used for flags.
#define _Py_REF_DEFERRED (PY_SSIZE_T_MAX / 8)

// gh-121528, gh-118997: Similar to _Py_IsImmortal() but be more loose when
// comparing the reference count to stay compatible with C extensions built
// with the stable ABI 3.11 or older. Such extensions implement INCREF/DECREF
Expand Down Expand Up @@ -280,6 +289,67 @@ extern PyStatus _PyObject_InitState(PyInterpreterState *interp);
extern void _PyObject_FiniState(PyInterpreterState *interp);
extern bool _PyRefchain_IsTraced(PyInterpreterState *interp, PyObject *obj);

#ifndef Py_GIL_DISABLED
# define _Py_INCREF_TYPE Py_INCREF
# define _Py_DECREF_TYPE Py_DECREF
#else
static inline void
markshannon marked this conversation as resolved.
Show resolved Hide resolved
_Py_INCREF_TYPE(PyTypeObject *type)
{
    // Only heap types take part in per-thread refcounting. Any other type is
    // expected to be immortal, so there is nothing to count.
    if (!_PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) {
        assert(_Py_IsImmortal(type));
        return;
    }

    _PyThreadStateImpl *impl = (_PyThreadStateImpl *)_PyThreadState_GET();
    PyHeapTypeObject *heap_type = (PyHeapTypeObject *)type;

    // The id is compared as an unsigned value on purpose: `unique_id == -1`
    // (per-thread refcounting disabled for this type) becomes a huge number
    // and falls into the slow path, exactly like an id that lies beyond the
    // end of this thread's refcount array.
    size_t slot = (size_t)heap_type->unique_id;
    if (slot >= (size_t)impl->types.size) {
        // The slow path resizes the thread-local refcount array if necessary.
        // It also handles unique_id == -1, which keeps this inlinable
        // function smaller.
        _PyType_IncrefSlow(heap_type);
        return;
    }

    // Fast path: the id has a slot in this thread's array, so only the
    // thread-local counter is touched.
#  ifdef Py_REF_DEBUG
    _Py_INCREF_IncRefTotal();
#  endif
    _Py_INCREF_STAT_INC();
    impl->types.refcounts[slot] += 1;
}

static inline void
_Py_DECREF_TYPE(PyTypeObject *type)
markshannon marked this conversation as resolved.
Show resolved Hide resolved
{
if (!_PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
if (!_PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) {
if (_Py_IsImmortal(type)) {

Immortal heap types are allowed.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Immortal heap types are fine. They either go through the Py_DECREF(), which is a no-op, or do some unnecessary work with the per-thread refcounts, which is also fine.

The assumption in the assert is that static types are immortal. We already assert that elsewhere (in _Py_NewReference).

We could also write:

if (_Py_IsImmortal(type)) {
    return;
}

assert(_PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE));

That seems a bit less robust to me in cases where the assumptions are violated, but also fine.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Either works. I think they are equivalent in terms of checking assumptions.

assert(_Py_IsImmortal(type));
return;
}

_PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
PyHeapTypeObject *ht = (PyHeapTypeObject *)type;

// Unsigned comparison so that `unique_id=-1`, which indicates that
// per-thread refcounting has been disabled on this type, is handled by
// the "else".
if ((size_t)ht->unique_id < (size_t)tstate->types.size) {
# ifdef Py_REF_DEBUG
_Py_DECREF_DecRefTotal();
# endif
_Py_DECREF_STAT_INC();
tstate->types.refcounts[ht->unique_id]--;
}
else {
// Directly decref the type if the type id is not assigned or if
// per-thread refcounting has been disabled on this type.
Py_DECREF(type);
}
}
#endif

/* Inline functions trading binary compatibility for speed:
_PyObject_Init() is the fast version of PyObject_Init(), and
_PyObject_InitVar() is the fast version of PyObject_InitVar().
Expand All @@ -291,7 +361,7 @@ _PyObject_Init(PyObject *op, PyTypeObject *typeobj)
assert(op != NULL);
Py_SET_TYPE(op, typeobj);
assert(_PyType_HasFeature(typeobj, Py_TPFLAGS_HEAPTYPE) || _Py_IsImmortalLoose(typeobj));
Py_INCREF(typeobj);
_Py_INCREF_TYPE(typeobj);
_Py_NewReference(op);
}

Expand Down
10 changes: 10 additions & 0 deletions Include/internal/pycore_tstate.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,16 @@ typedef struct _PyThreadStateImpl {
struct _mimalloc_thread_state mimalloc;
struct _Py_freelists freelists;
struct _brc_thread_state brc;
struct {
// The thread-local refcounts for heap type objects
Py_ssize_t *refcounts;

// Size of the refcounts array.
Py_ssize_t size;

// If set, don't use thread-local refcounts
int is_finalized;
} types;
#endif

#if defined(Py_REF_DEBUG) && defined(Py_GIL_DISABLED)
Expand Down
75 changes: 75 additions & 0 deletions Include/internal/pycore_typeid.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
#ifndef Py_INTERNAL_TYPEID_H
#define Py_INTERNAL_TYPEID_H
#ifdef __cplusplus
extern "C" {
#endif

#ifndef Py_BUILD_CORE
# error "this header requires Py_BUILD_CORE define"
#endif

#ifdef Py_GIL_DISABLED

// This contains code for allocating unique ids to heap type objects
// and re-using those ids when the type is deallocated.
//
// The type ids are used to implement per-thread reference counts of
// heap type objects to avoid contention on the reference count fields
// of heap type objects. Static type objects are immortal, so contention
// is not an issue for those types.
//
// Type id of -1 is used to indicate a type doesn't use thread-local
// refcounting. This value is used when a type object is finalized by the GC
// and during interpreter shutdown to allow the type object to be
// deallocated promptly when the object's refcount reaches zero.
//
// Each entry implicitly represents a type id based on its offset in the
// table. Non-allocated entries form a free-list via the 'next' pointer.
// Allocated entries store the corresponding PyTypeObject.
typedef union _Py_type_id_entry {
// Free-list link: points to the next unallocated entry. Only meaningful
// while this entry is itself unallocated (i.e. part of the freelist).
union _Py_type_id_entry *next;

// The heap type that currently owns this id. Only meaningful while the id
// is assigned; the entry's position in the table is the id itself.
PyHeapTypeObject *type;
} _Py_type_id_entry;

// Pool of type ids together with the table that maps each id to its entry.
// An instance is embedded in the interpreter state as `type_ids` in
// free-threaded (Py_GIL_DISABLED) builds.
struct _Py_type_id_pool {
// NOTE(review): presumably guards `table`, `freelist` and `size` against
// concurrent id assignment/release -- confirm in Python/typeid.c.
PyMutex mutex;

// combined table of types with allocated type ids and unallocated
// type ids.
_Py_type_id_entry *table;

// Next entry to allocate inside 'table' or NULL
// NOTE(review): NULL presumably means no unallocated entry is left and the
// table has to grow before another id can be handed out -- confirm.
_Py_type_id_entry *freelist;

// size of 'table'
// NOTE(review): looks like a count of entries rather than bytes -- confirm.
Py_ssize_t size;
};

// Assigns the next id from the pool of type ids.
// Called while a heap type is being created (type_new_alloc() and
// _PyType_FromMetaclass_impl() in Objects/typeobject.c).
extern void _PyType_AssignId(PyHeapTypeObject *type);

// Releases the allocated type id back to the pool.
// Called from type_dealloc() just before the type's memory is freed.
extern void _PyType_ReleaseId(PyHeapTypeObject *type);

// Merges the thread-local reference counts into the corresponding types.
extern void _PyType_MergeThreadLocalRefcounts(_PyThreadStateImpl *tstate);

// Like _PyType_MergeThreadLocalRefcounts, but also frees the thread-local
// array of refcounts.
extern void _PyType_FinalizeThreadLocalRefcounts(_PyThreadStateImpl *tstate);

// Frees the interpreter's pool of type ids.
extern void _PyType_FinalizeIdPool(PyInterpreterState *interp);

// Increfs the type, resizing the thread-local refcount array if necessary.
// This is the out-of-line path of the inline _Py_INCREF_TYPE(): it is taken
// when the type's id does not fit in the calling thread's refcount array,
// which includes the `unique_id == -1` (per-thread refcounting disabled) case.
// NOTE(review): declared with PyAPI_FUNC, unlike the functions above --
// presumably because it is referenced from an inline function in a header;
// confirm.
PyAPI_FUNC(void) _PyType_IncrefSlow(PyHeapTypeObject *type);

#endif /* Py_GIL_DISABLED */

#ifdef __cplusplus
}
#endif
#endif /* !Py_INTERNAL_TYPEID_H */
4 changes: 3 additions & 1 deletion Lib/test/test_sys.py
Original file line number Diff line number Diff line change
Expand Up @@ -1710,6 +1710,7 @@ def delx(self): del self.__x
fmt = 'P2nPI13Pl4Pn9Pn12PIPc'
s = vsize(fmt)
check(int, s)
typeid = 'n' if support.Py_GIL_DISABLED else ''
# class
s = vsize(fmt + # PyTypeObject
'4P' # PyAsyncMethods
Expand All @@ -1718,7 +1719,8 @@ def delx(self): del self.__x
'10P' # PySequenceMethods
'2P' # PyBufferProcs
'6P'
'1PIP' # Specializer cache
'1PIP' # Specializer cache
+ typeid # heap type id (free-threaded only)
)
class newstyleclass(object): pass
# Separate block for PyDictKeysObject with 8 keys and 5 entries
Expand Down
2 changes: 2 additions & 0 deletions Makefile.pre.in
Original file line number Diff line number Diff line change
Expand Up @@ -483,6 +483,7 @@ PYTHON_OBJS= \
Python/thread.o \
Python/traceback.o \
Python/tracemalloc.o \
Python/typeid.o \
Python/getopt.o \
Python/pystrcmp.o \
Python/pystrtod.o \
Expand Down Expand Up @@ -1257,6 +1258,7 @@ PYTHON_HEADERS= \
$(srcdir)/Include/internal/pycore_tracemalloc.h \
$(srcdir)/Include/internal/pycore_tstate.h \
$(srcdir)/Include/internal/pycore_tuple.h \
$(srcdir)/Include/internal/pycore_typeid.h \
$(srcdir)/Include/internal/pycore_typeobject.h \
$(srcdir)/Include/internal/pycore_typevarobject.h \
$(srcdir)/Include/internal/pycore_ucnhash.h \
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
In the free-threaded build, the reference counts for heap type objects are now
partially stored in a distributed manner in per-thread arrays. This reduces
contention on the heap type's reference count fields when creating or
destroying instances of the same type from multiple threads concurrently.
10 changes: 1 addition & 9 deletions Objects/object.c
Original file line number Diff line number Diff line change
Expand Up @@ -2470,15 +2470,7 @@ _PyObject_SetDeferredRefcount(PyObject *op)
assert(_Py_IsOwnedByCurrentThread(op));
assert(op->ob_ref_shared == 0);
_PyObject_SET_GC_BITS(op, _PyGC_BITS_DEFERRED);
PyInterpreterState *interp = _PyInterpreterState_GET();
if (_Py_atomic_load_int_relaxed(&interp->gc.immortalize) == 1) {
// gh-117696: immortalize objects instead of using deferred reference
// counting for now.
_Py_SetImmortal(op);
return;
}
op->ob_ref_local += 1;
op->ob_ref_shared = _Py_REF_QUEUED;
op->ob_ref_shared = _Py_REF_SHARED(_Py_REF_DEFERRED, 0);
#endif
}

Expand Down
16 changes: 13 additions & 3 deletions Objects/typeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -2452,7 +2452,7 @@ subtype_dealloc(PyObject *self)
reference counting. Only decref if the base type is not already a heap
allocated type. Otherwise, basedealloc should have decref'd it already */
if (type_needs_decref) {
Py_DECREF(type);
_Py_DECREF_TYPE(type);
}

/* Done */
Expand Down Expand Up @@ -2562,7 +2562,7 @@ subtype_dealloc(PyObject *self)
reference counting. Only decref if the base type is not already a heap
allocated type. Otherwise, basedealloc should have decref'd it already */
if (type_needs_decref) {
Py_DECREF(type);
_Py_DECREF_TYPE(type);
}

endlabel:
Expand Down Expand Up @@ -3913,7 +3913,9 @@ type_new_alloc(type_new_ctx *ctx)
et->ht_module = NULL;
et->_ht_tpname = NULL;

_PyObject_SetDeferredRefcount((PyObject *)et);
#ifdef Py_GIL_DISABLED
_PyType_AssignId(et);
#endif

return type;
}
Expand Down Expand Up @@ -4965,6 +4967,11 @@ _PyType_FromMetaclass_impl(
type->tp_weaklistoffset = weaklistoffset;
type->tp_dictoffset = dictoffset;

#ifdef Py_GIL_DISABLED
// Assign a type id to enable thread-local refcounting
_PyType_AssignId(res);
#endif

/* Ready the type (which includes inheritance).
*
* After this call we should generally only touch up what's
Expand Down Expand Up @@ -5914,6 +5921,9 @@ type_dealloc(PyObject *self)
}
Py_XDECREF(et->ht_module);
PyMem_Free(et->_ht_tpname);
#ifdef Py_GIL_DISABLED
_PyType_ReleaseId(et);
#endif
Py_TYPE(type)->tp_free((PyObject *)type);
}

Expand Down
1 change: 1 addition & 0 deletions PCbuild/_freeze_module.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,7 @@
<ClCompile Include="..\Python\thread.c" />
<ClCompile Include="..\Python\traceback.c" />
<ClCompile Include="..\Python\tracemalloc.c" />
<ClCompile Include="..\Python\typeid.c" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\PC\pyconfig.h.in" />
Expand Down
3 changes: 3 additions & 0 deletions PCbuild/_freeze_module.vcxproj.filters
Original file line number Diff line number Diff line change
Expand Up @@ -464,6 +464,9 @@
<ClCompile Include="..\Python\tracemalloc.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\Python\typeid.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\Objects\tupleobject.c">
<Filter>Source Files</Filter>
</ClCompile>
Expand Down
2 changes: 2 additions & 0 deletions PCbuild/pythoncore.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,7 @@
<ClInclude Include="..\Include\internal\pycore_tracemalloc.h" />
<ClInclude Include="..\Include\internal\pycore_tstate.h" />
<ClInclude Include="..\Include\internal\pycore_tuple.h" />
<ClInclude Include="..\Include\internal\pycore_typeid.h" />
<ClInclude Include="..\Include\internal\pycore_typeobject.h" />
<ClInclude Include="..\Include\internal\pycore_typevarobject.h" />
<ClInclude Include="..\Include\internal\pycore_ucnhash.h" />
Expand Down Expand Up @@ -643,6 +644,7 @@
<ClCompile Include="..\Python\thread.c" />
<ClCompile Include="..\Python\traceback.c" />
<ClCompile Include="..\Python\tracemalloc.c" />
<ClCompile Include="..\Python\typeid.c" />
</ItemGroup>
<ItemGroup Condition="$(IncludeExternals)">
<ClCompile Include="..\Modules\zlibmodule.c" />
Expand Down
6 changes: 6 additions & 0 deletions PCbuild/pythoncore.vcxproj.filters
Original file line number Diff line number Diff line change
Expand Up @@ -831,6 +831,9 @@
<ClInclude Include="..\Include\internal\pycore_tuple.h">
<Filter>Include\internal</Filter>
</ClInclude>
<ClInclude Include="..\Include\internal\pycore_typeid.h">
<Filter>Include\internal</Filter>
</ClInclude>
<ClInclude Include="..\Include\internal\pycore_typeobject.h">
<Filter>Include\internal</Filter>
</ClInclude>
Expand Down Expand Up @@ -1493,6 +1496,9 @@
<ClCompile Include="..\Python\tracemalloc.c">
<Filter>Python</Filter>
</ClCompile>
<ClCompile Include="..\Python\typeid.c">
<Filter>Python</Filter>
</ClCompile>
<ClCompile Include="..\Python\bootstrap_hash.c">
<Filter>Python</Filter>
</ClCompile>
Expand Down
Loading
Loading