Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DrawEngine: Avoid decoding indices when we don't need them. #18586

Merged
merged 1 commit into from
Dec 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 24 additions & 8 deletions GPU/Common/DrawEngineCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -816,6 +816,10 @@ int DrawEngineCommon::ExtendNonIndexedPrim(const uint32_t *cmd, const uint32_t *
_dbg_assert_(numDrawInds_ <= MAX_DEFERRED_DRAW_INDS); // if it's equal, the check below will take care of it before any action is taken.
_dbg_assert_(numDrawVerts_ > 0);

if (!clockwise) {
anyCCWOrIndexed_ = true;
}
int seenPrims = 0;
while (cmd != stall) {
uint32_t data = *cmd;
if ((data & 0xFFF80000) != 0x04000000) {
Expand All @@ -831,6 +835,7 @@ int DrawEngineCommon::ExtendNonIndexedPrim(const uint32_t *cmd, const uint32_t *
DeferredInds &di = drawInds_[numDrawInds_++];
di.indexType = 0;
di.prim = newPrim;
seenPrims |= (1 << newPrim);
di.clockwise = clockwise;
di.vertexCount = vertexCount;
di.vertDecodeIndex = prevDrawVerts;
Expand All @@ -839,6 +844,10 @@ int DrawEngineCommon::ExtendNonIndexedPrim(const uint32_t *cmd, const uint32_t *
cmd++;
}

seenPrims_ |= seenPrims;

_dbg_assert_(cmd != start);

int totalCount = offset - dv.vertexCount;
dv.vertexCount = offset;
dv.indexUpperBound = dv.vertexCount - 1;
Expand Down Expand Up @@ -910,9 +919,16 @@ bool DrawEngineCommon::SubmitPrim(const void *verts, const void *inds, GEPrimiti

DeferredInds &di = drawInds_[numDrawInds_++];
di.inds = inds;
di.indexType = (vertTypeID & GE_VTYPE_IDX_MASK) >> GE_VTYPE_IDX_SHIFT;
int indexType = (vertTypeID & GE_VTYPE_IDX_MASK) >> GE_VTYPE_IDX_SHIFT;
if (indexType) {
anyCCWOrIndexed_ = true;
}
di.indexType = indexType;
di.prim = prim;
di.clockwise = clockwise;
if (!clockwise) {
anyCCWOrIndexed_ = true;
}
di.vertexCount = vertexCount;
di.vertDecodeIndex = numDrawVerts_;
di.offset = 0;
Expand Down Expand Up @@ -942,6 +958,7 @@ bool DrawEngineCommon::SubmitPrim(const void *verts, const void *inds, GEPrimiti
}

vertexCountInDrawCalls_ += vertexCount;
seenPrims_ |= (1 << prim);

if (prim == GE_PRIM_RECTANGLES && (gstate.getTextureAddress(0) & 0x3FFFFFFF) == (gstate.getFrameBufAddress() & 0x3FFFFFFF)) {
// This prevents issues with consecutive self-renders in Ridge Racer.
Expand All @@ -952,6 +969,8 @@ bool DrawEngineCommon::SubmitPrim(const void *verts, const void *inds, GEPrimiti
}

void DrawEngineCommon::DecodeVerts(u8 *dest) {
// Note that this should be able to continue a partial decode - we don't necessarily start from zero here (although we do most of the time).

int i = decodeVertsCounter_;
int stride = (int)dec_->GetDecVtxFmt().stride;
for (; i < numDrawVerts_; i++) {
Expand All @@ -968,7 +987,9 @@ void DrawEngineCommon::DecodeVerts(u8 *dest) {
decodeVertsCounter_ = i;
}

void DrawEngineCommon::DecodeInds() {
int DrawEngineCommon::DecodeInds() {
// Note that this should be able to continue a partial decode - we don't necessarily start from zero here (although we do most of the time).

int i = decodeIndsCounter_;
for (; i < numDrawInds_; i++) {
const DeferredInds &di = drawInds_[i];
Expand All @@ -994,12 +1015,7 @@ void DrawEngineCommon::DecodeInds() {
}
decodeIndsCounter_ = i;

// Sanity check
if (indexGen.Prim() < 0) {
ERROR_LOG_REPORT(G3D, "DecodeVerts: Failed to deduce prim: %i", indexGen.Prim());
// Force to points (0)
indexGen.AddPrim(GE_PRIM_POINTS, 0, 0, true);
}
return indexGen.VertexCount();
}

bool DrawEngineCommon::CanUseHardwareTransform(int prim) {
Expand Down
44 changes: 39 additions & 5 deletions GPU/Common/DrawEngineCommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ class DrawEngineCommon {
void UpdatePlanes();

void DecodeVerts(u8 *dest);
void DecodeInds();
int DecodeInds();

// Preprocessing for spline/bezier
u32 NormalizeVertices(u8 *outPtr, u8 *bufPtr, const u8 *inPtr, int lowerBound, int upperBound, u32 vertType, int *vertexSize = nullptr);
Expand Down Expand Up @@ -202,6 +202,8 @@ class DrawEngineCommon {
vertexCountInDrawCalls_ = 0;
decodeIndsCounter_ = 0;
decodeVertsCounter_ = 0;
seenPrims_ = 0;
anyCCWOrIndexed_ = false;
gstate_c.vertexFullAlpha = true;

// Now seems as good a time as any to reset the min/max coords, which we may examine later.
Expand All @@ -211,6 +213,34 @@ class DrawEngineCommon {
gstate_c.vertBounds.maxV = 0;
}

// Reports whether the draws collected so far form a "pure" batch: exactly one
// primitive type seen, nothing counter-clockwise and nothing indexed.
// Triangle strips additionally require that only a single draw was collected.
// NOTE(review): presumably because separate strips can't be joined without
// indices - confirm against the index generator.
inline bool CollectedPureDraw() const {
	// Any CCW or indexed draw disqualifies the batch, whatever the prim type.
	if (anyCCWOrIndexed_) {
		return false;
	}

	const int primMask = seenPrims_;
	if (primMask == (1 << GE_PRIM_TRIANGLE_STRIP)) {
		return numDrawInds_ == 1;
	}

	// A mask with several bits set (mixed prims) or any other prim type is not pure.
	return primMask == (1 << GE_PRIM_LINES) ||
		primMask == (1 << GE_PRIM_POINTS) ||
		primMask == (1 << GE_PRIM_TRIANGLES);
}

// Fills in the parameters needed to issue the collected batch as a draw call.
//
// prim        - receives the primitive type to draw with.
// numVerts    - receives the number of vertices (or generated indices) to draw.
// maxIndex    - receives numDecodedVerts_ on both paths.
// useElements - receives true if indices were decoded and the draw should be indexed.
// forceIndexed - when true, always decode indices, even for a pure batch.
//
// If the batch is pure (see CollectedPureDraw) and indexing isn't forced,
// DecodeInds() is not called at all.
inline void DecodeIndsAndGetData(GEPrimitiveType *prim, int *numVerts, int *maxIndex, bool *useElements, bool forceIndexed) {
	const bool needIndices = forceIndexed || !CollectedPureDraw();

	if (needIndices) {
		// Indexed path: DecodeInds() returns the vertex count of the generated index list.
		*numVerts = DecodeInds();
		*maxIndex = numDecodedVerts_;
		*prim = IndexGenerator::GeneralPrim(static_cast<GEPrimitiveType>(drawInds_[0].prim));
		*useElements = true;
		return;
	}

	// Non-indexed path: draw the decoded vertices directly with the first draw's prim type.
	*prim = drawInds_[0].prim;
	*numVerts = numDecodedVerts_;
	*maxIndex = numDecodedVerts_;
	*useElements = false;
}

uint32_t ComputeDrawcallsHash() const;

bool useHWTransform_ = false;
Expand All @@ -227,9 +257,7 @@ class DrawEngineCommon {
u16 *decIndex_ = nullptr;

// Cached vertex decoders
u32 lastVType_ = -1; // corresponds to dec_. Could really just pick it out of dec_...
DenseHashMap<u32, VertexDecoder *> decoderMap_;
VertexDecoder *dec_ = nullptr;
VertexDecoderJitCache *decJitCache_ = nullptr;
VertexDecoderOptions decOptions_{};

Expand All @@ -239,18 +267,18 @@ class DrawEngineCommon {
// Defer all vertex decoding to a "Flush" (except when software skinning)
struct DeferredVerts {
const void *verts;
UVScale uvScale;
u32 vertexCount;
u16 indexLowerBound;
u16 indexUpperBound;
UVScale uvScale;
};

struct DeferredInds {
const void *inds;
u32 vertexCount;
u8 vertDecodeIndex; // index into the drawVerts_ array to look up the vertexOffset.
u8 indexType;
s8 prim;
GEPrimitiveType prim;
bool clockwise;
u16 offset;
};
Expand All @@ -261,13 +289,19 @@ class DrawEngineCommon {
uint32_t drawVertexOffsets_[MAX_DEFERRED_DRAW_VERTS];
DeferredInds drawInds_[MAX_DEFERRED_DRAW_INDS];

VertexDecoder *dec_ = nullptr;
u32 lastVType_ = -1; // corresponds to dec_. Could really just pick it out of dec_...
int numDrawVerts_ = 0;
int numDrawInds_ = 0;
int vertexCountInDrawCalls_ = 0;

int decodeVertsCounter_ = 0;
int decodeIndsCounter_ = 0;

// Bitmask of the primitive types collected so far; each collected draw ORs in (1 << prim).
int seenPrims_ = 0;
// Set once any collected draw is counter-clockwise or has a non-zero index type.
bool anyCCWOrIndexed_ = false;
// NOTE(review): not referenced anywhere in the visible code - confirm whether this is still needed.
bool anyIndexed_ = false;

// Vertex collector state
IndexGenerator indexGen;
int numDecodedVerts_ = 0;
Expand Down
Loading