Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize lighting for softgpu a bit #17295

Merged
merged 5 commits into from
Apr 16, 2023
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
softgpu: Use SSE for lighting ceil if available.
Tiny optimization, helps only a little.
  • Loading branch information
unknownbrackets committed Apr 16, 2023
commit 2868495cf89e09a148a1562b711e8a719fa250db
25 changes: 22 additions & 3 deletions GPU/Software/Lighting.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,25 @@ void GenerateLightST(VertexData &vertex, const WorldCoords &worldnormal) {
vertex.texturecoords.t() = GenerateLightCoord(vertex, worldnormal, gstate.getUVLS1());
}

#if defined(_M_SSE)
// Returns the ceiling of f as an int using the SSE4.1 scalar round-up instruction.
// The target attribute lets GCC-compatible compilers emit SSE4.1 code for this one
// function, so callers are expected to check for SSE4.1 support before calling.
#if defined(__GNUC__) || defined(__clang__) || defined(__INTEL_COMPILER)
[[gnu::target("sse4.1")]]
#endif
static inline int LightCeilSSE4(float f) {
	const __m128 scalar = _mm_set_ss(f);
	// Not especially quick, but appears to beat a call to ceilf().
	const __m128 roundedUp = _mm_ceil_ss(scalar, scalar);
	// The value is already integral here, so the conversion only changes its type.
	return _mm_cvt_ss2si(roundedUp);
}
#endif

// Rounds f up to the next whole number and returns it as an int.
// On SSE builds the SSE4.1 helper is used when the running CPU reports support for it;
// every other case falls back to the C library's ceilf().
static inline int LightCeil(float f) {
#if defined(_M_SSE)
	if (!cpu_info.bSSE4_1)
		return static_cast<int>(ceilf(f));
	return LightCeilSSE4(f);
#else
	return static_cast<int>(ceilf(f));
#endif
}

void Process(VertexData &vertex, const WorldCoords &worldpos, const WorldCoords &worldnormal, const State &state) {
// Lighting blending rounds using the half offset method (like alpha blend.)
const Vec4<int> ones = Vec4<int>::AssignToAll(1);
Expand Down Expand Up @@ -250,7 +269,7 @@ void Process(VertexData &vertex, const WorldCoords &worldpos, const WorldCoords

// ambient lighting
if (lstate.ambient) {
int attspot = (int)ceilf(256 * 2 * att * spot + 1);
int attspot = (int)LightCeil(256 * 2 * att * spot + 1);
if (attspot > 512)
attspot = 512;
Vec4<int> lambient = (mac * lstate.ambientColorFactor * attspot) / (1024 * 512);
Expand All @@ -267,7 +286,7 @@ void Process(VertexData &vertex, const WorldCoords &worldpos, const WorldCoords
}

if (lstate.diffuse && diffuse_factor > 0.0f) {
int diffuse_attspot = (int)ceilf(256 * 2 * att * spot * diffuse_factor + 1);
int diffuse_attspot = (int)LightCeil(256 * 2 * att * spot * diffuse_factor + 1);
if (diffuse_attspot > 512)
diffuse_attspot = 512;
Vec4<int> mdc = state.colorForDiffuse ? colorFactor : state.material.diffuseColorFactor;
Expand All @@ -282,7 +301,7 @@ void Process(VertexData &vertex, const WorldCoords &worldpos, const WorldCoords
specular_factor = pspLightPow(specular_factor, state.specularExp);

if (specular_factor > 0.0f) {
int specular_attspot = (int)ceilf(256 * 2 * att * spot * specular_factor + 1);
int specular_attspot = (int)LightCeil(256 * 2 * att * spot * specular_factor + 1);
if (specular_attspot > 512)
specular_attspot = 512;

Expand Down