Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

debug avx2 slide left issue #1734

Merged
merged 1 commit into from
Jan 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 45 additions & 4 deletions include/eve/detail/shuffle_v2/simd/x86/idxm.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,12 +53,13 @@ x86_permute2f128_one_reg_mask(std::span<const std::ptrdiff_t, 2> _idxs)
constexpr int
x86_blend_immediate_mask(std::span<const std::ptrdiff_t> idxs, std::ptrdiff_t g)
{
int r = 0;
int s = std::ssize(idxs);
int r = 0;
int s = std::ssize(idxs);
int pos = 0;
for(auto i : idxs )
for( auto i : idxs )
{
for (int j = 0; j != g; ++j) {
for( int j = 0; j != g; ++j )
{
// we_ < s
if( i * g >= s ) { r |= 1 << pos; }
++pos;
Expand All @@ -67,4 +68,44 @@ x86_blend_immediate_mask(std::span<const std::ptrdiff_t> idxs, std::ptrdiff_t g)
return r;
}

template<std::ptrdiff_t G, std::size_t N>
constexpr auto
x86_pshuvb_pattern(const std::array<std::ptrdiff_t, N>& idxs);

template<std::ptrdiff_t G, std::size_t N>
constexpr auto
x86_pshuvb_pattern(std::span<const std::ptrdiff_t, N> idxs)
{
if constexpr( G != 1 ) return x86_pshuvb_pattern<1>(expand_group<G>(idxs));
else
{
static_assert(N == 16 || N == 32 || N == 64);
using arr_t = std::array<std::ptrdiff_t, N>;
using res_t = std::optional<arr_t>;

arr_t res = {};
for( std::size_t i = 0; i != N; ++i )
{
std::ptrdiff_t lb = i / 16 * 16;
std::ptrdiff_t ub = lb + 16;
if( idxs[i] < 0 )
{
res[i] = 0xff;
continue;
}
if( idxs[i] < lb || idxs[i] > ub ) return res_t {};
res[i] = idxs[i] - lb;
}

return res_t {res};
}
}

template<std::ptrdiff_t G, std::size_t N>
constexpr auto
x86_pshuvb_pattern(const std::array<std::ptrdiff_t, N>& idxs)
{
return x86_pshuvb_pattern<G>(std::span<const std::ptrdiff_t, N>(idxs));
}

}
34 changes: 11 additions & 23 deletions include/eve/detail/shuffle_v2/simd/x86/shuffle_l3.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,21 +14,10 @@ template<typename N, std::ptrdiff_t... I>
EVE_FORCEINLINE auto
x86_pshuvb(pattern_t<I...>, wide<std::uint8_t, N> x)
{
if constexpr( N() == 16 )
{
wide<std::uint8_t, N> mask {I...};
return _mm_shuffle_epi8(x, mask);
}
else if constexpr( N() == 32 )
{
wide<std::uint8_t, N> mask {I..., I...};
return _mm256_shuffle_epi8(x, mask);
}
else
{
wide<std::uint8_t, N> mask {I..., I..., I..., I...};
return _mm512_shuffle_epi8(x, mask);
}
wide<std::uint8_t, N> mask {I...};
if constexpr( N() == 16 ) return _mm_shuffle_epi8(x, mask);
else if constexpr( N() == 32 ) return _mm256_shuffle_epi8(x, mask);
else return _mm512_shuffle_epi8(x, mask);
}

template<typename P, arithmetic_scalar_value T, typename N, std::ptrdiff_t G>
Expand All @@ -37,16 +26,15 @@ shuffle_l3_x86_pshuvb(P, fixed<G>, wide<T, N> x)
{
if constexpr( current_api < ssse3 ) return no_matching_shuffle;
else if constexpr( current_api == avx && P::reg_size == 32 ) return no_matching_shuffle;
else if constexpr( !P::repeated_16 ) return no_matching_shuffle;
else
{
constexpr auto no_we = idxm::replace_we(*P::repeated_16, 0xff);
constexpr auto no_na = idxm::replace_na(no_we, 0xff);
constexpr auto expanded = idxm::expand_group<P::g_size>(no_na);

using u8xN = wide<std::uint8_t, eve::fixed<P::reg_size>>;

return x86_pshuvb(idxm::to_pattern<expanded>(), eve::bit_cast(x, eve::as<u8xN> {}));
constexpr auto pshuvb_pattern = idxm::x86_pshuvb_pattern<G * sizeof(T)>(P::idxs);
if constexpr (!pshuvb_pattern) return no_matching_shuffle;
else
{
using u8xN = wide<std::uint8_t, eve::fixed<P::reg_size>>;
return x86_pshuvb(idxm::to_pattern<*pshuvb_pattern>(), eve::bit_cast(x, eve::as<u8xN> {}));
}
}
}

Expand Down
2 changes: 1 addition & 1 deletion include/eve/detail/shuffle_v2/simd/x86/shuffle_l4_l5.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ shuffle_l4_l5_x86_put_u64x2_in_position(P, fixed<G>, wide<T, N> x)
if constexpr( P::reg_size < 32 ) return no;
// there is nothing we can do for shorts on avx
else if constexpr( P::reg_size == 32 && P::g_size <= 2 && current_api == avx ) return no;
else if constexpr( P::has_zeroes && current_api <= avx2 ) return no;
else if constexpr( P::has_zeroes && current_api < avx2 ) return no;
else if constexpr( !P::shuffle_16_first ) return no;
else
{
Expand Down
17 changes: 11 additions & 6 deletions test/unit/api/regular/shuffle_v2/idxm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -192,11 +192,11 @@ TTS_CASE("is_repeating_pattern")
no_test(std::array {na_, 0, we_, 3});

// 2 registers [0, 1, 2, 3] [4, 5, 6, 7]
yes_test(std::array{0, 4, 2, 6}, std::array{0, 4});
yes_test(std::array{1, 4, 3, 6}, std::array{1, 4});
yes_test(std::array{1, 4, 3, we_}, std::array{1, 4});
yes_test(std::array{1, we_, 3, 6}, std::array{1, 4});
yes_test(std::array{na_, 4, na_, 6}, std::array{na_, 4});
yes_test(std::array {0, 4, 2, 6}, std::array {0, 4});
yes_test(std::array {1, 4, 3, 6}, std::array {1, 4});
yes_test(std::array {1, 4, 3, we_}, std::array {1, 4});
yes_test(std::array {1, we_, 3, 6}, std::array {1, 4});
yes_test(std::array {na_, 4, na_, 6}, std::array {na_, 4});

no_test(std::array {0, 4, 2, 7});
no_test(std::array {0, 3});
Expand Down Expand Up @@ -837,14 +837,19 @@ TTS_CASE("put bigger group in position")
yes_test(std::array {3, 2, 0, 1}, eve::lane<2>, std::array {1, 0}, std::array {1, 0, 2, 3});
yes_test(std::array {3, 2, na_, 1}, eve::lane<2>, std::array {1, 0}, std::array {1, 0, na_, 3});
yes_test(std::array {3, 2, 3, 2}, eve::lane<2>, std::array {1, 1}, std::array {1, 0, 3, 2});
yes_test(std::array {3, 2, na_, na_}, eve::lane<2>, std::array {1, we_}, std::array {1, 0, na_, na_});
yes_test(
std::array {3, 2, na_, na_}, eve::lane<2>, std::array {1, we_}, std::array {1, 0, na_, na_});
yes_test(std::array {3, 2, 0, 1}, eve::lane<4>, std::array {0}, std::array {3, 2, 0, 1});
yes_test(std::array {3, 2, 0, 1}, eve::lane<4>, std::array {0}, std::array {3, 2, 0, 1});
yes_test(std::array {3, 2, 0, 1}, eve::lane<1>, std::array {3, 2, 0, 1}, std::array {0, 1, 2, 3});
yes_test(std::array {3, 2, 6, 7, 6, 7, 0, 1},
eve::lane<2>,
std::array {1, 3, 3, 0},
std::array {1, 0, 2, 3, 4, 5, 6, 7});
yes_test(std::array {7, na_, na_, na_, na_, na_, na_, na_},
eve::lane<4>,
std::array {1, we_},
std::array {3, na_, na_, na_, na_, na_, na_, na_});

no_test(std::array {3, 0, 0, 1}, eve::lane<2>);
};
Expand Down
9 changes: 4 additions & 5 deletions test/unit/api/regular/shuffle_v2/slide_left_1.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,10 @@ TTS_CASE("Slide left 1, example") {
TTS_CASE("Explicit") {
using w_i = eve::wide<std::uint32_t, eve::fixed<8>>;
w_i x{1, 2, 3, 4, 5, 6, 7, 8};
constexpr auto na_ = eve::na_;
auto [y, l] = eve::shuffle_v2_core(x, eve::lane<4>, eve::pattern<0, na_>);
//auto y = eve::slide_left2(x, eve::index<4>);
//TTS_EQUAL(y, w_i({8, 0, 0, 0, 0, 0, 0, 0}));
TTS_EQUAL(l(), 2);
//constexpr auto na_ = eve::na_;
auto y = eve::slide_left2(x, eve::index<7>);
TTS_EQUAL(y, w_i({8, 0, 0, 0, 0, 0, 0, 0}));
//TTS_EQUAL(l(), 2);
};
#endif

Expand Down