Skip to content

Commit

Permalink
Add dynamic AVX detection for packed GEMM
Browse files Browse the repository at this point in the history
  • Loading branch information
ykim362 committed Jul 10, 2019
1 parent a016611 commit c2074c9
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 3 deletions.
2 changes: 1 addition & 1 deletion src/3rd_party/fbgemm
Submodule fbgemm updated 1 files
+2 −2 src/Utils.cc
10 changes: 8 additions & 2 deletions src/graph/expression_operators.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@
#include "tensors/cpu/int16.h"
#include "tensors/cpu/expanded_gemm.h"

#if USE_FBGEMM
#include "fbgemm/Utils.h"
#endif

namespace marian {

Expr debug(Expr a, const std::string& message) {
Expand Down Expand Up @@ -444,7 +448,8 @@ Expr affine(Expr a, Expr b, Expr bias, bool transA, bool transB, float scale) {
// a combination of constant nodes which is also a constant variable
// when it's computed once.
// Those memoized nodes are cached to avoid duplicated computations.
if(b->memoize()) {
// 07/10/2019 - Use packed GEMM only if the cpu architecture supports AVX2
if(fbgemm::fbgemmHasAvx2Support() && b->memoize()) {
// add packed GEMM algorithm variant (Packed GEMM) to the autotuner
// Once an algorithm is added to the autotuner,
// the autotuner runs all the added algorithms a designated number of times.
Expand Down Expand Up @@ -538,7 +543,8 @@ Expr affine(Expr a, Expr b, Expr bias, bool transA, bool transB, float scale) {
scale);
} else if(gemmType == GemmType::FbFp16Packed) {
#if USE_FBGEMM
if(b->memoize()) {
// 07/10/2019 - Use packed GEMM only if the cpu architecture supports AVX2
if(fbgemm::fbgemmHasAvx2Support() && b->memoize()) {
auto packed = cpu::variant::pack(b, cpu::variant::PackMatrix::B, transB, clipValue);

return cpu::variant::affine(
Expand Down

0 comments on commit c2074c9

Please sign in to comment.