From c2074c98344ce4ea0da16b36ba293dad3d0f5e4e Mon Sep 17 00:00:00 2001
From: Young Jin Kim
Date: Wed, 10 Jul 2019 15:49:35 -0700
Subject: [PATCH] Add dynamic AVX detection for packed GEMM

---
 src/3rd_party/fbgemm               |  2 +-
 src/graph/expression_operators.cpp | 10 ++++++++--
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/src/3rd_party/fbgemm b/src/3rd_party/fbgemm
index bc33ed947..49e8018ab 160000
--- a/src/3rd_party/fbgemm
+++ b/src/3rd_party/fbgemm
@@ -1 +1 @@
-Subproject commit bc33ed9474b4f944dc04a579c25919778eceb9d5
+Subproject commit 49e8018ab2397c175354317b35c6be6dd68f8932
diff --git a/src/graph/expression_operators.cpp b/src/graph/expression_operators.cpp
index 0e762067a..8f1fb54a5 100755
--- a/src/graph/expression_operators.cpp
+++ b/src/graph/expression_operators.cpp
@@ -9,6 +9,10 @@
 #include "tensors/cpu/int16.h"
 #include "tensors/cpu/expanded_gemm.h"
 
+#if USE_FBGEMM
+#include "fbgemm/Utils.h"
+#endif
+
 namespace marian {
 
 Expr debug(Expr a, const std::string& message) {
@@ -444,7 +448,8 @@ Expr affine(Expr a, Expr b, Expr bias, bool transA, bool transB, float scale) {
       // a combination of contant nodes which is also a constant variable
       // when it's computed once.
      // Those memoized nodes are cached to avoid duplicated computations.
-      if(b->memoize()) {
+      // 07/10/2019 - Use packed GEMM only if the cpu architecture supports AVX2
+      if(fbgemm::fbgemmHasAvx2Support() && b->memoize()) {
         // add packed GEMM algorithm variant (Packed GEMM) to the autotuner
         // Once an algorithm is added to the autotuner,
         // autotuner runs all the added algorithms for a designated times.
@@ -538,7 +543,8 @@ Expr affine(Expr a, Expr b, Expr bias, bool transA, bool transB, float scale) {
         scale);
   } else if(gemmType == GemmType::FbFp16Packed) {
 #if USE_FBGEMM
-    if(b->memoize()) {
+    // 07/10/2019 - Use packed GEMM only if the cpu architecture supports AVX2
+    if(fbgemm::fbgemmHasAvx2Support() && b->memoize()) {
      auto packed = cpu::variant::pack(b, cpu::variant::PackMatrix::B, transB, clipValue);
 
      return cpu::variant::affine(
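
The core of the change is gating the packed-GEMM path on a runtime CPU-feature check, `fbgemm::fbgemmHasAvx2Support()` from FBGEMM's `fbgemm/Utils.h`, combined with the existing `b->memoize()` condition. Below is a minimal standalone sketch, not part of the patch, showing how that same check can select a code path at runtime; the `GemmPath` enum and `selectGemm()` helper are illustrative names invented here, while the FBGEMM call is the one used in the diff.

```cpp
// Minimal sketch: runtime AVX2 detection gating a packed-GEMM path.
// Only fbgemm::fbgemmHasAvx2Support() comes from FBGEMM (fbgemm/Utils.h);
// GemmPath and selectGemm() are hypothetical, for illustration only.
#include <iostream>

#include "fbgemm/Utils.h"  // declares fbgemm::fbgemmHasAvx2Support()

enum class GemmPath { Packed, Regular };

// Mirrors the patch's condition: use the packed path only when the CPU
// supports AVX2 *and* the weight matrix is memoized (constant, cacheable).
GemmPath selectGemm(bool weightIsMemoized) {
  if (fbgemm::fbgemmHasAvx2Support() && weightIsMemoized)
    return GemmPath::Packed;
  return GemmPath::Regular;
}

int main() {
  bool memoized = true;  // stand-in for b->memoize() in the patch
  std::cout << (selectGemm(memoized) == GemmPath::Packed
                    ? "packed GEMM path\n"
                    : "regular GEMM fallback\n");
  return 0;
}
```

Checking the CPU capability at runtime rather than at compile time lets a single binary run on machines without AVX2 by silently falling back to the regular GEMM path instead of executing unsupported instructions.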