Add dynamic AVX detection for packed GEMM

shashwatup9k · Jul 10, 2019 · c2074c9 · c2074c9
1 parent a016611
commit c2074c9
Show file tree

Hide file tree

Showing 2 changed files with 9 additions and 3 deletions.
diff --git a/src/3rd_party/fbgemm b/src/3rd_party/fbgemm
diff --git a/src/graph/expression_operators.cpp b/src/graph/expression_operators.cpp
@@ -9,6 +9,10 @@
 #include "tensors/cpu/int16.h"
 #include "tensors/cpu/expanded_gemm.h"
 
+#if USE_FBGEMM
+#include "fbgemm/Utils.h"
+#endif
+
 namespace marian {
 
 Expr debug(Expr a, const std::string& message) {
@@ -444,7 +448,8 @@ Expr affine(Expr a, Expr b, Expr bias, bool transA, bool transB, float scale) {
       // a combination of constant nodes, which is itself a constant variable
       // once it has been computed.
       // Those memoized nodes are cached to avoid duplicated computations.
-      if(b->memoize()) {
+      // 07/10/2019 - Use packed GEMM only if the cpu architecture supports AVX2
+      if(fbgemm::fbgemmHasAvx2Support() && b->memoize()) {
         // Add the packed GEMM algorithm variant to the autotuner.
         // Once an algorithm is added to the autotuner,
         // the autotuner runs all the added algorithms a designated number of times.
@@ -538,7 +543,8 @@ Expr affine(Expr a, Expr b, Expr bias, bool transA, bool transB, float scale) {
             scale);
       } else if(gemmType == GemmType::FbFp16Packed) {
 #if USE_FBGEMM
-        if(b->memoize()) {
+        // 07/10/2019 - Use packed GEMM only if the cpu architecture supports AVX2
+        if(fbgemm::fbgemmHasAvx2Support() && b->memoize()) {
           auto packed = cpu::variant::pack(b, cpu::variant::PackMatrix::B, transB, clipValue);
 
           return cpu::variant::affine(