Skip to content

Commit

Permalink
Add new primitives: compute_in|out_degrees, compute_in|out_weight_sums to graph_view_t (#1394)
Browse files Browse the repository at this point in the history

Close #1208

- [x] add compute_in|out_degrees, compute_in|out_weight_sums
- [x] replace PageRank's custom out-weight-sums computation with graph_view_t's compute_out_weight_sums
- [x] add SG C++ tests

Authors:
  - Seunghwa Kang (@seunghwak)

Approvers:
  - Chuck Hastings (@ChuckHastings)
  - Alex Fender (@afender)

URL: #1394
  • Loading branch information
seunghwak committed Feb 25, 2021
1 parent 99d1328 commit 5589605
Show file tree
Hide file tree
Showing 10 changed files with 637 additions and 26 deletions.
22 changes: 19 additions & 3 deletions cpp/include/experimental/detail/graph_utils.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include <rmm/device_uvector.hpp>

#include <thrust/sort.h>
#include <thrust/tabulate.h>
#include <thrust/transform.h>
#include <cuco/detail/hash_functions.cuh>

Expand All @@ -39,7 +40,7 @@ namespace detail {
// compute the numbers of nonzeros in rows (of the graph adjacency matrix, if store_transposed =
// false) or columns (of the graph adjacency matrix, if store_transposed = true)
template <typename vertex_t, typename edge_t>
rmm::device_uvector<edge_t> compute_major_degree(
rmm::device_uvector<edge_t> compute_major_degrees(
raft::handle_t const &handle,
std::vector<edge_t const *> const &adj_matrix_partition_offsets,
partition_t<vertex_t> const &partition)
Expand Down Expand Up @@ -120,7 +121,7 @@ rmm::device_uvector<edge_t> compute_major_degree(
// compute the numbers of nonzeros in rows (of the graph adjacency matrix, if store_transposed =
// false) or columns (of the graph adjacency matrix, if store_transposed = true)
template <typename vertex_t, typename edge_t>
rmm::device_uvector<edge_t> compute_major_degree(
rmm::device_uvector<edge_t> compute_major_degrees(
raft::handle_t const &handle,
std::vector<rmm::device_uvector<edge_t>> const &adj_matrix_partition_offsets,
partition_t<vertex_t> const &partition)
Expand All @@ -131,7 +132,22 @@ rmm::device_uvector<edge_t> compute_major_degree(
adj_matrix_partition_offsets.end(),
tmp_offsets.begin(),
[](auto const &offsets) { return offsets.data(); });
return compute_major_degree(handle, tmp_offsets, partition);
return compute_major_degrees(handle, tmp_offsets, partition);
}

// compute the numbers of nonzeros in rows (of the graph adjacency matrix, if store_transposed =
// false) or columns (of the graph adjacency matrix, if store_transposed = true); in a CSR/CSC
// layout the degree of vertex i is the width of its offset range, i.e. offsets[i + 1] - offsets[i].
template <typename vertex_t, typename edge_t>
rmm::device_uvector<edge_t> compute_major_degrees(raft::handle_t const &handle,
                                                  edge_t const *offsets,
                                                  vertex_t number_of_vertices)
{
  rmm::device_uvector<edge_t> degrees(number_of_vertices, handle.get_stream());
  // pairwise difference of consecutive offsets: degrees[i] = offsets[i + 1] - offsets[i]
  thrust::transform(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
                    offsets + 1,
                    offsets + 1 + number_of_vertices,
                    offsets,
                    degrees.begin(),
                    [] __device__(edge_t next, edge_t cur) { return next - cur; });
  return degrees;
}

template <typename vertex_t, typename edge_t>
Expand Down
12 changes: 12 additions & 0 deletions cpp/include/experimental/graph_view.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -494,6 +494,12 @@ class graph_view_t<vertex_t,
: static_cast<weight_t const*>(nullptr);
}

rmm::device_uvector<edge_t> compute_in_degrees(raft::handle_t const& handle) const;
rmm::device_uvector<edge_t> compute_out_degrees(raft::handle_t const& handle) const;

rmm::device_uvector<weight_t> compute_in_weight_sums(raft::handle_t const& handle) const;
rmm::device_uvector<weight_t> compute_out_weight_sums(raft::handle_t const& handle) const;

private:
std::vector<edge_t const*> adj_matrix_partition_offsets_{};
std::vector<vertex_t const*> adj_matrix_partition_indices_{};
Expand Down Expand Up @@ -638,6 +644,12 @@ class graph_view_t<vertex_t,
// private.
weight_t const* weights() const { return weights_; }

rmm::device_uvector<edge_t> compute_in_degrees(raft::handle_t const& handle) const;
rmm::device_uvector<edge_t> compute_out_degrees(raft::handle_t const& handle) const;

rmm::device_uvector<weight_t> compute_in_weight_sums(raft::handle_t const& handle) const;
rmm::device_uvector<weight_t> compute_out_weight_sums(raft::handle_t const& handle) const;

private:
edge_t const* offsets_{nullptr};
vertex_t const* indices_{nullptr};
Expand Down
4 changes: 2 additions & 2 deletions cpp/include/utilities/shuffle_comm.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ rmm::device_uvector<size_t> sort_and_count(raft::comms::comms_t const &comm,
d_tx_value_counts = std::move(d_counts);
}

return std::move(d_tx_value_counts);
return d_tx_value_counts;
}

template <typename VertexIterator, typename ValueIterator, typename KeyToGPUIdOp>
Expand Down Expand Up @@ -111,7 +111,7 @@ rmm::device_uvector<size_t> sort_and_count(raft::comms::comms_t const &comm,
d_tx_value_counts = std::move(d_counts);
}

return std::move(d_tx_value_counts);
return d_tx_value_counts;
}

// inline to suppress a complaint about ODR violation
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/experimental/graph.cu
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,7 @@ graph_t<vertex_t, edge_t, weight_t, store_transposed, multi_gpu, std::enable_if_
// update degree-based segment offsets (to be used for graph analytics kernel optimization)

if (sorted_by_global_degree_within_vertex_partition) {
auto degrees = detail::compute_major_degree(
auto degrees = detail::compute_major_degrees(
*(this->get_handle_ptr()), adj_matrix_partition_offsets_, partition_);

// optional expensive checks (part 2/3)
Expand Down
228 changes: 227 additions & 1 deletion cpp/src/experimental/graph_view.cu
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include <experimental/detail/graph_utils.cuh>
#include <experimental/graph_view.hpp>
#include <partition_manager.hpp>
#include <patterns/copy_v_transform_reduce_in_out_nbr.cuh>
#include <utilities/error.hpp>
#include <utilities/host_scalar_comm.cuh>

Expand Down Expand Up @@ -70,6 +71,83 @@ std::vector<edge_t> update_adj_matrix_partition_edge_counts(
return adj_matrix_partition_edge_counts;
}

template <typename vertex_t,
          typename edge_t,
          typename weight_t,
          bool store_transposed,
          bool multi_gpu>
rmm::device_uvector<edge_t> compute_minor_degrees(
  raft::handle_t const& handle,
  graph_view_t<vertex_t, edge_t, weight_t, store_transposed, multi_gpu> const& graph_view)
{
  // Degrees along the minor axis of the adjacency matrix: reduce a constant 1
  // over each vertex's neighbors on the side not covered by the offset arrays.
  rmm::device_uvector<edge_t> degrees(graph_view.get_number_of_local_vertices(),
                                      handle.get_stream());
  // counts one per incident edge; the src/dst values and weight are ignored
  auto count_op = [] __device__(
                    vertex_t src, vertex_t dst, weight_t w, auto src_val, auto dst_val) {
    return edge_t{1};
  };
  if (!store_transposed) {
    copy_v_transform_reduce_in_nbr(handle,
                                   graph_view,
                                   thrust::make_constant_iterator(0) /* dummy */,
                                   thrust::make_constant_iterator(0) /* dummy */,
                                   count_op,
                                   edge_t{0},
                                   degrees.data());
  } else {
    copy_v_transform_reduce_out_nbr(handle,
                                    graph_view,
                                    thrust::make_constant_iterator(0) /* dummy */,
                                    thrust::make_constant_iterator(0) /* dummy */,
                                    count_op,
                                    edge_t{0},
                                    degrees.data());
  }

  return degrees;
}

template <bool major,
          typename vertex_t,
          typename edge_t,
          typename weight_t,
          bool store_transposed,
          bool multi_gpu>
rmm::device_uvector<weight_t> compute_weight_sums(
  raft::handle_t const& handle,
  graph_view_t<vertex_t, edge_t, weight_t, store_transposed, multi_gpu> const& graph_view)
{
  // Per-vertex sums of incident edge weights. `major` selects which axis of the
  // adjacency matrix the sums are taken over; together with store_transposed it
  // decides whether the in- or out-neighbor reduction visits those edges.
  rmm::device_uvector<weight_t> sums(graph_view.get_number_of_local_vertices(),
                                     handle.get_stream());
  // accumulates the edge weight; the src/dst values are ignored
  auto sum_op = [] __device__(vertex_t src, vertex_t dst, weight_t w, auto src_val, auto dst_val) {
    return w;
  };
  if (major != store_transposed) {
    copy_v_transform_reduce_out_nbr(handle,
                                    graph_view,
                                    thrust::make_constant_iterator(0) /* dummy */,
                                    thrust::make_constant_iterator(0) /* dummy */,
                                    sum_op,
                                    weight_t{0.0},
                                    sums.data());
  } else {
    copy_v_transform_reduce_in_nbr(handle,
                                   graph_view,
                                   thrust::make_constant_iterator(0) /* dummy */,
                                   thrust::make_constant_iterator(0) /* dummy */,
                                   sum_op,
                                   weight_t{0.0},
                                   sums.data());
  }

  return sums;
}

} // namespace

template <typename vertex_t,
Expand Down Expand Up @@ -180,7 +258,7 @@ graph_view_t<vertex_t, edge_t, weight_t, store_transposed, multi_gpu, std::enabl
"number_of_local_edges.");

if (sorted_by_global_degree_within_vertex_partition) {
auto degrees = detail::compute_major_degree(handle, adj_matrix_partition_offsets, partition);
auto degrees = detail::compute_major_degrees(handle, adj_matrix_partition_offsets, partition);
CUGRAPH_EXPECTS(
thrust::is_sorted(rmm::exec_policy(default_stream)->on(default_stream),
degrees.begin(),
Expand Down Expand Up @@ -301,6 +379,154 @@ graph_view_t<vertex_t,
}
}

template <typename vertex_t,
          typename edge_t,
          typename weight_t,
          bool store_transposed,
          bool multi_gpu>
rmm::device_uvector<edge_t>
graph_view_t<vertex_t, edge_t, weight_t, store_transposed, multi_gpu, std::enable_if_t<multi_gpu>>::
  compute_in_degrees(raft::handle_t const& handle) const
{
  // In-degrees come from the partitioned offset arrays (major axis) when the
  // adjacency matrix is stored transposed, otherwise from the minor-axis reduction.
  return store_transposed
           ? detail::compute_major_degrees(
               handle, this->adj_matrix_partition_offsets_, this->partition_)
           : compute_minor_degrees(handle, *this);
}

template <typename vertex_t,
          typename edge_t,
          typename weight_t,
          bool store_transposed,
          bool multi_gpu>
rmm::device_uvector<edge_t>
graph_view_t<vertex_t,
             edge_t,
             weight_t,
             store_transposed,
             multi_gpu,
             std::enable_if_t<!multi_gpu>>::compute_in_degrees(raft::handle_t const& handle) const
{
  // Single-GPU: in-degrees come straight from the CSC offsets (major axis) when
  // the adjacency matrix is stored transposed, otherwise from the minor-axis reduction.
  return store_transposed
           ? detail::compute_major_degrees(
               handle, this->offsets_, this->get_number_of_local_vertices())
           : compute_minor_degrees(handle, *this);
}

template <typename vertex_t,
          typename edge_t,
          typename weight_t,
          bool store_transposed,
          bool multi_gpu>
rmm::device_uvector<edge_t>
graph_view_t<vertex_t, edge_t, weight_t, store_transposed, multi_gpu, std::enable_if_t<multi_gpu>>::
  compute_out_degrees(raft::handle_t const& handle) const
{
  // Mirror image of compute_in_degrees: the offset arrays hold out-edges when
  // the adjacency matrix is NOT stored transposed.
  return store_transposed
           ? compute_minor_degrees(handle, *this)
           : detail::compute_major_degrees(
               handle, this->adj_matrix_partition_offsets_, this->partition_);
}

template <typename vertex_t,
          typename edge_t,
          typename weight_t,
          bool store_transposed,
          bool multi_gpu>
rmm::device_uvector<edge_t>
graph_view_t<vertex_t,
             edge_t,
             weight_t,
             store_transposed,
             multi_gpu,
             std::enable_if_t<!multi_gpu>>::compute_out_degrees(raft::handle_t const& handle) const
{
  // Single-GPU mirror of compute_in_degrees: the CSR offsets hold out-edges
  // when the adjacency matrix is NOT stored transposed.
  return store_transposed
           ? compute_minor_degrees(handle, *this)
           : detail::compute_major_degrees(
               handle, this->offsets_, this->get_number_of_local_vertices());
}

template <typename vertex_t,
          typename edge_t,
          typename weight_t,
          bool store_transposed,
          bool multi_gpu>
rmm::device_uvector<weight_t>
graph_view_t<vertex_t, edge_t, weight_t, store_transposed, multi_gpu, std::enable_if_t<multi_gpu>>::
  compute_in_weight_sums(raft::handle_t const& handle) const
{
  // In-edges live on the major axis iff the adjacency matrix is stored transposed.
  return store_transposed ? compute_weight_sums<true>(handle, *this)
                          : compute_weight_sums<false>(handle, *this);
}

template <typename vertex_t,
          typename edge_t,
          typename weight_t,
          bool store_transposed,
          bool multi_gpu>
rmm::device_uvector<weight_t> graph_view_t<
  vertex_t,
  edge_t,
  weight_t,
  store_transposed,
  multi_gpu,
  std::enable_if_t<!multi_gpu>>::compute_in_weight_sums(raft::handle_t const& handle) const
{
  // In-edges live on the major axis iff the adjacency matrix is stored transposed.
  return store_transposed ? compute_weight_sums<true>(handle, *this)
                          : compute_weight_sums<false>(handle, *this);
}

template <typename vertex_t,
          typename edge_t,
          typename weight_t,
          bool store_transposed,
          bool multi_gpu>
rmm::device_uvector<weight_t>
graph_view_t<vertex_t, edge_t, weight_t, store_transposed, multi_gpu, std::enable_if_t<multi_gpu>>::
  compute_out_weight_sums(raft::handle_t const& handle) const
{
  // Out-edges live on the major axis iff the adjacency matrix is NOT stored transposed.
  return store_transposed ? compute_weight_sums<false>(handle, *this)
                          : compute_weight_sums<true>(handle, *this);
}

template <typename vertex_t,
          typename edge_t,
          typename weight_t,
          bool store_transposed,
          bool multi_gpu>
rmm::device_uvector<weight_t> graph_view_t<
  vertex_t,
  edge_t,
  weight_t,
  store_transposed,
  multi_gpu,
  std::enable_if_t<!multi_gpu>>::compute_out_weight_sums(raft::handle_t const& handle) const
{
  // Out-edges live on the major axis iff the adjacency matrix is NOT stored transposed.
  return store_transposed ? compute_weight_sums<false>(handle, *this)
                          : compute_weight_sums<true>(handle, *this);
}

// explicit instantiation

template class graph_view_t<int32_t, int32_t, float, true, true>;
Expand Down
20 changes: 3 additions & 17 deletions cpp/src/experimental/pagerank.cu
Original file line number Diff line number Diff line change
Expand Up @@ -142,23 +142,9 @@ void pagerank(raft::handle_t const& handle,

// 2. compute the sums of the out-going edge weights (if not provided)

rmm::device_uvector<weight_t> tmp_vertex_out_weight_sums(0, handle.get_stream());
if (precomputed_vertex_out_weight_sums == nullptr) {
tmp_vertex_out_weight_sums.resize(pull_graph_view.get_number_of_local_vertices(),
handle.get_stream());
// FIXME: better refactor this out (computing out-degree).
copy_v_transform_reduce_out_nbr(
handle,
pull_graph_view,
thrust::make_constant_iterator(0) /* dummy */,
thrust::make_constant_iterator(0) /* dummy */,
[alpha] __device__(vertex_t src, vertex_t dst, weight_t w, auto src_val, auto dst_val) {
return w;
},
weight_t{0.0},
tmp_vertex_out_weight_sums.data());
}

auto tmp_vertex_out_weight_sums = precomputed_vertex_out_weight_sums == nullptr
? pull_graph_view.compute_out_weight_sums(handle)
: rmm::device_uvector<weight_t>(0, handle.get_stream());
auto vertex_out_weight_sums = precomputed_vertex_out_weight_sums != nullptr
? precomputed_vertex_out_weight_sums
: tmp_vertex_out_weight_sums.data();
Expand Down
Loading

0 comments on commit 5589605

Please sign in to comment.