rapidsai · rapids-bot · Feb 5, 2021 · Jan 7, 2021 · Jan 7, 2021 · Jan 7, 2021
@@ -357,7 +357,7 @@ add_library(cugraph SHARED
     src/community/louvain.cu
     src/community/leiden.cu
     src/community/ktruss.cu
-    src/community/ECG.cu
+    src/community/ecg.cu
     src/community/triangles_counting.cu
     src/community/extract_subgraph_by_vertex.cu
     src/cores/core_number.cu

@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include <memory>
+#include <rmm/device_buffer.hpp>
+#include <vector>
+
+namespace cugraph {
+
+/**
+ * @brief Container holding one device buffer per dendrogram level.
+ *
+ * Each call to add_level() appends an rmm::device_buffer sized for `num_verts`
+ * elements of `vertex_t` and records that element count alongside it.
+ *
+ * None of the accessors perform bounds checking.  The `current_level*`
+ * accessors require that at least one level has been added (with no levels,
+ * current_level() wraps around to the largest size_t value), and the
+ * `*_unsafe` accessors require `level < num_levels()`.
+ *
+ * @tparam vertex_t Element type stored in each level buffer.
+ */
+template <typename vertex_t>
+class Dendrogram {
+ public:
+  Dendrogram() : level_size_(), level_ptr_() {}
+
+  /**
+   * @brief Append a new level with room for `num_verts` elements.
+   *
+   * @param num_verts Number of `vertex_t` elements the new level holds.
+   * @param stream    Stream passed to the rmm::device_buffer constructor.
+   *                  Defaults to stream 0, the value that used to be hard-coded.
+   * @param mr        Memory resource passed to the rmm::device_buffer constructor.
+   *                  Defaults to the current device resource, as before.
+   */
+  void add_level(vertex_t num_verts,
+                 cudaStream_t stream                 = 0,
+                 rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource())
+  {
+    level_ptr_.push_back(
+      std::make_unique<rmm::device_buffer>(num_verts * sizeof(vertex_t), stream, mr));
+    level_size_.push_back(num_verts);
+  }
+
+  /// Index of the most recently added level.  Precondition: num_levels() > 0.
+  size_t current_level() const { return level_size_.size() - 1; }
+
+  /// Number of levels added so far.
+  size_t num_levels() const { return level_size_.size(); }
+
+  /// Pointer to the start of the buffer for `level`; `level` is not range-checked.
+  vertex_t *get_level_ptr_unsafe(size_t level) const
+  {
+    return static_cast<vertex_t *>(level_ptr_[level]->data());
+  }
+
+  /// Element count recorded for `level`; `level` is not range-checked.
+  vertex_t get_level_size_unsafe(size_t level) const { return level_size_[level]; }
+
+  /// Pointer to the first element of the most recently added level.
+  vertex_t *current_level_begin() const { return get_level_ptr_unsafe(current_level()); }
+
+  /// Pointer one past the last element of the most recently added level.
+  vertex_t *current_level_end() const { return current_level_begin() + current_level_size(); }
+
+  /// Element count of the most recently added level.
+  vertex_t current_level_size() const { return get_level_size_unsafe(current_level()); }
+
+ private:
+  // level_size_[i] is the element count of the buffer owned by level_ptr_[i].
+  std::vector<vertex_t> level_size_;
+  std::vector<std::unique_ptr<rmm::device_buffer>> level_ptr_;
+};
+
+}  // namespace cugraph
@@ -18,10 +18,11 @@
 
 #include <rmm/thrust_rmm_allocator.h>
 #include <thrust/random.h>
+#include <community/louvain.cuh>
 #include <converters/permute_graph.cuh>
 #include <ctime>
 #include <utilities/error.hpp>
-#include "utilities/graph_utils.cuh"
+#include <utilities/graph_utils.cuh>
 
 namespace {
 template <typename IndexType>
@@ -42,25 +43,20 @@ binsearch_maxle(const IndexType *vec, const IndexType val, IndexType low, IndexT
 }
 
 template <typename IdxT, typename ValT>
-__global__ void match_check_kernel(IdxT size,
-                                   IdxT num_verts,
-                                   IdxT *offsets,
-                                   IdxT *indices,
-                                   IdxT *permutation,
-                                   IdxT *parts,
-                                   ValT *weights)
+__global__ void match_check_kernel(
+  IdxT size, IdxT num_verts, IdxT *offsets, IdxT *indices, IdxT *parts, ValT *weights)
 {
   IdxT tid = blockIdx.x * blockDim.x + threadIdx.x;
   while (tid < size) {
     IdxT source = binsearch_maxle(offsets, tid, (IdxT)0, num_verts);
     IdxT dest   = indices[tid];
-    if (parts[permutation[source]] == parts[permutation[dest]]) weights[tid] += 1;
+    if (parts[source] == parts[dest]) weights[tid] += 1;
     tid += gridDim.x * blockDim.x;
   }
 }
 
 struct prg {
-  __host__ __device__ float operator()(int n)
+  __device__ float operator()(int n)
   {
     thrust::default_random_engine rng;
     thrust::uniform_real_distribution<float> dist(0.0, 1.0);
@@ -103,6 +99,31 @@ void get_permutation_vector(T size, T seed, T *permutation, cudaStream_t stream)
     rmm::exec_policy(stream)->on(stream), randoms_v.begin(), randoms_v.end(), permutation);
 }
 
+/**
+ * @brief Louvain runner that seeds every new dendrogram level through
+ *        get_permutation_vector().
+ *
+ * The seed given at construction is stored and forwarded, unchanged, each time
+ * initialize_dendrogram_level() is called.
+ *
+ * NOTE(review): initialize_dendrogram_level() presumably replaces a hook of the
+ * same name in cugraph::Louvain -- confirm against the base class declaration.
+ *
+ * @tparam graph_type Graph view type; must expose vertex_type, edge_type and
+ *                    weight_type.
+ */
+template <typename graph_type>
+class EcgLouvain : public cugraph::Louvain<graph_type> {
+ public:
+  using graph_t  = graph_type;
+  using vertex_t = typename graph_type::vertex_type;
+  using edge_t   = typename graph_type::edge_type;
+  using weight_t = typename graph_type::weight_type;
+
+  /**
+   * @param handle Forwarded to the cugraph::Louvain constructor.
+   * @param graph  Forwarded to the cugraph::Louvain constructor.
+   * @param seed   Seed later passed to get_permutation_vector().
+   */
+  EcgLouvain(raft::handle_t const &handle, graph_type const &graph, vertex_t seed)
+    : cugraph::Louvain<graph_type>(handle, graph), seed_{seed}
+  {
+  }
+
+  /**
+   * @brief Add a dendrogram level of `num_vertices` entries and hand its buffer
+   *        to get_permutation_vector() together with the stored seed.
+   */
+  void initialize_dendrogram_level(vertex_t num_vertices)
+  {
+    auto &dendrogram = *(this->dendrogram_);
+    dendrogram.add_level(num_vertices);
+
+    // The level just added is the current one; its buffer receives the permutation.
+    auto *level_begin = dendrogram.current_level_begin();
+    get_permutation_vector(num_vertices, seed_, level_begin, this->stream_);
+  }
+
+ private:
+  vertex_t seed_;
+};
+
 }  // anonymous namespace
 
 namespace cugraph {
@@ -114,37 +135,51 @@ void ecg(raft::handle_t const &handle,
          vertex_t ensemble_size,
          vertex_t *clustering)
 {
+  using graph_type = GraphCSRView<vertex_t, edge_t, weight_t>;
+
   CUGRAPH_EXPECTS(graph.edge_data != nullptr,
                   "Invalid input argument: louvain expects a weighted graph");
   CUGRAPH_EXPECTS(clustering != nullptr, "Invalid input argument: clustering is NULL");
 
+  // TODO:  New idea... rather than creating a permuted graph
+  //     Observe that because of the up/down behavior... the only difference graph ordering
+  //     has on the computation is in deciding what moves in each up/down pass.
+  //     This *should be* equivalent to the following:
+  //        Instead of initializing each vertex to be in a cluster by itself that
+  //        is equal to cluster id, initialize each vertex to be in a cluster by
+  //        itself equal to a random id.  For each random run of a 1-level Louvain,
+  //        the cluster ids would be randomized differently.
+  //
+  //        For MNMG, we could assume an equal distribution across the GPUs and
+  //        generate to a pattern so that we don't need to do any comms.  It wouldn't
+  //        be completely random, but it should result in an appropriate amount of
+  //        randomness to get the desired effect.
+  //
+  //        Note that this would require implementing the highest level Louvain function,
+  //        or splitting the initialization into an overridable function...
+  //
+  //
+  //  MNMG issue... in order to create an MNMG implementation we need to create
+  //    a distributed implementation of get_permutation_vector, preferably without
+  //    comms...
+  //
+
   cudaStream_t stream{0};
 
   rmm::device_vector<weight_t> ecg_weights_v(graph.edge_data,
                                              graph.edge_data + graph.number_of_edges);
 
   vertex_t size{graph.number_of_vertices};
+  // TODO:  This seed should be a parameter
+  // TODO:  MNMG - should add the rank to the seed so every thread is a separate seed
   vertex_t seed{1};
 
-  auto permuted_graph = std::make_unique<GraphCSR<vertex_t, edge_t, weight_t>>(
-    size, graph.number_of_edges, graph.has_data());
-
   // Iterate over each member of the ensemble
   for (vertex_t i = 0; i < ensemble_size; i++) {
-    // Take random permutation of the graph
-    rmm::device_vector<vertex_t> permutation_v(size);
-    vertex_t *d_permutation = permutation_v.data().get();
-
-    get_permutation_vector(size, seed, d_permutation, stream);
+    EcgLouvain<graph_type> runner(handle, graph, seed);
     seed += size;
 
-    detail::permute_graph<vertex_t, edge_t, weight_t>(graph, d_permutation, permuted_graph->view());
-
-    // Run one level of Louvain clustering on the random permutation
-    rmm::device_vector<vertex_t> parts_v(size);
-    vertex_t *d_parts = parts_v.data().get();
-
-    cugraph::louvain(handle, permuted_graph->view(), d_parts, size_t{1});
+    weight_t wt = runner(size_t{1}, weight_t{1});
 
     // For each edge in the graph determine whether the endpoints are in the same partition
     // Keep a sum for each edge of the total number of times its endpoints are in the same partition
@@ -155,8 +190,7 @@ void ecg(raft::handle_t const &handle,
                                                    graph.number_of_vertices,
                                                    graph.offsets,
                                                    graph.indices,
-                                                   permutation_v.data().get(),
-                                                   d_parts,
+                                                   runner.get_dendrogram().get_level_ptr_unsafe(0),
                                                    ecg_weights_v.data().get());
   }
 

@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include <rmm/thrust_rmm_allocator.h>
+#include <community/dendrogram.cuh>
+#include <experimental/graph_functions.hpp>
+#include <raft/handle.hpp>
+
+namespace cugraph {
+
+/**
+ * @brief Build a per-vertex partition by applying the first `level` dendrogram
+ *        levels in order.
+ *
+ * `d_partition` is first filled with a copy of `d_vertex_ids`.  Then, for each
+ * level l in [0, level), cugraph::experimental::relabel is called on
+ * `d_partition` with (d_vertex_ids, level l buffer) as the old/new label pairs.
+ *
+ * Preconditions (not checked here): the dendrogram has at least one level,
+ * `level <= dendrogram.num_levels()`, and both device pointers reference at
+ * least `dendrogram.get_level_size_unsafe(0)` elements.
+ *
+ * @tparam vertex_t  Vertex/label type.
+ * @tparam multi_gpu Forwarded to cugraph::experimental::relabel.
+ *
+ * @param handle       RAFT handle; its stream is used for the initial copy.
+ * @param dendrogram   Dendrogram whose levels are applied.
+ * @param d_vertex_ids Device pointer to the vertex ids (read only).
+ * @param d_partition  Device pointer receiving the resulting labels.
+ * @param level        Number of dendrogram levels to apply, starting at level 0.
+ */
+template <typename vertex_t, bool multi_gpu>
+void partition_at_level(raft::handle_t const &handle,
+                        Dendrogram<vertex_t> const &dendrogram,
+                        vertex_t const *d_vertex_ids,
+                        vertex_t *d_partition,
+                        size_t level)
+{
+  vertex_t local_num_verts = dendrogram.get_level_size_unsafe(0);
+  auto stream              = handle.get_stream();
+
+  // Start with every vertex labelled by its own id.
+  thrust::copy(rmm::exec_policy(stream)->on(stream),
+               d_vertex_ids,
+               d_vertex_ids + local_num_verts,
+               d_partition);
+
+  // Plain host loop: same iterations, in the same order, as the former
+  // std::for_each over counting iterators, without relying on <algorithm>
+  // being pulled in transitively.
+  for (size_t l = 0; l < level; ++l) {
+    cugraph::experimental::relabel<vertex_t, multi_gpu>(
+      handle,
+      std::tuple<vertex_t const *, vertex_t const *>(d_vertex_ids,
+                                                     dendrogram.get_level_ptr_unsafe(l)),
+      dendrogram.get_level_size_unsafe(l),
+      d_partition,
+      local_num_verts);
+  }
+}
+
+}  // namespace cugraph
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2021, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,6 +14,7 @@
  * limitations under the License.
  */
 
+#include <community/flatten_dendrogram.cuh>
 #include <community/leiden.cuh>
 
 namespace cugraph {
@@ -30,8 +31,24 @@ std::pair<size_t, weight_t> leiden(raft::handle_t const &handle,
   CUGRAPH_EXPECTS(clustering != nullptr, "Invalid input argument: clustering is null");
 
   Leiden<GraphCSRView<vertex_t, edge_t, weight_t>> runner(handle, graph);
+  weight_t wt = runner(max_level, resolution);
 
-  return runner(clustering, max_level, resolution);
+  thrust::device_vector<vertex_t> vertex_ids_v(graph.number_of_vertices);
+
+  thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
+               thrust::make_counting_iterator<vertex_t>(0),  // MNMG - base vertex id
+               thrust::make_counting_iterator<vertex_t>(
+                 graph.number_of_vertices),  // MNMG - base vertex id + number_of_vertices
+               vertex_ids_v.begin());
+
+  partition_at_level<vertex_t, false>(handle,
+                                      runner.get_dendrogram(),
+                                      vertex_ids_v.data().get(),
+                                      clustering,
+                                      runner.get_dendrogram().num_levels());
+
+  // FIXME: Consider returning the Dendrogram at some point
+  return std::make_pair(runner.get_dendrogram().num_levels(), wt);
 }
 
 // Explicit template instantiations