diff --git a/ci/gpu/notebook_list.py b/ci/gpu/notebook_list.py
new file mode 100644
index 00000000000..bb54913ac8d
--- /dev/null
+++ b/ci/gpu/notebook_list.py
@@ -0,0 +1,48 @@
+# Copyright (c) 2021, NVIDIA CORPORATION.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import re
+import sys
+import glob
+
+from numba import cuda
+
+#
+# Not strictly true... however what we mean is
+# Pascal or earlier
+#
+pascal = False
+
+device = cuda.get_current_device()
+cc = getattr(device, 'COMPUTE_CAPABILITY')
+if (cc[0] < 7):
+    pascal = True
+
+for filename in glob.iglob('**/*.ipynb', recursive=True):
+    skip = False
+    for line in open(filename, 'r'):
+        if re.search('# Skip notebook test', line):
+            skip = True
+            print(f'SKIPPING {filename} (marked as skip)', file=sys.stderr)
+            break;
+        elif re.search('dask', line):
+            print(f'SKIPPING {filename} (suspected Dask usage, not currently automatable)', file=sys.stderr)
+            skip = True
+            break;
+        elif pascal and re.search('# Does not run on Pascal', line):
+            print(f'SKIPPING {filename} (does not run on Pascal)', file=sys.stderr)
+            skip = True
+            break;
+
+    if not skip:
+        print(filename)
diff --git a/ci/gpu/test-notebooks.sh b/ci/gpu/test-notebooks.sh
index 389d3be0bfd..f5f768d7f12 100755
--- a/ci/gpu/test-notebooks.sh
+++ b/ci/gpu/test-notebooks.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright (c) 2019-2020, NVIDIA CORPORATION.
+# Copyright (c) 2019-2021, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -20,11 +20,6 @@ LIBCUDF_KERNEL_CACHE_PATH=${WORKSPACE}/.jitcache
 
 cd ${NOTEBOOKS_DIR}
 TOPLEVEL_NB_FOLDERS=$(find . -name *.ipynb |cut -d'/' -f2|sort -u)
 
-# Add notebooks that should be skipped here
-# (space-separated list of filenames without paths)
-
-SKIPNBS="uvm.ipynb bfs_benchmark.ipynb louvain_benchmark.ipynb pagerank_benchmark.ipynb sssp_benchmark.ipynb release.ipynb nx_cugraph_bc_benchmarking.ipynb"
-
 ## Check env
 env
@@ -37,26 +32,14 @@ for folder in ${TOPLEVEL_NB_FOLDERS}; do
     echo "FOLDER: ${folder}"
    echo "========================================"
    cd ${NOTEBOOKS_DIR}/${folder}
-    for nb in $(find . -name "*.ipynb"); do
+    for nb in $(python ${WORKSPACE}/ci/gpu/notebook_list.py); do
         nbBasename=$(basename ${nb})
-        # Skip all NBs that use dask (in the code or even in their name)
-        if ((echo ${nb}|grep -qi dask) || \
-            (grep -q dask ${nb})); then
-            echo "--------------------------------------------------------------------------------"
-            echo "SKIPPING: ${nb} (suspected Dask usage, not currently automatable)"
-            echo "--------------------------------------------------------------------------------"
-        elif (echo " ${SKIPNBS} " | grep -q " ${nbBasename} "); then
-            echo "--------------------------------------------------------------------------------"
-            echo "SKIPPING: ${nb} (listed in skip list)"
-            echo "--------------------------------------------------------------------------------"
-        else
-            cd $(dirname ${nb})
-            nvidia-smi
-            ${NBTEST} ${nbBasename}
-            EXITCODE=$((EXITCODE | $?))
-            rm -rf ${LIBCUDF_KERNEL_CACHE_PATH}/*
-            cd ${NOTEBOOKS_DIR}/${folder}
-        fi
+        cd $(dirname ${nb})
+        nvidia-smi
+        ${NBTEST} ${nbBasename}
+        EXITCODE=$((EXITCODE | $?))
+        rm -rf ${LIBCUDF_KERNEL_CACHE_PATH}/*
+        cd ${NOTEBOOKS_DIR}/${folder}
     done
 done
diff --git a/ci/test.sh b/ci/test.sh
index db9390461c0..c173088862d 100755
--- a/ci/test.sh
+++ b/ci/test.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright (c) 2019-2020, NVIDIA CORPORATION.
+# Copyright (c) 2019-2021, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -64,7 +64,7 @@ else
     cd $WORKSPACE/ci/artifacts/cugraph/cpu/conda_work/cpp/build
 fi
 
-for gt in gtests/*; do
+for gt in tests/*_TEST; do
     test_name=$(basename $gt)
     echo "Running GoogleTest $test_name"
     ${gt} ${GTEST_FILTER} ${GTEST_ARGS}
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 19bfd24a591..c17bed0a902 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -351,7 +351,7 @@ add_library(cugraph SHARED
     src/community/louvain.cu
     src/community/leiden.cu
     src/community/ktruss.cu
-    src/community/ECG.cu
+    src/community/ecg.cu
     src/community/triangles_counting.cu
     src/community/extract_subgraph_by_vertex.cu
     src/cores/core_number.cu
diff --git a/cpp/src/community/dendrogram.cuh b/cpp/src/community/dendrogram.cuh
new file mode 100644
index 00000000000..414f5f3854d
--- /dev/null
+++ b/cpp/src/community/dendrogram.cuh
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ +#pragma once + +#include + +#include +#include + +namespace cugraph { + +template +class Dendrogram { + public: + void add_level(vertex_t num_verts, + cudaStream_t stream = 0, + rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()) + { + level_ptr_.push_back( + std::make_unique(num_verts * sizeof(vertex_t), stream, mr)); + level_size_.push_back(num_verts); + } + + size_t current_level() const { return level_size_.size() - 1; } + + size_t num_levels() const { return level_size_.size(); } + + vertex_t const *get_level_ptr_nocheck(size_t level) const + { + return static_cast(level_ptr_[level]->data()); + } + + vertex_t *get_level_ptr_nocheck(size_t level) + { + return static_cast(level_ptr_[level]->data()); + } + + vertex_t get_level_size_nocheck(size_t level) const { return level_size_[level]; } + + vertex_t const *current_level_begin() const { return get_level_ptr_nocheck(current_level()); } + + vertex_t const *current_level_end() const { return current_level_begin() + current_level_size(); } + + vertex_t *current_level_begin() { return get_level_ptr_nocheck(current_level()); } + + vertex_t *current_level_end() { return current_level_begin() + current_level_size(); } + + vertex_t current_level_size() const { return get_level_size_nocheck(current_level()); } + + private: + std::vector level_size_; + std::vector> level_ptr_; +}; + +} // namespace cugraph diff --git a/cpp/src/community/ECG.cu b/cpp/src/community/ecg.cu similarity index 72% rename from cpp/src/community/ECG.cu rename to cpp/src/community/ecg.cu index ea21f87ff7e..994204ecd32 100644 --- a/cpp/src/community/ECG.cu +++ b/cpp/src/community/ecg.cu @@ -15,13 +15,15 @@ */ #include +#include +#include +#include +#include #include #include -#include + #include -#include -#include "utilities/graph_utils.cuh" namespace { template @@ -41,26 +43,23 @@ binsearch_maxle(const IndexType *vec, const IndexType val, IndexType low, IndexT } } +// FIXME: This shouldn't need to be a custom kernel, this +// seems like it should just be a thrust::transform template -__global__ void match_check_kernel(IdxT size, - IdxT num_verts, - IdxT *offsets, - IdxT *indices, - IdxT *permutation, - IdxT *parts, - ValT *weights) +__global__ void match_check_kernel( + IdxT size, IdxT num_verts, IdxT *offsets, IdxT *indices, IdxT *parts, ValT *weights) { IdxT tid = blockIdx.x * blockDim.x + threadIdx.x; while (tid < size) { IdxT source = binsearch_maxle(offsets, tid, (IdxT)0, num_verts); IdxT dest = indices[tid]; - if (parts[permutation[source]] == parts[permutation[dest]]) weights[tid] += 1; + if (parts[source] == parts[dest]) weights[tid] += 1; tid += gridDim.x * blockDim.x; } } struct prg { - __host__ __device__ float operator()(int n) + __device__ float operator()(int n) { thrust::default_random_engine rng; thrust::uniform_real_distribution dist(0.0, 1.0); @@ -93,7 +92,7 @@ struct update_functor { template void get_permutation_vector(T size, T seed, T *permutation, cudaStream_t stream) { - rmm::device_vector randoms_v(size); + rmm::device_uvector randoms_v(size, stream); thrust::counting_iterator index(seed); thrust::transform( @@ -103,6 +102,31 @@ void get_permutation_vector(T size, T seed, T *permutation, cudaStream_t stream) rmm::exec_policy(stream)->on(stream), randoms_v.begin(), randoms_v.end(), permutation); } +template +class EcgLouvain : public cugraph::Louvain { + public: + using graph_t = graph_type; + using vertex_t = typename graph_type::vertex_type; + using edge_t = typename graph_type::edge_type; + using weight_t = 
typename graph_type::weight_type; + + EcgLouvain(raft::handle_t const &handle, graph_type const &graph, vertex_t seed) + : cugraph::Louvain(handle, graph), seed_(seed) + { + } + + void initialize_dendrogram_level(vertex_t num_vertices) override + { + this->dendrogram_->add_level(num_vertices); + + get_permutation_vector( + num_vertices, seed_, this->dendrogram_->current_level_begin(), this->stream_); + } + + private: + vertex_t seed_; +}; + } // anonymous namespace namespace cugraph { @@ -114,37 +138,34 @@ void ecg(raft::handle_t const &handle, vertex_t ensemble_size, vertex_t *clustering) { + using graph_type = GraphCSRView; + CUGRAPH_EXPECTS(graph.edge_data != nullptr, - "Invalid input argument: louvain expects a weighted graph"); - CUGRAPH_EXPECTS(clustering != nullptr, "Invalid input argument: clustering is NULL"); + "Invalid input argument: ecg expects a weighted graph"); + CUGRAPH_EXPECTS(clustering != nullptr, + "Invalid input argument: clustering is NULL, should be a device pointer to " + "memory for storing the result"); cudaStream_t stream{0}; - rmm::device_vector ecg_weights_v(graph.edge_data, - graph.edge_data + graph.number_of_edges); + rmm::device_uvector ecg_weights_v(graph.number_of_edges, handle.get_stream()); + + thrust::copy(rmm::exec_policy(stream)->on(stream), + graph.edge_data, + graph.edge_data + graph.number_of_edges, + ecg_weights_v.data()); vertex_t size{graph.number_of_vertices}; - vertex_t seed{1}; - auto permuted_graph = std::make_unique>( - size, graph.number_of_edges, graph.has_data()); + // FIXME: This seed should be a parameter + vertex_t seed{1}; // Iterate over each member of the ensemble for (vertex_t i = 0; i < ensemble_size; i++) { - // Take random permutation of the graph - rmm::device_vector permutation_v(size); - vertex_t *d_permutation = permutation_v.data().get(); - - get_permutation_vector(size, seed, d_permutation, stream); + EcgLouvain runner(handle, graph, seed); seed += size; - detail::permute_graph(graph, d_permutation, permuted_graph->view()); - - // Run one level of Louvain clustering on the random permutation - rmm::device_vector parts_v(size); - vertex_t *d_parts = parts_v.data().get(); - - cugraph::louvain(handle, permuted_graph->view(), d_parts, size_t{1}); + weight_t wt = runner(size_t{1}, weight_t{1}); // For each edge in the graph determine whether the endpoints are in the same partition // Keep a sum for each edge of the total number of times its endpoints are in the same partition @@ -155,17 +176,16 @@ void ecg(raft::handle_t const &handle, graph.number_of_vertices, graph.offsets, graph.indices, - permutation_v.data().get(), - d_parts, - ecg_weights_v.data().get()); + runner.get_dendrogram().get_level_ptr_nocheck(0), + ecg_weights_v.data()); } // Set weights = min_weight + (1 - min-weight)*sum/ensemble_size update_functor uf(min_weight, ensemble_size); thrust::transform(rmm::exec_policy(stream)->on(stream), - ecg_weights_v.data().get(), - ecg_weights_v.data().get() + graph.number_of_edges, - ecg_weights_v.data().get(), + ecg_weights_v.begin(), + ecg_weights_v.end(), + ecg_weights_v.begin(), uf); // Run Louvain on the original graph using the computed weights @@ -173,7 +193,7 @@ void ecg(raft::handle_t const &handle, GraphCSRView louvain_graph; louvain_graph.indices = graph.indices; louvain_graph.offsets = graph.offsets; - louvain_graph.edge_data = ecg_weights_v.data().get(); + louvain_graph.edge_data = ecg_weights_v.data(); louvain_graph.number_of_vertices = graph.number_of_vertices; louvain_graph.number_of_edges = 
graph.number_of_edges; diff --git a/cpp/src/community/flatten_dendrogram.cuh b/cpp/src/community/flatten_dendrogram.cuh new file mode 100644 index 00000000000..892fe2d1c51 --- /dev/null +++ b/cpp/src/community/flatten_dendrogram.cuh @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include + +#include +#include + +namespace cugraph { + +template +void partition_at_level(raft::handle_t const &handle, + Dendrogram const &dendrogram, + vertex_t const *d_vertex_ids, + vertex_t *d_partition, + size_t level) +{ + vertex_t local_num_verts = dendrogram.get_level_size_nocheck(0); + + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + d_vertex_ids, + d_vertex_ids + local_num_verts, + d_partition); + + std::for_each(thrust::make_counting_iterator(0), + thrust::make_counting_iterator(level), + [&handle, &dendrogram, d_vertex_ids, &d_partition, local_num_verts](size_t l) { + cugraph::experimental::relabel( + handle, + std::tuple( + d_vertex_ids, dendrogram.get_level_ptr_nocheck(l)), + dendrogram.get_level_size_nocheck(l), + d_partition, + local_num_verts); + }); +} + +} // namespace cugraph diff --git a/cpp/src/community/leiden.cu b/cpp/src/community/leiden.cu index 9e5a847cdf0..427e62d3286 100644 --- a/cpp/src/community/leiden.cu +++ b/cpp/src/community/leiden.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,8 +14,11 @@ * limitations under the License. 
*/ +#include #include +#include + namespace cugraph { template @@ -27,11 +30,29 @@ std::pair leiden(raft::handle_t const &handle, { CUGRAPH_EXPECTS(graph.edge_data != nullptr, "Invalid input argument: leiden expects a weighted graph"); - CUGRAPH_EXPECTS(clustering != nullptr, "Invalid input argument: clustering is null"); + CUGRAPH_EXPECTS(clustering != nullptr, + "Invalid input argument: clustering is null, should be a device pointer to " + "memory for storing the result"); Leiden> runner(handle, graph); + weight_t wt = runner(max_level, resolution); + + rmm::device_uvector vertex_ids_v(graph.number_of_vertices, handle.get_stream()); + + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + thrust::make_counting_iterator(0), // MNMG - base vertex id + thrust::make_counting_iterator( + graph.number_of_vertices), // MNMG - base vertex id + number_of_vertices + vertex_ids_v.begin()); + + partition_at_level(handle, + runner.get_dendrogram(), + vertex_ids_v.data(), + clustering, + runner.get_dendrogram().num_levels()); - return runner(clustering, max_level, resolution); + // FIXME: Consider returning the Dendrogram at some point + return std::make_pair(runner.get_dendrogram().num_levels(), wt); } // Explicit template instantations diff --git a/cpp/src/community/leiden.cuh b/cpp/src/community/leiden.cuh index f2f84433284..141f8beac40 100644 --- a/cpp/src/community/leiden.cuh +++ b/cpp/src/community/leiden.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,6 +17,8 @@ #include +#include + namespace cugraph { template @@ -28,7 +30,8 @@ class Leiden : public Louvain { using weight_t = typename graph_type::weight_type; Leiden(raft::handle_t const &handle, graph_type const &graph) - : Louvain(handle, graph), constraint_v_(graph.number_of_vertices) + : Louvain(handle, graph), + constraint_v_(graph.number_of_vertices, handle.get_stream()) { } @@ -38,22 +41,28 @@ class Leiden : public Louvain { { this->timer_start("update_clustering_constrained"); - rmm::device_vector next_cluster_v(this->cluster_v_); - rmm::device_vector delta_Q_v(graph.number_of_edges); - rmm::device_vector cluster_hash_v(graph.number_of_edges); - rmm::device_vector old_cluster_sum_v(graph.number_of_vertices); + rmm::device_uvector next_cluster_v(this->dendrogram_->current_level_size(), + this->stream_); + rmm::device_uvector delta_Q_v(graph.number_of_edges, this->stream_); + rmm::device_uvector cluster_hash_v(graph.number_of_edges, this->stream_); + rmm::device_uvector old_cluster_sum_v(graph.number_of_vertices, this->stream_); - vertex_t const *d_src_indices = this->src_indices_v_.data().get(); + vertex_t const *d_src_indices = this->src_indices_v_.data(); vertex_t const *d_dst_indices = graph.indices; - vertex_t *d_cluster_hash = cluster_hash_v.data().get(); - vertex_t *d_cluster = this->cluster_v_.data().get(); - weight_t const *d_vertex_weights = this->vertex_weights_v_.data().get(); - weight_t *d_cluster_weights = this->cluster_weights_v_.data().get(); - weight_t *d_delta_Q = delta_Q_v.data().get(); - vertex_t *d_constraint = constraint_v_.data().get(); + vertex_t *d_cluster_hash = cluster_hash_v.data(); + vertex_t *d_cluster = this->dendrogram_->current_level_begin(); + weight_t const *d_vertex_weights = this->vertex_weights_v_.data(); + weight_t *d_cluster_weights = this->cluster_weights_v_.data(); 
+ weight_t *d_delta_Q = delta_Q_v.data(); + vertex_t *d_constraint = constraint_v_.data(); + + thrust::copy(rmm::exec_policy(this->stream_)->on(this->stream_), + this->dendrogram_->current_level_begin(), + this->dendrogram_->current_level_end(), + next_cluster_v.data()); - weight_t new_Q = - this->modularity(total_edge_weight, resolution, graph, this->cluster_v_.data().get()); + weight_t new_Q = this->modularity( + total_edge_weight, resolution, graph, this->dendrogram_->current_level_begin()); weight_t cur_Q = new_Q - 1; @@ -83,13 +92,13 @@ class Leiden : public Louvain { up_down = !up_down; - new_Q = this->modularity(total_edge_weight, resolution, graph, next_cluster_v.data().get()); + new_Q = this->modularity(total_edge_weight, resolution, graph, next_cluster_v.data()); if (new_Q > cur_Q) { thrust::copy(rmm::exec_policy(this->stream_)->on(this->stream_), next_cluster_v.begin(), next_cluster_v.end(), - this->cluster_v_.begin()); + this->dendrogram_->current_level_begin()); } } @@ -97,9 +106,7 @@ class Leiden : public Louvain { return cur_Q; } - std::pair operator()(vertex_t *d_cluster_vec, - size_t max_level, - weight_t resolution) + weight_t operator()(size_t max_level, weight_t resolution) override { size_t num_level{0}; @@ -109,57 +116,50 @@ class Leiden : public Louvain { weight_t best_modularity = weight_t{-1}; - // - // Initialize every cluster to reference each vertex to itself - // - thrust::sequence(rmm::exec_policy(this->stream_)->on(this->stream_), - this->cluster_v_.begin(), - this->cluster_v_.end()); - thrust::copy(rmm::exec_policy(this->stream_)->on(this->stream_), - this->cluster_v_.begin(), - this->cluster_v_.end(), - d_cluster_vec); - // // Our copy of the graph. Each iteration of the outer loop will // shrink this copy of the graph. 
// - GraphCSRView current_graph(this->offsets_v_.data().get(), - this->indices_v_.data().get(), - this->weights_v_.data().get(), + GraphCSRView current_graph(this->offsets_v_.data(), + this->indices_v_.data(), + this->weights_v_.data(), this->number_of_vertices_, this->number_of_edges_); - current_graph.get_source_indices(this->src_indices_v_.data().get()); + current_graph.get_source_indices(this->src_indices_v_.data()); while (num_level < max_level) { + // + // Initialize every cluster to reference each vertex to itself + // + this->dendrogram_->add_level(current_graph.number_of_vertices); + + thrust::sequence(rmm::exec_policy(this->stream_)->on(this->stream_), + this->dendrogram_->current_level_begin(), + this->dendrogram_->current_level_end()); + this->compute_vertex_and_cluster_weights(current_graph); weight_t new_Q = this->update_clustering(total_edge_weight, resolution, current_graph); - thrust::copy(rmm::exec_policy(this->stream_)->on(this->stream_), - this->cluster_v_.begin(), - this->cluster_v_.end(), - constraint_v_.begin()); - new_Q = update_clustering_constrained(total_edge_weight, resolution, current_graph); if (new_Q <= best_modularity) { break; } best_modularity = new_Q; - this->shrink_graph(current_graph, d_cluster_vec); + this->shrink_graph(current_graph); num_level++; } this->timer_display(std::cout); - return std::make_pair(num_level, best_modularity); + return best_modularity; } private: - rmm::device_vector constraint_v_; + rmm::device_uvector constraint_v_; }; } // namespace cugraph diff --git a/cpp/src/community/louvain.cu b/cpp/src/community/louvain.cu index 81a68a31663..aef6fcdafde 100644 --- a/cpp/src/community/louvain.cu +++ b/cpp/src/community/louvain.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,10 +14,13 @@ * limitations under the License. 
*/ +#include #include #include #include +#include + namespace cugraph { namespace detail { @@ -31,10 +34,29 @@ std::pair louvain(raft::handle_t const &handle, { CUGRAPH_EXPECTS(graph_view.edge_data != nullptr, "Invalid input argument: louvain expects a weighted graph"); - CUGRAPH_EXPECTS(clustering != nullptr, "Invalid input argument: clustering is null"); + CUGRAPH_EXPECTS(clustering != nullptr, + "Invalid input argument: clustering is null, should be a device pointer to " + "memory for storing the result"); Louvain> runner(handle, graph_view); - return runner(clustering, max_level, resolution); + weight_t wt = runner(max_level, resolution); + + rmm::device_uvector vertex_ids_v(graph_view.number_of_vertices, handle.get_stream()); + + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + thrust::make_counting_iterator(0), // MNMG - base vertex id + thrust::make_counting_iterator( + graph_view.number_of_vertices), // MNMG - base vertex id + number_of_vertices + vertex_ids_v.begin()); + + partition_at_level(handle, + runner.get_dendrogram(), + vertex_ids_v.data(), + clustering, + runner.get_dendrogram().num_levels()); + + // FIXME: Consider returning the Dendrogram at some point + return std::make_pair(runner.get_dendrogram().num_levels(), wt); } template @@ -45,7 +67,9 @@ std::pair louvain( size_t max_level, weight_t resolution) { - CUGRAPH_EXPECTS(clustering != nullptr, "Invalid input argument: clustering is null"); + CUGRAPH_EXPECTS(clustering != nullptr, + "Invalid input argument: clustering is null, should be a device pointer to " + "memory for storing the result"); // "FIXME": remove this check and the guards below // @@ -61,7 +85,13 @@ std::pair louvain( } else { experimental::Louvain> runner(handle, graph_view); - return runner(clustering, max_level, resolution); + + weight_t wt = runner(max_level, resolution); + // TODO: implement this... + // runner.get_dendrogram().partition_at_level(clustering, runner.get_dendrogram().num_levels()); + + // FIXME: Consider returning the Dendrogram at some point + return std::make_pair(runner.get_dendrogram().num_levels(), wt); } } diff --git a/cpp/src/community/louvain.cuh b/cpp/src/community/louvain.cuh index 7ca3638f42b..f13c64867cb 100644 --- a/cpp/src/community/louvain.cuh +++ b/cpp/src/community/louvain.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,11 +17,13 @@ #include -#include - #include #include +#include + +#include + //#define TIMING #ifdef TIMING @@ -44,26 +46,42 @@ class Louvain { hr_timer_(), #endif handle_(handle), + dendrogram_(std::make_unique>()), // FIXME: Don't really need to copy here but would need // to change the logic to populate this properly // in generate_superverticies_graph. 
// - offsets_v_(graph.offsets, graph.offsets + graph.number_of_vertices + 1), - indices_v_(graph.indices, graph.indices + graph.number_of_edges), - weights_v_(graph.edge_data, graph.edge_data + graph.number_of_edges), - src_indices_v_(graph.number_of_edges), - vertex_weights_v_(graph.number_of_vertices), - cluster_weights_v_(graph.number_of_vertices), - cluster_v_(graph.number_of_vertices), - tmp_arr_v_(graph.number_of_vertices), - cluster_inverse_v_(graph.number_of_vertices), + offsets_v_(graph.number_of_vertices + 1, handle.get_stream()), + indices_v_(graph.number_of_edges, handle.get_stream()), + weights_v_(graph.number_of_edges, handle.get_stream()), + src_indices_v_(graph.number_of_edges, handle.get_stream()), + vertex_weights_v_(graph.number_of_vertices, handle.get_stream()), + cluster_weights_v_(graph.number_of_vertices, handle.get_stream()), + tmp_arr_v_(graph.number_of_vertices, handle.get_stream()), + cluster_inverse_v_(graph.number_of_vertices, handle.get_stream()), number_of_vertices_(graph.number_of_vertices), number_of_edges_(graph.number_of_edges), stream_(handle.get_stream()) { + thrust::copy(rmm::exec_policy(stream_)->on(stream_), + graph.offsets, + graph.offsets + graph.number_of_vertices + 1, + offsets_v_.begin()); + + thrust::copy(rmm::exec_policy(stream_)->on(stream_), + graph.indices, + graph.indices + graph.number_of_edges, + indices_v_.begin()); + + thrust::copy(rmm::exec_policy(stream_)->on(stream_), + graph.edge_data, + graph.edge_data + graph.number_of_edges, + weights_v_.begin()); } + virtual ~Louvain() {} + weight_t modularity(weight_t total_edge_weight, weight_t resolution, graph_t const &graph, @@ -71,43 +89,45 @@ class Louvain { { vertex_t n_verts = graph.number_of_vertices; - rmm::device_vector inc(n_verts, weight_t{0.0}); - rmm::device_vector deg(n_verts, weight_t{0.0}); + rmm::device_uvector inc(n_verts, stream_); + rmm::device_uvector deg(n_verts, stream_); - edge_t const *d_offsets = graph.offsets; - vertex_t const *d_indices = graph.indices; - weight_t const *d_weights = graph.edge_data; - weight_t *d_inc = inc.data().get(); - weight_t *d_deg = deg.data().get(); + thrust::fill(rmm::exec_policy(stream_)->on(stream_), inc.begin(), inc.end(), weight_t{0.0}); + thrust::fill(rmm::exec_policy(stream_)->on(stream_), deg.begin(), deg.end(), weight_t{0.0}); // FIXME: Already have weighted degree computed in main loop, // could pass that in rather than computing d_deg... 
which // would save an atomicAdd (synchronization) // - thrust::for_each( - rmm::exec_policy(stream_)->on(stream_), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(graph.number_of_vertices), - [d_inc, d_deg, d_offsets, d_indices, d_weights, d_cluster] __device__(vertex_t v) { - vertex_t community = d_cluster[v]; - weight_t increase{0.0}; - weight_t degree{0.0}; - - for (edge_t loc = d_offsets[v]; loc < d_offsets[v + 1]; ++loc) { - vertex_t neighbor = d_indices[loc]; - degree += d_weights[loc]; - if (d_cluster[neighbor] == community) { increase += d_weights[loc]; } - } + thrust::for_each(rmm::exec_policy(stream_)->on(stream_), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(graph.number_of_vertices), + [d_inc = inc.data(), + d_deg = deg.data(), + d_offsets = graph.offsets, + d_indices = graph.indices, + d_weights = graph.edge_data, + d_cluster] __device__(vertex_t v) { + vertex_t community = d_cluster[v]; + weight_t increase{0.0}; + weight_t degree{0.0}; + + for (edge_t loc = d_offsets[v]; loc < d_offsets[v + 1]; ++loc) { + vertex_t neighbor = d_indices[loc]; + degree += d_weights[loc]; + if (d_cluster[neighbor] == community) { increase += d_weights[loc]; } + } - if (degree > weight_t{0.0}) atomicAdd(d_deg + community, degree); - if (increase > weight_t{0.0}) atomicAdd(d_inc + community, increase); - }); + if (degree > weight_t{0.0}) atomicAdd(d_deg + community, degree); + if (increase > weight_t{0.0}) atomicAdd(d_inc + community, increase); + }); weight_t Q = thrust::transform_reduce( rmm::exec_policy(stream_)->on(stream_), thrust::make_counting_iterator(0), thrust::make_counting_iterator(graph.number_of_vertices), - [d_deg, d_inc, total_edge_weight, resolution] __device__(vertex_t community) { + [d_deg = deg.data(), d_inc = inc.data(), total_edge_weight, resolution] __device__( + vertex_t community) { return ((d_inc[community] / total_edge_weight) - resolution * (d_deg[community] * d_deg[community]) / (total_edge_weight * total_edge_weight)); @@ -118,37 +138,35 @@ class Louvain { return Q; } - virtual std::pair operator()(vertex_t *d_cluster_vec, - size_t max_level, - weight_t resolution) - { - size_t num_level{0}; + Dendrogram &get_dendrogram() const { return *dendrogram_; } + + std::unique_ptr> move_dendrogram() { return dendrogram_; } + virtual weight_t operator()(size_t max_level, weight_t resolution) + { weight_t total_edge_weight = thrust::reduce(rmm::exec_policy(stream_)->on(stream_), weights_v_.begin(), weights_v_.end()); weight_t best_modularity = weight_t{-1}; - // - // Initialize every cluster to reference each vertex to itself - // - thrust::sequence(rmm::exec_policy(stream_)->on(stream_), cluster_v_.begin(), cluster_v_.end()); - thrust::copy( - rmm::exec_policy(stream_)->on(stream_), cluster_v_.begin(), cluster_v_.end(), d_cluster_vec); - // // Our copy of the graph. Each iteration of the outer loop will // shrink this copy of the graph. 
// - GraphCSRView current_graph(offsets_v_.data().get(), - indices_v_.data().get(), - weights_v_.data().get(), + GraphCSRView current_graph(offsets_v_.data(), + indices_v_.data(), + weights_v_.data(), number_of_vertices_, number_of_edges_); - current_graph.get_source_indices(src_indices_v_.data().get()); + current_graph.get_source_indices(src_indices_v_.data()); + + while (dendrogram_->num_levels() < max_level) { + // + // Initialize every cluster to reference each vertex to itself + // + initialize_dendrogram_level(current_graph.number_of_vertices); - while (num_level < max_level) { compute_vertex_and_cluster_weights(current_graph); weight_t new_Q = update_clustering(total_edge_weight, resolution, current_graph); @@ -157,14 +175,17 @@ class Louvain { best_modularity = new_Q; - shrink_graph(current_graph, d_cluster_vec); + shrink_graph(current_graph); - num_level++; + // TODO: Note, somehow after shrink_graph - having converted to device_uvector - the + // modularity of the new graph is too small... + // Was that always true? Perhaps I need to discard the bottom of the dendrogram + // in the break statement above? } timer_display(std::cout); - return std::make_pair(num_level, best_modularity); + return best_modularity; } protected: @@ -190,6 +211,15 @@ class Louvain { #endif } + virtual void initialize_dendrogram_level(vertex_t num_vertices) + { + dendrogram_->add_level(num_vertices); + + thrust::sequence(rmm::exec_policy(stream_)->on(stream_), + dendrogram_->current_level_begin(), + dendrogram_->current_level_end()); + } + public: void compute_vertex_and_cluster_weights(graph_type const &graph) { @@ -198,8 +228,8 @@ class Louvain { edge_t const *d_offsets = graph.offsets; vertex_t const *d_indices = graph.indices; weight_t const *d_weights = graph.edge_data; - weight_t *d_vertex_weights = vertex_weights_v_.data().get(); - weight_t *d_cluster_weights = cluster_weights_v_.data().get(); + weight_t *d_vertex_weights = vertex_weights_v_.data(); + weight_t *d_cluster_weights = cluster_weights_v_.data(); // // MNMG: copy_v_transform_reduce_out_nbr, then copy @@ -229,18 +259,25 @@ class Louvain { // // MNMG: This is the hard one, see writeup // - rmm::device_vector next_cluster_v(cluster_v_); - rmm::device_vector delta_Q_v(graph.number_of_edges); - rmm::device_vector cluster_hash_v(graph.number_of_edges); - rmm::device_vector old_cluster_sum_v(graph.number_of_vertices); - vertex_t *d_cluster_hash = cluster_hash_v.data().get(); - vertex_t *d_cluster = cluster_v_.data().get(); - weight_t const *d_vertex_weights = vertex_weights_v_.data().get(); - weight_t *d_cluster_weights = cluster_weights_v_.data().get(); - weight_t *d_delta_Q = delta_Q_v.data().get(); + // TODO: will this work, or do I need to use the size and then copy? 
+ rmm::device_uvector next_cluster_v(dendrogram_->current_level_size(), stream_); + rmm::device_uvector delta_Q_v(graph.number_of_edges, stream_); + rmm::device_uvector cluster_hash_v(graph.number_of_edges, stream_); + rmm::device_uvector old_cluster_sum_v(graph.number_of_vertices, stream_); + + vertex_t *d_cluster = dendrogram_->current_level_begin(); + weight_t const *d_vertex_weights = vertex_weights_v_.data(); + weight_t *d_cluster_weights = cluster_weights_v_.data(); + weight_t *d_delta_Q = delta_Q_v.data(); + + thrust::copy(rmm::exec_policy(stream_)->on(stream_), + dendrogram_->current_level_begin(), + dendrogram_->current_level_end(), + next_cluster_v.data()); - weight_t new_Q = modularity(total_edge_weight, resolution, graph, cluster_v_.data().get()); + weight_t new_Q = + modularity(total_edge_weight, resolution, graph, dendrogram_->current_level_begin()); weight_t cur_Q = new_Q - 1; @@ -259,13 +296,13 @@ class Louvain { up_down = !up_down; - new_Q = modularity(total_edge_weight, resolution, graph, next_cluster_v.data().get()); + new_Q = modularity(total_edge_weight, resolution, graph, next_cluster_v.data()); if (new_Q > cur_Q) { thrust::copy(rmm::exec_policy(stream_)->on(stream_), next_cluster_v.begin(), next_cluster_v.end(), - cluster_v_.begin()); + dendrogram_->current_level_begin()); } } @@ -276,45 +313,37 @@ class Louvain { void compute_delta_modularity(weight_t total_edge_weight, weight_t resolution, graph_type const &graph, - rmm::device_vector &cluster_hash_v, - rmm::device_vector &old_cluster_sum_v, - rmm::device_vector &delta_Q_v) + rmm::device_uvector &cluster_hash_v, + rmm::device_uvector &old_cluster_sum_v, + rmm::device_uvector &delta_Q_v) { - vertex_t const *d_src_indices = src_indices_v_.data().get(); - vertex_t const *d_dst_indices = graph.indices; edge_t const *d_offsets = graph.offsets; weight_t const *d_weights = graph.edge_data; - vertex_t const *d_cluster = cluster_v_.data().get(); - weight_t const *d_vertex_weights = vertex_weights_v_.data().get(); - weight_t const *d_cluster_weights = cluster_weights_v_.data().get(); + vertex_t const *d_cluster = dendrogram_->current_level_begin(); + weight_t const *d_vertex_weights = vertex_weights_v_.data(); + weight_t const *d_cluster_weights = cluster_weights_v_.data(); - vertex_t *d_cluster_hash = cluster_hash_v.data().get(); - weight_t *d_delta_Q = delta_Q_v.data().get(); - weight_t *d_old_cluster_sum = old_cluster_sum_v.data().get(); + vertex_t *d_cluster_hash = cluster_hash_v.data(); + weight_t *d_delta_Q = delta_Q_v.data(); + weight_t *d_old_cluster_sum = old_cluster_sum_v.data(); weight_t *d_new_cluster_sum = d_delta_Q; - thrust::fill(cluster_hash_v.begin(), cluster_hash_v.end(), vertex_t{-1}); - thrust::fill(delta_Q_v.begin(), delta_Q_v.end(), weight_t{0.0}); - thrust::fill(old_cluster_sum_v.begin(), old_cluster_sum_v.end(), weight_t{0.0}); + thrust::fill(rmm::exec_policy(stream_)->on(stream_), + cluster_hash_v.begin(), + cluster_hash_v.end(), + vertex_t{-1}); + thrust::fill( + rmm::exec_policy(stream_)->on(stream_), delta_Q_v.begin(), delta_Q_v.end(), weight_t{0.0}); + thrust::fill(rmm::exec_policy(stream_)->on(stream_), + old_cluster_sum_v.begin(), + old_cluster_sum_v.end(), + weight_t{0.0}); - // MNMG: New technique using reduce_by_key. Would require a segmented sort - // or a pair of sorts on each node, so probably slower than what's here. - // This might still be faster even in MNMG... 
- // - // - // FIXME: Eventually this should use cuCollections concurrent map - // implementation, but that won't be available for a while. - // - // For each source vertex, we're going to build a hash - // table to the destination cluster ids. We can use - // the offsets ranges to define the bounds of the hash - // table. - // thrust::for_each(rmm::exec_policy(stream_)->on(stream_), thrust::make_counting_iterator(0), thrust::make_counting_iterator(graph.number_of_edges), - [d_src_indices, - d_dst_indices, + [d_src_indices = src_indices_v_.data(), + d_dst_indices = graph.indices, d_cluster, d_offsets, d_cluster_hash, @@ -355,7 +384,7 @@ class Louvain { [total_edge_weight, resolution, d_cluster_hash, - d_src_indices, + d_src_indices = src_indices_v_.data(), d_cluster, d_vertex_weights, d_delta_Q, @@ -383,33 +412,37 @@ class Louvain { } void assign_nodes(graph_type const &graph, - rmm::device_vector &cluster_hash_v, - rmm::device_vector &next_cluster_v, - rmm::device_vector &delta_Q_v, + rmm::device_uvector &cluster_hash_v, + rmm::device_uvector &next_cluster_v, + rmm::device_uvector &delta_Q_v, bool up_down) { - rmm::device_vector temp_vertices_v(graph.number_of_vertices); - rmm::device_vector temp_cluster_v(graph.number_of_vertices, vertex_t{-1}); - rmm::device_vector temp_delta_Q_v(graph.number_of_vertices, weight_t{0.0}); + rmm::device_uvector temp_vertices_v(graph.number_of_vertices, stream_); + rmm::device_uvector temp_cluster_v(graph.number_of_vertices, stream_); + rmm::device_uvector temp_delta_Q_v(graph.number_of_vertices, stream_); + + thrust::fill(rmm::exec_policy(stream_)->on(stream_), + temp_cluster_v.begin(), + temp_cluster_v.end(), + vertex_t{-1}); - weight_t *d_delta_Q = delta_Q_v.data().get(); - vertex_t *d_next_cluster = next_cluster_v.data().get(); - vertex_t *d_cluster_hash = cluster_hash_v.data().get(); - weight_t const *d_vertex_weights = vertex_weights_v_.data().get(); - weight_t *d_cluster_weights = cluster_weights_v_.data().get(); + thrust::fill(rmm::exec_policy(stream_)->on(stream_), + temp_delta_Q_v.begin(), + temp_delta_Q_v.end(), + weight_t{0}); auto cluster_reduce_iterator = - thrust::make_zip_iterator(thrust::make_tuple(d_cluster_hash, d_delta_Q)); + thrust::make_zip_iterator(thrust::make_tuple(cluster_hash_v.begin(), delta_Q_v.begin())); - auto output_edge_iterator2 = thrust::make_zip_iterator( - thrust::make_tuple(temp_cluster_v.data().get(), temp_delta_Q_v.data().get())); + auto output_edge_iterator2 = + thrust::make_zip_iterator(thrust::make_tuple(temp_cluster_v.begin(), temp_delta_Q_v.begin())); auto cluster_reduce_end = thrust::reduce_by_key(rmm::exec_policy(stream_)->on(stream_), src_indices_v_.begin(), src_indices_v_.end(), cluster_reduce_iterator, - temp_vertices_v.data().get(), + temp_vertices_v.data(), output_edge_iterator2, thrust::equal_to(), [] __device__(auto pair1, auto pair2) { @@ -422,22 +455,18 @@ class Louvain { return pair2; }); - vertex_t final_size = thrust::distance(temp_vertices_v.data().get(), cluster_reduce_end.first); - - vertex_t *d_temp_vertices = temp_vertices_v.data().get(); - vertex_t *d_temp_clusters = temp_cluster_v.data().get(); - weight_t *d_temp_delta_Q = temp_delta_Q_v.data().get(); + vertex_t final_size = thrust::distance(temp_vertices_v.data(), cluster_reduce_end.first); thrust::for_each(rmm::exec_policy(stream_)->on(stream_), thrust::make_counting_iterator(0), thrust::make_counting_iterator(final_size), - [d_temp_delta_Q, - up_down, - d_next_cluster, - d_temp_vertices, - d_vertex_weights, - d_temp_clusters, - 
d_cluster_weights] __device__(vertex_t id) { + [up_down, + d_temp_delta_Q = temp_delta_Q_v.data(), + d_next_cluster = next_cluster_v.data(), + d_temp_vertices = temp_vertices_v.data(), + d_vertex_weights = vertex_weights_v_.data(), + d_temp_clusters = temp_cluster_v.data(), + d_cluster_weights = cluster_weights_v_.data()] __device__(vertex_t id) { if ((d_temp_clusters[id] >= 0) && (d_temp_delta_Q[id] > weight_t{0.0})) { vertex_t new_cluster = d_temp_clusters[id]; vertex_t old_cluster = d_next_cluster[d_temp_vertices[id]]; @@ -453,38 +482,38 @@ class Louvain { }); } - void shrink_graph(graph_t &graph, vertex_t *d_cluster_vec) + void shrink_graph(graph_t &graph) { timer_start("shrinking graph"); // renumber the clusters to the range 0..(num_clusters-1) - vertex_t num_clusters = renumber_clusters(d_cluster_vec); - cluster_weights_v_.resize(num_clusters); + vertex_t num_clusters = renumber_clusters(); + cluster_weights_v_.resize(num_clusters, stream_); // shrink our graph to represent the graph of supervertices generate_superverticies_graph(graph, num_clusters); - // assign each new vertex to its own cluster - thrust::sequence(rmm::exec_policy(stream_)->on(stream_), cluster_v_.begin(), cluster_v_.end()); - timer_stop(stream_); } - vertex_t renumber_clusters(vertex_t *d_cluster_vec) + vertex_t renumber_clusters() { - vertex_t *d_tmp_array = tmp_arr_v_.data().get(); - vertex_t *d_cluster_inverse = cluster_inverse_v_.data().get(); - vertex_t *d_cluster = cluster_v_.data().get(); + vertex_t *d_tmp_array = tmp_arr_v_.data(); + vertex_t *d_cluster_inverse = cluster_inverse_v_.data(); + vertex_t *d_cluster = dendrogram_->current_level_begin(); - vertex_t old_num_clusters = cluster_v_.size(); + vertex_t old_num_clusters = dendrogram_->current_level_size(); // // New technique. 
Initialize cluster_inverse_v_ to 0 // - thrust::fill(cluster_inverse_v_.begin(), cluster_inverse_v_.end(), vertex_t{0}); + thrust::fill(rmm::exec_policy(stream_)->on(stream_), + cluster_inverse_v_.begin(), + cluster_inverse_v_.end(), + vertex_t{0}); // - // Iterate over every element c in cluster_v_ and set cluster_inverse_v to 1 + // Iterate over every element c in the current clustering and set cluster_inverse_v to 1 // auto first_1 = thrust::make_constant_iterator(1); auto last_1 = first_1 + old_num_clusters; @@ -492,7 +521,7 @@ class Louvain { thrust::scatter(rmm::exec_policy(stream_)->on(stream_), first_1, last_1, - cluster_v_.begin(), + dendrogram_->current_level_begin(), cluster_inverse_v_.begin()); // @@ -506,7 +535,7 @@ class Louvain { [d_cluster_inverse] __device__(const vertex_t idx) { return d_cluster_inverse[idx] == 1; }); vertex_t new_num_clusters = thrust::distance(tmp_arr_v_.begin(), copy_end); - tmp_arr_v_.resize(new_num_clusters); + tmp_arr_v_.resize(new_num_clusters, stream_); // // Now we can set each value in cluster_inverse of a cluster to its index @@ -525,32 +554,16 @@ class Louvain { d_cluster[i] = d_cluster_inverse[d_cluster[i]]; }); - thrust::for_each(rmm::exec_policy(stream_)->on(stream_), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(number_of_vertices_), - [d_cluster_vec, d_cluster] __device__(vertex_t i) { - d_cluster_vec[i] = d_cluster[d_cluster_vec[i]]; - }); - - cluster_inverse_v_.resize(new_num_clusters); - cluster_v_.resize(new_num_clusters); + cluster_inverse_v_.resize(new_num_clusters, stream_); return new_num_clusters; } void generate_superverticies_graph(graph_t &graph, vertex_t num_clusters) { - rmm::device_vector new_src_v(graph.number_of_edges); - rmm::device_vector new_dst_v(graph.number_of_edges); - rmm::device_vector new_weight_v(graph.number_of_edges); - - vertex_t *d_old_src = src_indices_v_.data().get(); - vertex_t *d_old_dst = graph.indices; - weight_t *d_old_weight = graph.edge_data; - vertex_t *d_new_src = new_src_v.data().get(); - vertex_t *d_new_dst = new_dst_v.data().get(); - vertex_t *d_clusters = cluster_v_.data().get(); - weight_t *d_new_weight = new_weight_v.data().get(); + rmm::device_uvector new_src_v(graph.number_of_edges, stream_); + rmm::device_uvector new_dst_v(graph.number_of_edges, stream_); + rmm::device_uvector new_weight_v(graph.number_of_edges, stream_); // // Renumber the COO @@ -558,13 +571,13 @@ class Louvain { thrust::for_each(rmm::exec_policy(stream_)->on(stream_), thrust::make_counting_iterator(0), thrust::make_counting_iterator(graph.number_of_edges), - [d_old_src, - d_old_dst, - d_old_weight, - d_new_src, - d_new_dst, - d_new_weight, - d_clusters] __device__(edge_t e) { + [d_old_src = src_indices_v_.data(), + d_old_dst = graph.indices, + d_old_weight = graph.edge_data, + d_new_src = new_src_v.data(), + d_new_dst = new_dst_v.data(), + d_new_weight = new_weight_v.data(), + d_clusters = dendrogram_->current_level_begin()] __device__(edge_t e) { d_new_src[e] = d_clusters[d_old_src[e]]; d_new_dst[e] = d_clusters[d_old_dst[e]]; d_new_weight[e] = d_old_weight[e]; @@ -572,39 +585,42 @@ class Louvain { thrust::stable_sort_by_key( rmm::exec_policy(stream_)->on(stream_), - d_new_dst, - d_new_dst + graph.number_of_edges, - thrust::make_zip_iterator(thrust::make_tuple(d_new_src, d_new_weight))); + new_dst_v.begin(), + new_dst_v.end(), + thrust::make_zip_iterator(thrust::make_tuple(new_src_v.begin(), new_weight_v.begin()))); thrust::stable_sort_by_key( rmm::exec_policy(stream_)->on(stream_), - 
d_new_src, - d_new_src + graph.number_of_edges, - thrust::make_zip_iterator(thrust::make_tuple(d_new_dst, d_new_weight))); + new_src_v.begin(), + new_src_v.end(), + thrust::make_zip_iterator(thrust::make_tuple(new_dst_v.begin(), new_weight_v.begin()))); // // Now we reduce by key to combine the weights of duplicate // edges. // - auto start = thrust::make_zip_iterator(thrust::make_tuple(d_new_src, d_new_dst)); - auto new_start = thrust::make_zip_iterator(thrust::make_tuple(d_old_src, d_old_dst)); - auto new_end = thrust::reduce_by_key(rmm::exec_policy(stream_)->on(stream_), + auto start = + thrust::make_zip_iterator(thrust::make_tuple(new_src_v.begin(), new_dst_v.begin())); + auto new_start = + thrust::make_zip_iterator(thrust::make_tuple(src_indices_v_.data(), graph.indices)); + auto new_end = thrust::reduce_by_key(rmm::exec_policy(stream_)->on(stream_), start, start + graph.number_of_edges, - d_new_weight, + new_weight_v.begin(), new_start, - d_old_weight, + graph.edge_data, thrust::equal_to>(), thrust::plus()); graph.number_of_edges = thrust::distance(new_start, new_end.first); graph.number_of_vertices = num_clusters; - detail::fill_offset(d_old_src, graph.offsets, num_clusters, graph.number_of_edges, stream_); + detail::fill_offset( + src_indices_v_.data(), graph.offsets, num_clusters, graph.number_of_edges, stream_); CHECK_CUDA(stream_); - src_indices_v_.resize(graph.number_of_edges); - indices_v_.resize(graph.number_of_edges); - weights_v_.resize(graph.number_of_edges); + src_indices_v_.resize(graph.number_of_edges, stream_); + indices_v_.resize(graph.number_of_edges, stream_); + weights_v_.resize(graph.number_of_edges, stream_); } protected: @@ -613,27 +629,28 @@ class Louvain { edge_t number_of_edges_; cudaStream_t stream_; + std::unique_ptr> dendrogram_; + // // Copy of graph // - rmm::device_vector offsets_v_; - rmm::device_vector indices_v_; - rmm::device_vector weights_v_; - rmm::device_vector src_indices_v_; + rmm::device_uvector offsets_v_; + rmm::device_uvector indices_v_; + rmm::device_uvector weights_v_; + rmm::device_uvector src_indices_v_; // // Weights and clustering across iterations of algorithm // - rmm::device_vector vertex_weights_v_; - rmm::device_vector cluster_weights_v_; - rmm::device_vector cluster_v_; + rmm::device_uvector vertex_weights_v_; + rmm::device_uvector cluster_weights_v_; // // Temporaries used within kernels. Each iteration uses less // of this memory // - rmm::device_vector tmp_arr_v_; - rmm::device_vector cluster_inverse_v_; + rmm::device_uvector tmp_arr_v_; + rmm::device_uvector cluster_inverse_v_; #ifdef TIMING HighResTimer hr_timer_; diff --git a/cpp/src/experimental/louvain.cuh b/cpp/src/experimental/louvain.cuh index 4257953d390..cbd831a67bc 100644 --- a/cpp/src/experimental/louvain.cuh +++ b/cpp/src/experimental/louvain.cuh @@ -33,6 +33,8 @@ #include +#include + //#define TIMING #ifdef TIMING @@ -374,9 +376,9 @@ create_graph(raft::handle_t const &handle, // as above would allow us to eventually run the single GPU version of single level Louvain // on the contracted graphs - which should be more efficient. // -// FIXME: We should return the dendogram and let the python layer clean it up (or have a -// separate C++ function to flatten the dendogram). There are customers that might -// like the dendogram and the implementation would be a bit cleaner if we did the +// FIXME: We should return the dendrogram and let the python layer clean it up (or have a +// separate C++ function to flatten the dendrogram). 
There are customers that might +// like the dendrogram and the implementation would be a bit cleaner if we did the // collapsing as a separate step // template @@ -398,6 +400,7 @@ class Louvain { hr_timer_(), #endif handle_(handle), + dendrogram_(std::make_unique>()), current_graph_view_(graph_view), compute_partition_(graph_view), local_num_vertices_(graph_view.get_number_of_local_vertices()), @@ -406,7 +409,6 @@ class Louvain { local_num_edges_(graph_view.get_number_of_edges()), vertex_weights_v_(graph_view.get_number_of_local_vertices()), cluster_weights_v_(graph_view.get_number_of_local_vertices()), - cluster_v_(graph_view.get_number_of_local_vertices()), number_of_vertices_(graph_view.get_number_of_local_vertices()), stream_(handle.get_stream()) { @@ -440,11 +442,12 @@ class Louvain { } } - virtual std::pair operator()(vertex_t *d_cluster_vec, - size_t max_level, - weight_t resolution) + Dendrogram &get_dendrogram() const { return *dendrogram_; } + + std::unique_ptr> move_dendrogram() { return dendrogram_; } + + virtual weight_t operator()(size_t max_level, weight_t resolution) { - size_t num_level{0}; weight_t best_modularity = weight_t{-1}; #ifdef CUCO_STATIC_MAP_DEFINED @@ -457,17 +460,12 @@ class Louvain { [] __device__(auto, auto, weight_t wt, auto, auto) { return wt; }, weight_t{0}); - // - // Initialize every cluster to reference each vertex to itself - // - thrust::sequence(rmm::exec_policy(stream_)->on(stream_), - cluster_v_.begin(), - cluster_v_.end(), - base_vertex_id_); - thrust::copy( - rmm::exec_policy(stream_)->on(stream_), cluster_v_.begin(), cluster_v_.end(), d_cluster_vec); + while (dendrogram_->num_levels() < max_level) { + // + // Initialize every cluster to reference each vertex to itself + // + initialize_dendrogram_level(current_graph_view_.get_number_of_local_vertices()); - while (num_level < max_level) { compute_vertex_and_cluster_weights(); weight_t new_Q = update_clustering(total_edge_weight, resolution); @@ -476,15 +474,13 @@ class Louvain { best_modularity = new_Q; - shrink_graph(d_cluster_vec); - - num_level++; + shrink_graph(); } timer_display(std::cout); #endif - return std::make_pair(num_level, best_modularity); + return best_modularity; } protected: @@ -512,6 +508,17 @@ class Louvain { #endif } + protected: + void initialize_dendrogram_level(vertex_t num_vertices) + { + dendrogram_->add_level(num_vertices); + + thrust::sequence(rmm::exec_policy(stream_)->on(stream_), + dendrogram_->current_level_begin(), + dendrogram_->current_level_end(), + base_vertex_id_); + } + public: weight_t modularity(weight_t total_edge_weight, weight_t resolution) { @@ -561,23 +568,16 @@ class Louvain { cluster_weights_v_.begin()); cache_vertex_properties( - vertex_weights_v_, src_vertex_weights_cache_v_, dst_vertex_weights_cache_v_); + vertex_weights_v_.begin(), src_vertex_weights_cache_v_, dst_vertex_weights_cache_v_); cache_vertex_properties( - cluster_weights_v_, src_cluster_weights_cache_v_, dst_cluster_weights_cache_v_); + cluster_weights_v_.begin(), src_cluster_weights_cache_v_, dst_cluster_weights_cache_v_); timer_stop(stream_); } - // - // FIXME: Consider returning d_src_cache and d_dst_cache - // (as a pair). This would be a nice optimization - // for single GPU, as we wouldn't need to make 3 copies - // of the data, could return a pair of device pointers to - // local_input_v. 
- // - template - void cache_vertex_properties(rmm::device_vector const &local_input_v, + template + void cache_vertex_properties(iterator_t const &local_input_iterator, rmm::device_vector &src_cache_v, rmm::device_vector &dst_cache_v, bool src = true, @@ -586,13 +586,13 @@ class Louvain { if (src) { src_cache_v.resize(current_graph_view_.get_number_of_local_adj_matrix_partition_rows()); copy_to_adj_matrix_row( - handle_, current_graph_view_, local_input_v.begin(), src_cache_v.begin()); + handle_, current_graph_view_, local_input_iterator, src_cache_v.begin()); } if (dst) { dst_cache_v.resize(current_graph_view_.get_number_of_local_adj_matrix_partition_cols()); copy_to_adj_matrix_col( - handle_, current_graph_view_, local_input_v.begin(), dst_cache_v.begin()); + handle_, current_graph_view_, local_input_iterator, dst_cache_v.begin()); } } @@ -601,9 +601,10 @@ class Louvain { { timer_start("update_clustering"); - rmm::device_vector next_cluster_v(cluster_v_); + rmm::device_vector next_cluster_v(dendrogram_->current_level_begin(), + dendrogram_->current_level_end()); - cache_vertex_properties(next_cluster_v, src_cluster_cache_v_, dst_cluster_cache_v_); + cache_vertex_properties(next_cluster_v.begin(), src_cluster_cache_v_, dst_cluster_cache_v_); weight_t new_Q = modularity(total_edge_weight, resolution); weight_t cur_Q = new_Q - 1; @@ -620,7 +621,7 @@ class Louvain { up_down = !up_down; - cache_vertex_properties(next_cluster_v, src_cluster_cache_v_, dst_cluster_cache_v_); + cache_vertex_properties(next_cluster_v.begin(), src_cluster_cache_v_, dst_cluster_cache_v_); new_Q = modularity(total_edge_weight, resolution); @@ -628,12 +629,13 @@ class Louvain { thrust::copy(rmm::exec_policy(stream_)->on(stream_), next_cluster_v.begin(), next_cluster_v.end(), - cluster_v_.begin()); + dendrogram_->current_level_begin()); } } // cache the final clustering locally on each cpu - cache_vertex_properties(cluster_v_, src_cluster_cache_v_, dst_cluster_cache_v_); + cache_vertex_properties( + dendrogram_->current_level_begin(), src_cluster_cache_v_, dst_cluster_cache_v_); timer_stop(stream_); return cur_Q; @@ -662,7 +664,7 @@ class Louvain { old_cluster_sum_v.begin()); cache_vertex_properties( - old_cluster_sum_v, src_old_cluster_sum_cache_v, empty_cache_weight_v_, true, false); + old_cluster_sum_v.begin(), src_old_cluster_sum_cache_v, empty_cache_weight_v_, true, false); detail::src_cluster_equality_comparator_t compare( src_indices_v_.data().get(), @@ -1118,7 +1120,7 @@ class Louvain { }); cache_vertex_properties( - cluster_weights_v_, src_cluster_weights_cache_v_, dst_cluster_weights_cache_v_); + cluster_weights_v_.begin(), src_cluster_weights_cache_v_, dst_cluster_weights_cache_v_); } template @@ -1204,7 +1206,7 @@ class Louvain { return std::make_pair(relevant_edges_v, relevant_edge_weights_v); } - void shrink_graph(vertex_t *d_cluster_vec) + void shrink_graph() { timer_start("shrinking graph"); @@ -1216,18 +1218,12 @@ class Louvain { // renumber the clusters to the range 0..(num_clusters-1) vertex_t num_clusters = renumber_clusters(hash_map); - renumber_result(hash_map, d_cluster_vec, num_clusters); + // TODO: renumber result needs to be moved to the dendrogram + // renumber_result(hash_map, num_clusters); // shrink our graph to represent the graph of supervertices generate_supervertices_graph(hash_map, num_clusters); - // assign each new vertex to its own cluster - // MNMG: This can be done locally with no communication required - thrust::sequence(rmm::exec_policy(stream_)->on(stream_), - 
cluster_v_.begin(), - cluster_v_.end(), - base_vertex_id_); - timer_stop(stream_); } @@ -1401,119 +1397,6 @@ class Louvain { } } - void renumber_result(cuco::static_map const &hash_map, - vertex_t *d_cluster_vec, - vertex_t num_clusters) - { - if (graph_view_t::is_multi_gpu) { - // - // FIXME: Perhaps there's a general purpose function hidden here... - // Given a set of vertex_t values, and a distributed set of - // vertex properties, go to the proper node and retrieve - // the vertex properties and return them to this gpu. - // - std::size_t capacity{static_cast((local_num_vertices_) / 0.7)}; - cuco::static_map result_hash_map( - capacity, std::numeric_limits::max(), std::numeric_limits::max()); - - auto cluster_iter = thrust::make_transform_iterator(d_cluster_vec, [] __device__(vertex_t c) { - return detail::create_cuco_pair_t()(c); - }); - - result_hash_map.insert(cluster_iter, cluster_iter + local_num_vertices_); - - rmm::device_vector used_cluster_ids_v(result_hash_map.get_size()); - - auto transform_iter = thrust::make_transform_iterator( - thrust::make_counting_iterator(0), - [d_result_hash_map = result_hash_map.get_device_view()] __device__(std::size_t idx) { - return d_result_hash_map.begin_slot()[idx].first.load(); - }); - - used_cluster_ids_v = detail::remove_elements_from_vector( - used_cluster_ids_v, - transform_iter, - transform_iter + result_hash_map.get_capacity(), - [vmax = std::numeric_limits::max()] __device__(vertex_t cluster) { - return cluster != vmax; - }, - stream_); - - auto partition_cluster_ids_iter = thrust::make_transform_iterator( - used_cluster_ids_v.begin(), - [d_vertex_device_view = compute_partition_.vertex_device_view()] __device__(vertex_t v) { - return d_vertex_device_view(v); - }); - - rmm::device_vector old_cluster_ids_v = - variable_shuffle(handle_, - used_cluster_ids_v.size(), - used_cluster_ids_v.begin(), - partition_cluster_ids_iter); - - rmm::device_vector original_gpus_v = - variable_shuffle( - handle_, - used_cluster_ids_v.size(), - thrust::make_constant_iterator(rank_), - partition_cluster_ids_iter); - - // Now each GPU has old cluster ids, let's compute new cluster ids - rmm::device_vector new_cluster_ids_v(old_cluster_ids_v.size()); - - thrust::transform(rmm::exec_policy(stream_)->on(stream_), - old_cluster_ids_v.begin(), - old_cluster_ids_v.end(), - new_cluster_ids_v.begin(), - [base_vertex_id = base_vertex_id_, - d_cluster = cluster_v_.data().get(), - d_hash_map = hash_map.get_device_view()] __device__(vertex_t cluster_id) { - vertex_t c = d_cluster[cluster_id - base_vertex_id]; - auto pos = d_hash_map.find(c); - return pos->second.load(); - }); - - // Shuffle everything back - old_cluster_ids_v = variable_shuffle( - handle_, old_cluster_ids_v.size(), old_cluster_ids_v.begin(), original_gpus_v.begin()); - new_cluster_ids_v = variable_shuffle( - handle_, new_cluster_ids_v.size(), new_cluster_ids_v.begin(), original_gpus_v.begin()); - - // Update result_hash_map - thrust::for_each_n( - rmm::exec_policy(stream_)->on(stream_), - thrust::make_zip_iterator( - thrust::make_tuple(old_cluster_ids_v.begin(), new_cluster_ids_v.begin())), - old_cluster_ids_v.size(), - [d_result_hash_map = result_hash_map.get_device_view()] __device__(auto pair) mutable { - auto pos = d_result_hash_map.find(thrust::get<0>(pair)); - pos->second.store(thrust::get<1>(pair)); - }); - - thrust::transform( - rmm::exec_policy(stream_)->on(stream_), - d_cluster_vec, - d_cluster_vec + number_of_vertices_, - d_cluster_vec, - [d_result_hash_map = 
result_hash_map.get_device_view()] __device__(vertex_t c) { - auto pos = d_result_hash_map.find(c); - return pos->second.load(); - }); - - } else { - thrust::transform(rmm::exec_policy(stream_)->on(stream_), - d_cluster_vec, - d_cluster_vec + number_of_vertices_, - d_cluster_vec, - [d_hash_map = hash_map.get_device_view(), - d_dst_cluster = dst_cluster_cache_v_.data()] __device__(vertex_t v) { - vertex_t c = d_dst_cluster[v]; - auto pos = d_hash_map.find(c); - return pos->second.load(); - }); - } - } - void generate_supervertices_graph(cuco::static_map const &hash_map, vertex_t num_clusters) { @@ -1672,6 +1555,8 @@ class Louvain { raft::handle_t const &handle_; cudaStream_t stream_; + std::unique_ptr> dendrogram_; + vertex_t number_of_vertices_; vertex_t base_vertex_id_{0}; vertex_t base_src_vertex_id_{0}; @@ -1707,7 +1592,6 @@ class Louvain { rmm::device_vector src_cluster_weights_cache_v_{}; rmm::device_vector dst_cluster_weights_cache_v_{}; - rmm::device_vector cluster_v_; rmm::device_vector src_cluster_cache_v_{}; rmm::device_vector dst_cluster_cache_v_{}; diff --git a/cpp/tests/experimental/louvain_test.cu b/cpp/tests/experimental/louvain_test.cu index 4a47b1a1aca..35a26923df6 100644 --- a/cpp/tests/experimental/louvain_test.cu +++ b/cpp/tests/experimental/louvain_test.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/notebooks/community/ECG.ipynb b/notebooks/community/ECG.ipynb index d7595dadb26..4a9eedd3c3a 100644 --- a/notebooks/community/ECG.ipynb +++ b/notebooks/community/ECG.ipynb @@ -5,6 +5,7 @@ "metadata": {}, "source": [ "# Ensemble Clustering for Graphs (ECG)\n", + "# Does not run on Pascal\n", "In this notebook, we will use cuGraph to identify the cluster in a test graph using the Ensemble Clustering for Graph approach. \n", "\n", "\n", diff --git a/notebooks/community/Louvain.ipynb b/notebooks/community/Louvain.ipynb index e5e5e6a04ed..bfb8e299f49 100755 --- a/notebooks/community/Louvain.ipynb +++ b/notebooks/community/Louvain.ipynb @@ -5,6 +5,7 @@ "metadata": {}, "source": [ "# Louvain Community Detection\n", + "# Does not run on Pascal\n", "\n", "\n", "In this notebook, we will use cuGraph to identify the cluster in a test graph using the Louvain algorithm \n", diff --git a/notebooks/community/Subgraph-Extraction.ipynb b/notebooks/community/Subgraph-Extraction.ipynb index e068ef53aa5..cac52262d4d 100755 --- a/notebooks/community/Subgraph-Extraction.ipynb +++ b/notebooks/community/Subgraph-Extraction.ipynb @@ -5,6 +5,7 @@ "metadata": {}, "source": [ "# Subgraph Extraction\n", + "# Does not run on Pascal\n", "\n", "In this notebook, we will use cuGraph to extract a subgraph from the test graph. \n", "\n", diff --git a/notebooks/community/Triangle-Counting.ipynb b/notebooks/community/Triangle-Counting.ipynb index 09d7906a526..19d3f838fc6 100755 --- a/notebooks/community/Triangle-Counting.ipynb +++ b/notebooks/community/Triangle-Counting.ipynb @@ -21,7 +21,7 @@ "\n", "\n", "## Introduction\n", - "Triancle Counting, as the name implies, finds the number of triangles in a graph. Triangles are important in computing the clustering Coefficient and can be used for clustering. \n", + "Triangle Counting, as the name implies, finds the number of triangles in a graph. Triangles are important in computing the clustering Coefficient and can be used for clustering. 
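The Louvain refactor earlier in this diff replaces the flat cluster_v_ vector with a dendrogram member (dendrogram_): each outer pass of the algorithm now records its own level of cluster assignments, read back through current_level_begin()/current_level_end(), instead of overwriting a single array, and cache_vertex_properties() is templated on an iterator so it can be fed either a temporary vector or the dendrogram's current level. A minimal, host-side Python sketch of the data structure this implies (illustrative names only; the real container is a device-memory C++ class):

import numpy as np


class Dendrogram:
    """Host-side stand-in for the device-memory dendrogram used by Louvain."""

    def __init__(self):
        self.levels = []  # levels[i][v] = cluster assigned to vertex v at level i

    def add_level(self, num_verts):
        # every vertex starts a new level in its own cluster, like the
        # thrust::sequence call this diff removes from shrink_graph()
        self.levels.append(np.arange(num_verts))

    def current_level(self):
        # analogous to current_level_begin()/current_level_end()
        return self.levels[-1]

    def flatten(self):
        # compose the per-level assignments to map every original vertex to
        # its final cluster (roughly what renumber_result() used to produce)
        result = self.levels[0].copy()
        for level in self.levels[1:]:
            result = level[result]
        return result

The "TODO: renumber result needs to be moved to the dendrogram" comment left in shrink_graph() points at the flatten step above: once the per-level renumbering is stored in the dendrogram, the final vertex-to-cluster mapping can be recovered by composing the levels.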
\n", "\n", "\n", "To compute the Pagerank scores for a graph in cuGraph we use:
\n", diff --git a/notebooks/cugraph_benchmarks/bfs_benchmark.ipynb b/notebooks/cugraph_benchmarks/bfs_benchmark.ipynb index 58eb94bf0ee..6ae695e206e 100644 --- a/notebooks/cugraph_benchmarks/bfs_benchmark.ipynb +++ b/notebooks/cugraph_benchmarks/bfs_benchmark.ipynb @@ -5,6 +5,7 @@ "metadata": {}, "source": [ "# BFS Performance Benchmarking\n", + "# Skip notebook test\n", "\n", "This notebook benchmarks performance of running BFS within cuGraph against NetworkX. \n", "\n", diff --git a/notebooks/cugraph_benchmarks/louvain_benchmark.ipynb b/notebooks/cugraph_benchmarks/louvain_benchmark.ipynb index a12b7c4bcc2..00e99a28617 100644 --- a/notebooks/cugraph_benchmarks/louvain_benchmark.ipynb +++ b/notebooks/cugraph_benchmarks/louvain_benchmark.ipynb @@ -5,6 +5,7 @@ "metadata": {}, "source": [ "# Louvain Performance Benchmarking\n", + "# Skip notebook test\n", "\n", "This notebook benchmarks performance improvement of running the Louvain clustering algorithm within cuGraph against NetworkX. The test is run over eight test networks (graphs) and then results plotted. \n", "

\n", diff --git a/notebooks/cugraph_benchmarks/nx_cugraph_bc_benchmarking.ipynb b/notebooks/cugraph_benchmarks/nx_cugraph_bc_benchmarking.ipynb index 6f76868f9a4..403c317ac0a 100644 --- a/notebooks/cugraph_benchmarks/nx_cugraph_bc_benchmarking.ipynb +++ b/notebooks/cugraph_benchmarks/nx_cugraph_bc_benchmarking.ipynb @@ -5,6 +5,7 @@ "metadata": {}, "source": [ "# Benchmarking NetworkX compatibility\n", + "# Skip notebook test\n", "This notebook benchmark the use of a NetworkX Graph object as input into algorithms.

\n", "The intention of the feature is to be able to drop cuGraph into existing NetworkX code in spot where performance is not optimal.\n", "\n", diff --git a/notebooks/cugraph_benchmarks/pagerank_benchmark.ipynb b/notebooks/cugraph_benchmarks/pagerank_benchmark.ipynb index c2933a10c7d..d0416efdd87 100644 --- a/notebooks/cugraph_benchmarks/pagerank_benchmark.ipynb +++ b/notebooks/cugraph_benchmarks/pagerank_benchmark.ipynb @@ -5,6 +5,7 @@ "metadata": {}, "source": [ "# PageRank Performance Benchmarking\n", + "# Skip notebook test\n", "\n", "This notebook benchmarks performance of running PageRank within cuGraph against NetworkX. NetworkX contains several implementations of PageRank. This benchmark will compare cuGraph versus the defaukt Nx implementation as well as the SciPy version\n", "\n", diff --git a/notebooks/cugraph_benchmarks/release.ipynb b/notebooks/cugraph_benchmarks/release.ipynb index d3110da3621..3c6da55abc0 100644 --- a/notebooks/cugraph_benchmarks/release.ipynb +++ b/notebooks/cugraph_benchmarks/release.ipynb @@ -5,6 +5,7 @@ "metadata": {}, "source": [ "# Release Benchmarking\n", + "# Skip notebook test\n", "\n", "With every release, RAPIDS publishes a release slide deck that includes the current performance state of cuGraph. \n", "This notebook, starting with release 0.15, runs all the various algorithms to computes the performance gain. \n", diff --git a/notebooks/cugraph_benchmarks/sssp_benchmark.ipynb b/notebooks/cugraph_benchmarks/sssp_benchmark.ipynb index 2d040e0acaf..32b562e7a1e 100644 --- a/notebooks/cugraph_benchmarks/sssp_benchmark.ipynb +++ b/notebooks/cugraph_benchmarks/sssp_benchmark.ipynb @@ -5,6 +5,7 @@ "metadata": {}, "source": [ "# SSSP Performance Benchmarking\n", + "# Skip notebook test\n", "\n", "This notebook benchmarks performance of running SSSP within cuGraph against NetworkX. \n", "\n", diff --git a/notebooks/demo/uvm.ipynb b/notebooks/demo/uvm.ipynb index d279be8ed54..8fa2b08b6d1 100644 --- a/notebooks/demo/uvm.ipynb +++ b/notebooks/demo/uvm.ipynb @@ -6,6 +6,7 @@ "source": [ "# Oversubscribing GPU memory in cuGraph\n", "#### Author : Alex Fender\n", + "# Skip notebook test\n", "\n", "In this notebook, we will show how to **scale to 4x larger graphs than before** without incurring a performance drop using managed memory features in cuGraph. We will compute the PageRank of each user in Twitter's dataset on a single GPU as an example. 
This technique applies to all features.\n", "\n", diff --git a/python/cugraph/tests/test_ecg.py b/python/cugraph/tests/test_ecg.py index 86f9ed343ce..3028c702721 100644 --- a/python/cugraph/tests/test_ecg.py +++ b/python/cugraph/tests/test_ecg.py @@ -16,32 +16,38 @@ import pytest import networkx as nx import cugraph + from cugraph.tests import utils +from cugraph.utilities.utils import is_device_version_less_than + from pathlib import PurePath def cugraph_call(G, min_weight, ensemble_size): df = cugraph.ecg(G, min_weight, ensemble_size) num_parts = df["partition"].max() + 1 - score = cugraph.analyzeClustering_modularity(G, num_parts, df, - 'vertex', 'partition') + score = cugraph.analyzeClustering_modularity( + G, num_parts, df, "vertex", "partition" + ) return score, num_parts def golden_call(graph_file): - if graph_file == PurePath(utils.RAPIDS_DATASET_ROOT_DIR)/"dolphins.csv": + if graph_file == PurePath(utils.RAPIDS_DATASET_ROOT_DIR) / "dolphins.csv": return 0.4962422251701355 - if graph_file == PurePath(utils.RAPIDS_DATASET_ROOT_DIR)/"karate.csv": + if graph_file == PurePath(utils.RAPIDS_DATASET_ROOT_DIR) / "karate.csv": return 0.38428664207458496 - if graph_file == PurePath(utils.RAPIDS_DATASET_ROOT_DIR)/"netscience.csv": + if ( + graph_file + == PurePath(utils.RAPIDS_DATASET_ROOT_DIR) / "netscience.csv" + ): return 0.9279554486274719 -DATASETS = [PurePath(utils.RAPIDS_DATASET_ROOT_DIR)/f for f in [ - "karate.csv", - "dolphins.csv", - "netscience.csv"] +DATASETS = [ + PurePath(utils.RAPIDS_DATASET_ROOT_DIR) / f + for f in ["karate.csv", "dolphins.csv", "netscience.csv"] ] MIN_WEIGHTS = [0.05, 0.10, 0.15] @@ -55,20 +61,32 @@ def golden_call(graph_file): def test_ecg_clustering(graph_file, min_weight, ensemble_size): gc.collect() - # Read in the graph and get a cugraph object - cu_M = utils.read_csv_file(graph_file, read_weights_in_sp=False) - G = cugraph.Graph() - G.from_cudf_edgelist(cu_M, source="0", destination="1", edge_attr="2") + if is_device_version_less_than((7, 0)): + cu_M = utils.read_csv_file(graph_file, read_weights_in_sp=False) + G = cugraph.Graph() + G.from_cudf_edgelist(cu_M, source="0", destination="1", edge_attr="2") - # Get the modularity score for partitioning versus random assignment - cu_score, num_parts = cugraph_call(G, min_weight, ensemble_size) - golden_score = golden_call(graph_file) + # Get the modularity score for partitioning versus random assignment + with pytest.raises(RuntimeError): + cu_score, num_parts = cugraph_call(G, min_weight, ensemble_size) + else: + # Read in the graph and get a cugraph object + cu_M = utils.read_csv_file(graph_file, read_weights_in_sp=False) + G = cugraph.Graph() + G.from_cudf_edgelist(cu_M, source="0", destination="1", edge_attr="2") + + # Get the modularity score for partitioning versus random assignment + cu_score, num_parts = cugraph_call(G, min_weight, ensemble_size) + golden_score = golden_call(graph_file) - # Assert that the partitioning has better modularity than the random - # assignment - assert cu_score > (0.95 * golden_score) + # Assert that the partitioning has better modularity than the random + # assignment + assert cu_score > (0.95 * golden_score) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", DATASETS) @pytest.mark.parametrize("min_weight", MIN_WEIGHTS) @pytest.mark.parametrize("ensemble_size", ENSEMBLE_SIZES) @@ -78,8 +96,7 @@ def test_ecg_clustering_nx(graph_file, min_weight, ensemble_size): # Read in the graph and get a 
NetworkX graph M = utils.read_csv_for_nx(graph_file, read_weights_in_sp=True) G = nx.from_pandas_edgelist( - M, source="0", target="1", edge_attr="weight", - create_using=nx.Graph() + M, source="0", target="1", edge_attr="weight", create_using=nx.Graph() ) # Get the modularity score for partitioning versus random assignment diff --git a/python/cugraph/tests/test_leiden.py b/python/cugraph/tests/test_leiden.py index d6a7f86b5c5..89203d5014c 100644 --- a/python/cugraph/tests/test_leiden.py +++ b/python/cugraph/tests/test_leiden.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -20,6 +20,8 @@ import cugraph from cugraph.tests import utils +from cugraph.utilities.utils import is_device_version_less_than + # Temporarily suppress warnings till networkX fixes deprecation warnings # (Using or importing the ABCs from 'collections' instead of from # 'collections.abc' is deprecated, and in 3.8 it will stop working) for @@ -53,6 +55,9 @@ def cugraph_louvain(G, edgevals=False): return parts, mod +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_leiden(graph_file): gc.collect() @@ -73,6 +78,9 @@ def test_leiden(graph_file): assert leiden_mod >= (0.99 * louvain_mod) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_leiden_nx(graph_file): gc.collect() @@ -81,18 +89,13 @@ def test_leiden_nx(graph_file): NM = utils.read_csv_for_nx(graph_file) if edgevals: - G = nx.from_pandas_edgelist(NM, - create_using=nx.Graph(), - source="0", - target="1" - ) + G = nx.from_pandas_edgelist( + NM, create_using=nx.Graph(), source="0", target="1" + ) else: - G = nx.from_pandas_edgelist(NM, - create_using=nx.Graph(), - source="0", - target="1", - edge_attr="2" - ) + G = nx.from_pandas_edgelist( + NM, create_using=nx.Graph(), source="0", target="1", edge_attr="2" + ) leiden_parts, leiden_mod = cugraph_leiden(G, edgevals=True) louvain_parts, louvain_mod = cugraph_louvain(G, edgevals=True) diff --git a/python/cugraph/tests/test_louvain.py b/python/cugraph/tests/test_louvain.py index d6b0030eb73..50e9ccaa4c5 100644 --- a/python/cugraph/tests/test_louvain.py +++ b/python/cugraph/tests/test_louvain.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -18,6 +18,7 @@ import cugraph from cugraph.tests import utils +from cugraph.utilities.utils import is_device_version_less_than # Temporarily suppress warnings till networkX fixes deprecation warnings # (Using or importing the ABCs from 'collections' instead of from @@ -71,51 +72,63 @@ def networkx_call(M): def test_louvain_with_edgevals(graph_file): gc.collect() - M = utils.read_csv_for_nx(graph_file) - cu_M = utils.read_csv_file(graph_file) - cu_parts, cu_mod = cugraph_call(cu_M, edgevals=True) + if is_device_version_less_than((7, 0)): + cu_M = utils.read_csv_file(graph_file) + with pytest.raises(RuntimeError): + cu_parts, cu_mod = cugraph_call(cu_M) + else: + M = utils.read_csv_for_nx(graph_file) + cu_M = utils.read_csv_file(graph_file) + cu_parts, cu_mod = cugraph_call(cu_M, edgevals=True) - nx_parts = networkx_call(M) - # Calculating modularity scores for comparison - Gnx = nx.from_pandas_edgelist( - M, source="0", target="1", edge_attr="weight", create_using=nx.Graph() - ) + nx_parts = networkx_call(M) + # Calculating modularity scores for comparison + Gnx = nx.from_pandas_edgelist( + M, source="0", target="1", + edge_attr="weight", create_using=nx.Graph() + ) - cu_parts = cu_parts.to_pandas() - cu_map = dict(zip(cu_parts['vertex'], cu_parts['partition'])) + cu_parts = cu_parts.to_pandas() + cu_map = dict(zip(cu_parts["vertex"], cu_parts["partition"])) - assert set(nx_parts.keys()) == set(cu_map.keys()) + assert set(nx_parts.keys()) == set(cu_map.keys()) - cu_mod_nx = community.modularity(cu_map, Gnx) - nx_mod = community.modularity(nx_parts, Gnx) + cu_mod_nx = community.modularity(cu_map, Gnx) + nx_mod = community.modularity(nx_parts, Gnx) - assert len(cu_parts) == len(nx_parts) - assert cu_mod > (0.82 * nx_mod) - assert abs(cu_mod - cu_mod_nx) < 0.0001 + assert len(cu_parts) == len(nx_parts) + assert cu_mod > (0.82 * nx_mod) + assert abs(cu_mod - cu_mod_nx) < 0.0001 @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) def test_louvain(graph_file): gc.collect() - M = utils.read_csv_for_nx(graph_file) - cu_M = utils.read_csv_file(graph_file) - cu_parts, cu_mod = cugraph_call(cu_M) - nx_parts = networkx_call(M) + if is_device_version_less_than((7, 0)): + cu_M = utils.read_csv_file(graph_file) + with pytest.raises(RuntimeError): + cu_parts, cu_mod = cugraph_call(cu_M) + else: + M = utils.read_csv_for_nx(graph_file) + cu_M = utils.read_csv_file(graph_file) + cu_parts, cu_mod = cugraph_call(cu_M) + nx_parts = networkx_call(M) - # Calculating modularity scores for comparison - Gnx = nx.from_pandas_edgelist( - M, source="0", target="1", edge_attr="weight", create_using=nx.Graph() - ) + # Calculating modularity scores for comparison + Gnx = nx.from_pandas_edgelist( + M, source="0", target="1", + edge_attr="weight", create_using=nx.Graph() + ) - cu_parts = cu_parts.to_pandas() - cu_map = dict(zip(cu_parts['vertex'], cu_parts['partition'])) + cu_parts = cu_parts.to_pandas() + cu_map = dict(zip(cu_parts["vertex"], cu_parts["partition"])) - assert set(nx_parts.keys()) == set(cu_map.keys()) + assert set(nx_parts.keys()) == set(cu_map.keys()) - cu_mod_nx = community.modularity(cu_map, Gnx) - nx_mod = community.modularity(nx_parts, Gnx) + cu_mod_nx = community.modularity(cu_map, Gnx) + nx_mod = community.modularity(nx_parts, Gnx) - assert len(cu_parts) == len(nx_parts) - assert cu_mod > (0.82 * nx_mod) - assert abs(cu_mod - cu_mod_nx) < 0.0001 + assert len(cu_parts) == len(nx_parts) + assert cu_mod > (0.82 * nx_mod) + assert abs(cu_mod - 
cu_mod_nx) < 0.0001