Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Build a Dendrogram class, adapt Louvain/Leiden/ECG to use it #1359

Merged
merged 21 commits into from
Feb 5, 2021
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
ab335b7
create a dendogram in the C++ code
ChuckHastings Jan 7, 2021
4d984c6
fix clang format issues
ChuckHastings Jan 7, 2021
9c276ae
update copyright date
ChuckHastings Jan 7, 2021
0ded26d
Merge branch 'branch-0.18' into fea_louvain_dendogram
ChuckHastings Jan 14, 2021
637aee1
New idea for ECG
ChuckHastings Jan 21, 2021
7187bdb
Merge branch 'branch-0.18' into fea_louvain_dendogram
ChuckHastings Jan 21, 2021
347051c
rename ECG to make it consistent
ChuckHastings Jan 21, 2021
755c298
missed renaming ECG in CMakeLists.txt
ChuckHastings Jan 21, 2021
142024c
Merge branch 'branch-0.18' into fea_louvain_dendogram
ChuckHastings Jan 27, 2021
07ad090
fix spelling of dendrogram, fix clang formatting issues
ChuckHastings Jan 27, 2021
becbc63
Merge branch 'branch-0.18' into fea_louvain_dendogram
ChuckHastings Feb 2, 2021
bc3f26d
address comments from PR
ChuckHastings Feb 3, 2021
a3dd26d
Merge branch 'branch-0.18' into fea_louvain_dendogram
ChuckHastings Feb 3, 2021
84d3bf1
add checks for Pascal architecture in Louvain derived tests
ChuckHastings Feb 4, 2021
2e89e2c
reformat and update copyright dates
ChuckHastings Feb 4, 2021
825d6af
a few more flake8 errors
ChuckHastings Feb 4, 2021
4f0453e
Merge branch 'branch-0.18' into fea_louvain_dendogram
ChuckHastings Feb 4, 2021
9afc8de
refactor notebook tests to make it easier to filter tests; filter out…
ChuckHastings Feb 5, 2021
3db5697
delete some unused code in script
ChuckHastings Feb 5, 2021
d40f5a0
misspelled update from Rick
ChuckHastings Feb 5, 2021
9532b13
add early breaks, fix copyright dates
ChuckHastings Feb 5, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
New idea for ECG
1) Separate flatten_dendogram from dendogram class
2) Add initialize_dendogram_level function
3) Created an ECG variation of Louvain that initializes
   the dendogram with a random ordering of vertex ids
   rather than creating a new graph.
  • Loading branch information
ChuckHastings committed Jan 21, 2021
commit 637aee19b092fc1fff3045fd4210b3f5754aa9f1
125 changes: 98 additions & 27 deletions cpp/src/community/ECG.cu
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,11 @@

#include <rmm/thrust_rmm_allocator.h>
#include <thrust/random.h>
#include <community/louvain.cuh>
#include <converters/permute_graph.cuh>
#include <ctime>
#include <utilities/error.hpp>
#include "utilities/graph_utils.cuh"
#include <utilities/graph_utils.cuh>

namespace {
template <typename IndexType>
Expand All @@ -42,25 +43,20 @@ binsearch_maxle(const IndexType *vec, const IndexType val, IndexType low, IndexT
}

template <typename IdxT, typename ValT>
__global__ void match_check_kernel(IdxT size,
IdxT num_verts,
IdxT *offsets,
IdxT *indices,
IdxT *permutation,
IdxT *parts,
ValT *weights)
__global__ void match_check_kernel(
IdxT size, IdxT num_verts, IdxT *offsets, IdxT *indices, IdxT *parts, ValT *weights)
{
IdxT tid = blockIdx.x * blockDim.x + threadIdx.x;
while (tid < size) {
IdxT source = binsearch_maxle(offsets, tid, (IdxT)0, num_verts);
IdxT dest = indices[tid];
if (parts[permutation[source]] == parts[permutation[dest]]) weights[tid] += 1;
if (parts[source] == parts[dest]) weights[tid] += 1;
tid += gridDim.x * blockDim.x;
}
}

struct prg {
__host__ __device__ float operator()(int n)
__device__ float operator()(int n)
{
thrust::default_random_engine rng;
thrust::uniform_real_distribution<float> dist(0.0, 1.0);
Expand Down Expand Up @@ -103,47 +99,123 @@ void get_permutation_vector(T size, T seed, T *permutation, cudaStream_t stream)
rmm::exec_policy(stream)->on(stream), randoms_v.begin(), randoms_v.end(), permutation);
}

/**
 * Louvain variant used by ECG.
 *
 * Carries a seed and supplies its own initialize_dendogram_level(), which
 * fills the newly added dendogram level via get_permutation_vector() instead
 * of the base class's initialization.
 *
 * NOTE(review): this replacement is only picked up by calls made from inside
 * cugraph::Louvain if the base class declares initialize_dendogram_level as
 * virtual -- confirm against louvain.cuh.
 */
template <typename graph_type>
class EcgLouvain : public cugraph::Louvain<graph_type> {
  using base_t = cugraph::Louvain<graph_type>;

 public:
  using graph_t  = graph_type;
  using vertex_t = typename graph_type::vertex_type;
  using edge_t   = typename graph_type::edge_type;
  using weight_t = typename graph_type::weight_type;

  // Forwards handle and graph to the Louvain base and remembers the seed.
  EcgLouvain(raft::handle_t const &handle, graph_type const &graph, vertex_t seed)
    : base_t(handle, graph), seed_(seed)
  {
  }

  // Adds a level of num_vertices entries to the dendogram, then lets
  // get_permutation_vector() populate that level using seed_ on this
  // object's stream.
  void initialize_dendogram_level(vertex_t num_vertices)
  {
    this->dendogram_->add_level(num_vertices);

    auto level_begin = this->dendogram_->current_level_begin();
    get_permutation_vector(num_vertices, seed_, level_begin, this->stream_);
  }

 private:
  // Seed handed to get_permutation_vector() on every level initialization.
  vertex_t seed_;
};

} // anonymous namespace

namespace cugraph {

#if 0
// Placeholder ECG overload for experimental::graph_view_t. It is compiled out
// by the surrounding #if 0 and is not usable as written:
//   - the function is declared void, yet the else-branch returns a std::pair;
//   - max_level and resolution are used below but are not parameters or locals;
//   - min_weight and ensemble_size are accepted but never read;
//   - the flattening of the dendogram into `clustering` is still a TODO.
// NOTE(review): the body appears to mirror the experimental louvain() wrapper
// rather than implement ECG -- confirm before enabling.
template <typename vertex_t, typename edge_t, typename weight_t, bool multi_gpu>
void ecg(raft::handle_t const &handle,
experimental::graph_view_t<vertex_t, edge_t, weight_t, false, multi_gpu> const &graph_view,
weight_t min_weight,
vertex_t ensemble_size,
vertex_t *clustering)
{
// Alias is declared but the runner below spells the full type out again.
using graph_type = experimental::graph_view_t<vertex_t, edge_t, weight_t, false, multi_gpu>;

CUGRAPH_EXPECTS(clustering != nullptr, "Invalid input argument: clustering is NULL");

// "FIXME": remove this check and the guards below
//
// Disable louvain(experimental::graph_view_t,...)
// versions for GPU architectures < 700
// (cuco/static_map.cuh depends on features not supported on or before Pascal)
//
// Queries the properties of device 0 specifically.
cudaDeviceProp device_prop;
CUDA_CHECK(cudaGetDeviceProperties(&device_prop, 0));

if (device_prop.major < 7) {
CUGRAPH_FAIL("ECG not supported on Pascal and older architectures");
} else {
experimental::Louvain<experimental::graph_view_t<vertex_t, edge_t, weight_t, false, multi_gpu>>
runner(handle, graph_view);

// max_level and resolution are not declared anywhere in this function.
weight_t wt = runner(max_level, resolution);
// TODO: implement this...
//runner.get_dendogram().partition_at_level(clustering, runner.get_dendogram().num_levels());

// FIXME: Consider returning the Dendogram at some point
// Returning a value here conflicts with the void return type above.
return std::make_pair(runner.get_dendogram().num_levels(), wt);
}
}
#endif

template <typename vertex_t, typename edge_t, typename weight_t>
void ecg(raft::handle_t const &handle,
GraphCSRView<vertex_t, edge_t, weight_t> const &graph,
weight_t min_weight,
vertex_t ensemble_size,
vertex_t *clustering)
{
using graph_type = GraphCSRView<vertex_t, edge_t, weight_t>;

CUGRAPH_EXPECTS(graph.edge_data != nullptr, "API error, louvain expects a weighted graph");
CUGRAPH_EXPECTS(clustering != nullptr, "Invalid input argument: clustering is NULL");

// TODO: New idea... rather than creating a permuted graph
// Observe that because of the up/down behavior... the only difference graph ordering
// has on the computation is in deciding what moves in each up/down pass.
// This *should be* equivalent to the following:
// Instead of initializing each vertex to be in a cluster by itself that
// is equal to cluster id, initialize each vertex to be in a cluster by
// itself equal to a random id. For each random run of a 1-level Louvain,
// the cluster ids would be randomized differently.
//
// For MNMG, we could assume an equal distribution across the GPUs and
// generate to a pattern so that we don't need to do any comms. It wouldn't
// be completely random, but it should result in an appropriate amount of
// randomness to get the desired effect.
//
// Note that this would require implementing the highest level Louvain function,
// or splitting the initialization into an overridable function...
//
//
// MNMG issue... in order to create an MNMG implementation we need to create
// a distributed implementation of get_permutation_vector, preferably without
// comms...
//

cudaStream_t stream{0};

rmm::device_vector<weight_t> ecg_weights_v(graph.edge_data,
graph.edge_data + graph.number_of_edges);

vertex_t size{graph.number_of_vertices};
// TODO: This seed should be a parameter
// TODO: MNMG - should add the rank to the seed so every thread is a separate seed
vertex_t seed{1};

auto permuted_graph = std::make_unique<GraphCSR<vertex_t, edge_t, weight_t>>(
size, graph.number_of_edges, graph.has_data());

// Iterate over each member of the ensemble
for (vertex_t i = 0; i < ensemble_size; i++) {
// Take random permutation of the graph
rmm::device_vector<vertex_t> permutation_v(size);
vertex_t *d_permutation = permutation_v.data().get();

get_permutation_vector(size, seed, d_permutation, stream);
EcgLouvain<graph_type> runner(handle, graph, seed);
seed += size;

detail::permute_graph<vertex_t, edge_t, weight_t>(graph, d_permutation, permuted_graph->view());

// Run one level of Louvain clustering on the random permutation
rmm::device_vector<vertex_t> parts_v(size);
vertex_t *d_parts = parts_v.data().get();

cugraph::louvain(handle, permuted_graph->view(), d_parts, size_t{1});
weight_t wt = runner(size_t{1}, weight_t{1});

// For each edge in the graph determine whether the endpoints are in the same partition
// Keep a sum for each edge of the total number of times its endpoints are in the same partition
Expand All @@ -154,8 +226,7 @@ void ecg(raft::handle_t const &handle,
graph.number_of_vertices,
graph.offsets,
graph.indices,
permutation_v.data().get(),
d_parts,
runner.get_dendogram().get_level_ptr_unsafe(0),
ecg_weights_v.data().get());
}

Expand Down
20 changes: 0 additions & 20 deletions cpp/src/community/dendogram.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -49,26 +49,6 @@ class Dendogram {

// Size of the level identified by current_level(), looked up through
// get_level_size_unsafe().
vertex_t current_level_size() const { return get_level_size_unsafe(current_level()); }

// Writes into d_partition[v], for every v in [0, size of level 0), the value
// obtained by following v through the first `level` levels of the dendogram:
// p = v, then p = level_l[p] for l = 0 .. level-1.
//
// @param d_partition  Output array; one entry is written per level-0 element.
//                     Presumably a device pointer, since it is written from a
//                     __device__ lambda -- confirm with callers.
// @param level        Number of levels to follow, starting at level 0.
//
// All work is issued on stream 0.
void partition_at_level(vertex_t *d_partition, size_t level) const
{
  cudaStream_t stream{0};

  // Collect the per-level pointers on the host, then copy the table to the
  // device so the lambda below can index it.
  std::vector<vertex_t *> level_ptrs_v(level);

  for (size_t i = 0; i < level; ++i) { level_ptrs_v[i] = get_level_ptr_unsafe(i); }

  rmm::device_vector<vertex_t *> d_level_ptrs_v(level_ptrs_v);

  thrust::for_each(
    rmm::exec_policy(stream)->on(stream),
    thrust::make_counting_iterator<vertex_t>(0),
    thrust::make_counting_iterator<vertex_t>(get_level_size_unsafe(0)),
    [d_partition, level, d_level_ptrs = d_level_ptrs_v.data().get()] __device__(vertex_t v) {
      vertex_t p = v;
      // Index with size_t so the comparison against `level` is not a
      // signed/unsigned mix.
      for (size_t l = 0; l < level; ++l) { p = d_level_ptrs[l][p]; }
      d_partition[v] = p;
    });
}

private:
std::vector<vertex_t> level_size_;
std::vector<std::unique_ptr<rmm::device_buffer>> level_ptr_;
Expand Down
52 changes: 52 additions & 0 deletions cpp/src/community/flatten_dendogram.cuh
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once

#include <community/dendogram.cuh>
#include <experimental/graph_functions.hpp>
#include <raft/handle.hpp>

namespace cugraph {

/**
 * Flattens the first `level` levels of a dendogram into d_partition.
 *
 * d_partition is first filled with a copy of d_vertex_ids (one entry per
 * level-0 element), then cugraph::experimental::relabel is invoked once per
 * level, in order from level 0 to level-1, with (d_vertex_ids, level data) as
 * the label-pair arrays.
 *
 * @param handle        RAFT handle; its stream is used for the initial copy.
 * @param dendogram     Dendogram whose levels are applied.
 * @param d_vertex_ids  Vertex ids; at least as many entries as level 0 has.
 * @param d_partition   Output array, same length as level 0.
 * @param level         Number of levels to apply.
 *
 * NOTE(review): presumably relabel rewrites d_partition in place using each
 * level's assignment -- confirm against relabel's documentation.
 */
template <typename vertex_t, bool multi_gpu>
void partition_at_level(raft::handle_t const &handle,
                        Dendogram<vertex_t> const &dendogram,
                        vertex_t const *d_vertex_ids,
                        vertex_t *d_partition,
                        size_t level)
{
  auto const stream           = handle.get_stream();
  vertex_t num_local_vertices = dendogram.get_level_size_unsafe(0);

  // Seed the output with the vertex ids themselves.
  thrust::copy(rmm::exec_policy(stream)->on(stream),
               d_vertex_ids,
               d_vertex_ids + num_local_vertices,
               d_partition);

  // Apply the levels one after another, lowest level first.
  for (size_t current = 0; current < level; ++current) {
    std::tuple<vertex_t const *, vertex_t const *> label_pairs(
      d_vertex_ids, dendogram.get_level_ptr_unsafe(current));

    cugraph::experimental::relabel<vertex_t, multi_gpu>(handle,
                                                        label_pairs,
                                                        dendogram.get_level_size_unsafe(current),
                                                        d_partition,
                                                        num_local_vertices);
  }
}

} // namespace cugraph
11 changes: 10 additions & 1 deletion cpp/src/community/leiden.cu
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
*/

#include <community/leiden.cuh>
#include <community/flatten_dendogram.cuh>

namespace cugraph {

Expand All @@ -32,7 +33,15 @@ std::pair<size_t, weight_t> leiden(raft::handle_t const &handle,
Leiden<GraphCSRView<vertex_t, edge_t, weight_t>> runner(handle, graph);
weight_t wt = runner(max_level, resolution);

runner.get_dendogram().partition_at_level(clustering, runner.get_dendogram().num_levels());
thrust::device_vector<vertex_t> vertex_ids_v(graph.number_of_vertices);
Copy link
Contributor

@seunghwak seunghwak Feb 2, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the element type is arithmetic (i.e., you are not storing non-arithmetic types), rmm::device_uvector will be more efficient and more stream-ready.

Note that rmm::device_uvector does not invoke the default constructor to initialize vector elements, so if your code expects initialized elements, you need to call thrust::fill() yourself.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Changed in the next push.


thrust::copy(
rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
thrust::make_counting_iterator<vertex_t>(0), // MNMG - base vertex id
thrust::make_counting_iterator<vertex_t>(graph.number_of_vertices), // MNMG - base vertex id + number_of_vertices
vertex_ids_v.begin());

partition_at_level<vertex_t, false>(handle, runner.get_dendogram(), vertex_ids_v.data().get(), clustering, runner.get_dendogram().num_levels());

// FIXME: Consider returning the Dendogram at some point
return std::make_pair(runner.get_dendogram().num_levels(), wt);
Expand Down
19 changes: 17 additions & 2 deletions cpp/src/community/louvain.cu
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
*/

#include <community/louvain.cuh>
#include <community/flatten_dendogram.cuh>
#include <experimental/graph.hpp>
#include <experimental/louvain.cuh>

Expand All @@ -36,7 +37,15 @@ std::pair<size_t, weight_t> louvain(raft::handle_t const &handle,
Louvain<GraphCSRView<vertex_t, edge_t, weight_t>> runner(handle, graph_view);
weight_t wt = runner(max_level, resolution);

runner.get_dendogram().partition_at_level(clustering, runner.get_dendogram().num_levels());
thrust::device_vector<vertex_t> vertex_ids_v(graph_view.number_of_vertices);

thrust::copy(
rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
thrust::make_counting_iterator<vertex_t>(0), // MNMG - base vertex id
thrust::make_counting_iterator<vertex_t>(graph_view.number_of_vertices), // MNMG - base vertex id + number_of_vertices
vertex_ids_v.begin());

partition_at_level<vertex_t, false>(handle, runner.get_dendogram(), vertex_ids_v.data().get(), clustering, runner.get_dendogram().num_levels());

// FIXME: Consider returning the Dendogram at some point
return std::make_pair(runner.get_dendogram().num_levels(), wt);
Expand Down Expand Up @@ -66,7 +75,13 @@ std::pair<size_t, weight_t> louvain(
} else {
experimental::Louvain<experimental::graph_view_t<vertex_t, edge_t, weight_t, false, multi_gpu>>
runner(handle, graph_view);
return runner(clustering, max_level, resolution);

weight_t wt = runner(max_level, resolution);
// TODO: implement this...
//runner.get_dendogram().partition_at_level(clustering, runner.get_dendogram().num_levels());

// FIXME: Consider returning the Dendogram at some point
return std::make_pair(runner.get_dendogram().num_levels(), wt);
}
}

Expand Down
14 changes: 9 additions & 5 deletions cpp/src/community/louvain.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -147,11 +147,7 @@ class Louvain {
//
// Initialize every cluster to reference each vertex to itself
//
dendogram_->add_level(current_graph.number_of_vertices);

thrust::sequence(rmm::exec_policy(stream_)->on(stream_),
dendogram_->current_level_begin(),
dendogram_->current_level_end());
initialize_dendogram_level(current_graph.number_of_vertices);

compute_vertex_and_cluster_weights(current_graph);

Expand Down Expand Up @@ -192,6 +188,14 @@ class Louvain {
#endif
}

void initialize_dendogram_level(vertex_t num_vertices) {
dendogram_->add_level(num_vertices);

thrust::sequence(rmm::exec_policy(stream_)->on(stream_),
dendogram_->current_level_begin(),
dendogram_->current_level_end());
}

public:
void compute_vertex_and_cluster_weights(graph_type const &graph)
{
Expand Down
Loading