Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add new primitives: compute_in|out_degrees, compute_in|out_weight_sums to graph_view_t #1394

Merged
merged 10 commits into from
Feb 25, 2021
22 changes: 19 additions & 3 deletions cpp/include/experimental/detail/graph_utils.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include <rmm/device_uvector.hpp>

#include <thrust/sort.h>
#include <thrust/tabulate.h>
#include <thrust/transform.h>
#include <cuco/detail/hash_functions.cuh>

Expand All @@ -39,7 +40,7 @@ namespace detail {
// compute the numbers of nonzeros in rows (of the graph adjacency matrix, if store_transposed =
// false) or columns (of the graph adjacency matrix, if store_transposed = true)
template <typename vertex_t, typename edge_t>
rmm::device_uvector<edge_t> compute_major_degree(
rmm::device_uvector<edge_t> compute_major_degrees(
raft::handle_t const &handle,
std::vector<edge_t const *> const &adj_matrix_partition_offsets,
partition_t<vertex_t> const &partition)
Expand Down Expand Up @@ -120,7 +121,7 @@ rmm::device_uvector<edge_t> compute_major_degree(
// compute the numbers of nonzeros in rows (of the graph adjacency matrix, if store_transposed =
// false) or columns (of the graph adjacency matrix, if store_transposed = true)
template <typename vertex_t, typename edge_t>
rmm::device_uvector<edge_t> compute_major_degree(
rmm::device_uvector<edge_t> compute_major_degrees(
raft::handle_t const &handle,
std::vector<rmm::device_uvector<edge_t>> const &adj_matrix_partition_offsets,
partition_t<vertex_t> const &partition)
Expand All @@ -131,7 +132,22 @@ rmm::device_uvector<edge_t> compute_major_degree(
adj_matrix_partition_offsets.end(),
tmp_offsets.begin(),
[](auto const &offsets) { return offsets.data(); });
return compute_major_degree(handle, tmp_offsets, partition);
return compute_major_degrees(handle, tmp_offsets, partition);
}

// compute the numbers of nonzeros in rows (of the graph adjacency matrix, if store_transposed =
// false) or columns (of the graph adjacency matrix, if store_transposed = true)
template <typename vertex_t, typename edge_t>
rmm::device_uvector<edge_t> compute_major_degrees(raft::handle_t const &handle,
                                                  edge_t const *offsets,
                                                  vertex_t number_of_vertices)
{
  auto stream = handle.get_stream();
  rmm::device_uvector<edge_t> degrees(number_of_vertices, stream);
  // the degree of major vertex v is the width of its offset range: offsets[v + 1] - offsets[v]
  thrust::tabulate(rmm::exec_policy(stream)->on(stream),
                   degrees.begin(),
                   degrees.end(),
                   [offsets] __device__(auto v) { return offsets[v + 1] - offsets[v]; });
  return degrees;
}

template <typename vertex_t, typename edge_t>
Expand Down
12 changes: 12 additions & 0 deletions cpp/include/experimental/graph_view.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -494,6 +494,12 @@ class graph_view_t<vertex_t,
: static_cast<weight_t const*>(nullptr);
}

rmm::device_uvector<edge_t> compute_in_degrees(raft::handle_t const& handle) const;
rmm::device_uvector<edge_t> compute_out_degrees(raft::handle_t const& handle) const;

rmm::device_uvector<weight_t> compute_in_weight_sums(raft::handle_t const& handle) const;
rmm::device_uvector<weight_t> compute_out_weight_sums(raft::handle_t const& handle) const;

private:
std::vector<edge_t const*> adj_matrix_partition_offsets_{};
std::vector<vertex_t const*> adj_matrix_partition_indices_{};
Expand Down Expand Up @@ -638,6 +644,12 @@ class graph_view_t<vertex_t,
// private.
weight_t const* weights() const { return weights_; }

rmm::device_uvector<edge_t> compute_in_degrees(raft::handle_t const& handle) const;
rmm::device_uvector<edge_t> compute_out_degrees(raft::handle_t const& handle) const;

rmm::device_uvector<weight_t> compute_in_weight_sums(raft::handle_t const& handle) const;
rmm::device_uvector<weight_t> compute_out_weight_sums(raft::handle_t const& handle) const;

private:
edge_t const* offsets_{nullptr};
vertex_t const* indices_{nullptr};
Expand Down
4 changes: 2 additions & 2 deletions cpp/include/utilities/shuffle_comm.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ rmm::device_uvector<size_t> sort_and_count(raft::comms::comms_t const &comm,
d_tx_value_counts = std::move(d_counts);
}

return std::move(d_tx_value_counts);
return d_tx_value_counts;
}

template <typename VertexIterator, typename ValueIterator, typename KeyToGPUIdOp>
Expand Down Expand Up @@ -111,7 +111,7 @@ rmm::device_uvector<size_t> sort_and_count(raft::comms::comms_t const &comm,
d_tx_value_counts = std::move(d_counts);
}

return std::move(d_tx_value_counts);
return d_tx_value_counts;
}

// inline to suppress a complaint about ODR violation
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/experimental/graph.cu
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,7 @@ graph_t<vertex_t, edge_t, weight_t, store_transposed, multi_gpu, std::enable_if_
// update degree-based segment offsets (to be used for graph analytics kernel optimization)

if (sorted_by_global_degree_within_vertex_partition) {
auto degrees = detail::compute_major_degree(
auto degrees = detail::compute_major_degrees(
*(this->get_handle_ptr()), adj_matrix_partition_offsets_, partition_);

// optional expensive checks (part 2/3)
Expand Down
228 changes: 227 additions & 1 deletion cpp/src/experimental/graph_view.cu
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include <experimental/detail/graph_utils.cuh>
#include <experimental/graph_view.hpp>
#include <partition_manager.hpp>
#include <patterns/copy_v_transform_reduce_in_out_nbr.cuh>
#include <utilities/error.hpp>
#include <utilities/host_scalar_comm.cuh>

Expand Down Expand Up @@ -70,6 +71,83 @@ std::vector<edge_t> update_adj_matrix_partition_edge_counts(
return adj_matrix_partition_edge_counts;
}

// compute the number of edges reduced to each minor-axis vertex: in-degrees when the adjacency
// matrix is stored as is (store_transposed == false), out-degrees when stored transposed
// (store_transposed == true)
template <typename vertex_t,
          typename edge_t,
          typename weight_t,
          bool store_transposed,
          bool multi_gpu>
rmm::device_uvector<edge_t> compute_minor_degrees(
  raft::handle_t const& handle,
  graph_view_t<vertex_t, edge_t, weight_t, store_transposed, multi_gpu> const& graph_view)
{
  rmm::device_uvector<edge_t> minor_degrees(graph_view.get_number_of_local_vertices(),
                                            handle.get_stream());
  // contribute one per edge; the reduction over neighbors then yields the degree
  auto count_edge_op =
    [] __device__(vertex_t src, vertex_t dst, weight_t w, auto src_val, auto dst_val) {
      return edge_t{1};
    };
  if (!store_transposed) {
    copy_v_transform_reduce_in_nbr(handle,
                                   graph_view,
                                   thrust::make_constant_iterator(0) /* dummy */,
                                   thrust::make_constant_iterator(0) /* dummy */,
                                   count_edge_op,
                                   edge_t{0},
                                   minor_degrees.data());
  } else {
    copy_v_transform_reduce_out_nbr(handle,
                                    graph_view,
                                    thrust::make_constant_iterator(0) /* dummy */,
                                    thrust::make_constant_iterator(0) /* dummy */,
                                    count_edge_op,
                                    edge_t{0},
                                    minor_degrees.data());
  }

  return minor_degrees;
}

// sum the weights of the edges incident on each local vertex; `major` selects whether the sums are
// taken over the major axis (offsets side) or the minor axis of the stored adjacency matrix
template <bool major,
          typename vertex_t,
          typename edge_t,
          typename weight_t,
          bool store_transposed,
          bool multi_gpu>
rmm::device_uvector<weight_t> compute_weight_sums(
  raft::handle_t const& handle,
  graph_view_t<vertex_t, edge_t, weight_t, store_transposed, multi_gpu> const& graph_view)
{
  rmm::device_uvector<weight_t> weight_sums(graph_view.get_number_of_local_vertices(),
                                            handle.get_stream());
  // each edge contributes its weight; the neighbor reduction accumulates the per-vertex sum
  auto sum_weight_op =
    [] __device__(vertex_t src, vertex_t dst, weight_t w, auto src_val, auto dst_val) {
      return w;
    };
  if (major != store_transposed) {
    copy_v_transform_reduce_out_nbr(handle,
                                    graph_view,
                                    thrust::make_constant_iterator(0) /* dummy */,
                                    thrust::make_constant_iterator(0) /* dummy */,
                                    sum_weight_op,
                                    weight_t{0.0},
                                    weight_sums.data());
  } else {
    copy_v_transform_reduce_in_nbr(handle,
                                   graph_view,
                                   thrust::make_constant_iterator(0) /* dummy */,
                                   thrust::make_constant_iterator(0) /* dummy */,
                                   sum_weight_op,
                                   weight_t{0.0},
                                   weight_sums.data());
  }

  return weight_sums;
}

} // namespace

template <typename vertex_t,
Expand Down Expand Up @@ -180,7 +258,7 @@ graph_view_t<vertex_t, edge_t, weight_t, store_transposed, multi_gpu, std::enabl
"number_of_local_edges.");

if (sorted_by_global_degree_within_vertex_partition) {
auto degrees = detail::compute_major_degree(handle, adj_matrix_partition_offsets, partition);
auto degrees = detail::compute_major_degrees(handle, adj_matrix_partition_offsets, partition);
CUGRAPH_EXPECTS(
thrust::is_sorted(rmm::exec_policy(default_stream)->on(default_stream),
degrees.begin(),
Expand Down Expand Up @@ -301,6 +379,154 @@ graph_view_t<vertex_t,
}
}

template <typename vertex_t,
          typename edge_t,
          typename weight_t,
          bool store_transposed,
          bool multi_gpu>
rmm::device_uvector<edge_t>
graph_view_t<vertex_t, edge_t, weight_t, store_transposed, multi_gpu, std::enable_if_t<multi_gpu>>::
  compute_in_degrees(raft::handle_t const& handle) const
{
  // when stored transposed, in-edges lie along the major (offsets) axis; otherwise in-degrees
  // come from a minor-axis reduction
  return store_transposed ? detail::compute_major_degrees(
                              handle, this->adj_matrix_partition_offsets_, this->partition_)
                          : compute_minor_degrees(handle, *this);
}

template <typename vertex_t,
          typename edge_t,
          typename weight_t,
          bool store_transposed,
          bool multi_gpu>
rmm::device_uvector<edge_t>
graph_view_t<vertex_t,
             edge_t,
             weight_t,
             store_transposed,
             multi_gpu,
             std::enable_if_t<!multi_gpu>>::compute_in_degrees(raft::handle_t const& handle) const
{
  // when stored transposed, in-edges lie along the major (offsets) axis; otherwise in-degrees
  // come from a minor-axis reduction
  return store_transposed ? detail::compute_major_degrees(
                              handle, this->offsets_, this->get_number_of_local_vertices())
                          : compute_minor_degrees(handle, *this);
}

template <typename vertex_t,
          typename edge_t,
          typename weight_t,
          bool store_transposed,
          bool multi_gpu>
rmm::device_uvector<edge_t>
graph_view_t<vertex_t, edge_t, weight_t, store_transposed, multi_gpu, std::enable_if_t<multi_gpu>>::
  compute_out_degrees(raft::handle_t const& handle) const
{
  // when stored transposed, out-edges lie along the minor axis; otherwise out-degrees come from
  // the major (offsets) axis
  if (store_transposed) {
    return compute_minor_degrees(handle, *this);
  } else {
    return detail::compute_major_degrees(
      handle, this->adj_matrix_partition_offsets_, this->partition_);
  }
}

template <typename vertex_t,
          typename edge_t,
          typename weight_t,
          bool store_transposed,
          bool multi_gpu>
rmm::device_uvector<edge_t>
graph_view_t<vertex_t,
             edge_t,
             weight_t,
             store_transposed,
             multi_gpu,
             std::enable_if_t<!multi_gpu>>::compute_out_degrees(raft::handle_t const& handle) const
{
  // when stored transposed, out-edges lie along the minor axis; otherwise out-degrees come from
  // the major (offsets) axis
  return store_transposed ? compute_minor_degrees(handle, *this)
                          : detail::compute_major_degrees(
                              handle, this->offsets_, this->get_number_of_local_vertices());
}

template <typename vertex_t,
          typename edge_t,
          typename weight_t,
          bool store_transposed,
          bool multi_gpu>
rmm::device_uvector<weight_t>
graph_view_t<vertex_t, edge_t, weight_t, store_transposed, multi_gpu, std::enable_if_t<multi_gpu>>::
  compute_in_weight_sums(raft::handle_t const& handle) const
{
  // in-edges lie along the major axis exactly when the matrix is stored transposed, so the
  // compile-time constant store_transposed can be forwarded directly as the `major` template
  // argument instead of branching on it at run time
  return compute_weight_sums<store_transposed>(handle, *this);
}

template <typename vertex_t,
          typename edge_t,
          typename weight_t,
          bool store_transposed,
          bool multi_gpu>
rmm::device_uvector<weight_t> graph_view_t<
  vertex_t,
  edge_t,
  weight_t,
  store_transposed,
  multi_gpu,
  std::enable_if_t<!multi_gpu>>::compute_in_weight_sums(raft::handle_t const& handle) const
{
  // in-edges lie along the major axis exactly when the matrix is stored transposed, so the
  // compile-time constant store_transposed can be forwarded directly as the `major` template
  // argument instead of branching on it at run time
  return compute_weight_sums<store_transposed>(handle, *this);
}

template <typename vertex_t,
          typename edge_t,
          typename weight_t,
          bool store_transposed,
          bool multi_gpu>
rmm::device_uvector<weight_t>
graph_view_t<vertex_t, edge_t, weight_t, store_transposed, multi_gpu, std::enable_if_t<multi_gpu>>::
  compute_out_weight_sums(raft::handle_t const& handle) const
{
  // out-edges lie along the major axis exactly when the matrix is NOT stored transposed, so the
  // negated compile-time constant can be forwarded directly as the `major` template argument
  // instead of branching on it at run time
  return compute_weight_sums<!store_transposed>(handle, *this);
}

template <typename vertex_t,
          typename edge_t,
          typename weight_t,
          bool store_transposed,
          bool multi_gpu>
rmm::device_uvector<weight_t> graph_view_t<
  vertex_t,
  edge_t,
  weight_t,
  store_transposed,
  multi_gpu,
  std::enable_if_t<!multi_gpu>>::compute_out_weight_sums(raft::handle_t const& handle) const
{
  // out-edges lie along the major axis exactly when the matrix is NOT stored transposed, so the
  // negated compile-time constant can be forwarded directly as the `major` template argument
  // instead of branching on it at run time
  return compute_weight_sums<!store_transposed>(handle, *this);
}

// explicit instantiation

template class graph_view_t<int32_t, int32_t, float, true, true>;
Expand Down
20 changes: 3 additions & 17 deletions cpp/src/experimental/pagerank.cu
Original file line number Diff line number Diff line change
Expand Up @@ -142,23 +142,9 @@ void pagerank(raft::handle_t const& handle,

// 2. compute the sums of the out-going edge weights (if not provided)

rmm::device_uvector<weight_t> tmp_vertex_out_weight_sums(0, handle.get_stream());
if (precomputed_vertex_out_weight_sums == nullptr) {
tmp_vertex_out_weight_sums.resize(pull_graph_view.get_number_of_local_vertices(),
handle.get_stream());
// FIXME: better refactor this out (computing out-degree).
copy_v_transform_reduce_out_nbr(
handle,
pull_graph_view,
thrust::make_constant_iterator(0) /* dummy */,
thrust::make_constant_iterator(0) /* dummy */,
[alpha] __device__(vertex_t src, vertex_t dst, weight_t w, auto src_val, auto dst_val) {
return w;
},
weight_t{0.0},
tmp_vertex_out_weight_sums.data());
}

auto tmp_vertex_out_weight_sums = precomputed_vertex_out_weight_sums == nullptr
? pull_graph_view.compute_out_weight_sums(handle)
: rmm::device_uvector<weight_t>(0, handle.get_stream());
auto vertex_out_weight_sums = precomputed_vertex_out_weight_sums != nullptr
? precomputed_vertex_out_weight_sums
: tmp_vertex_out_weight_sums.data();
Expand Down
Loading