Merge pull request rapidsai#1354 from seunghwak/fea_induced_subgraph
Implement induced subgraph extraction (SG C++)
BradReesWork committed Jan 26, 2021
2 parents 4f35bcb + a46f863 commit 9820990
Showing 23 changed files with 964 additions and 135 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -19,6 +19,7 @@
- PR #1279 Add self loop check variable in graph
- PR #1277 SciPy sparse matrix input support for WCC, SCC, SSSP, and BFS
- PR #1278 Add support for shortest_path_length and fix graph vertex checks
- PR #1280 Add Multi(Di)Graph support

## Improvements
- PR #1227 Pin cmake policies to cmake 3.17 version
1 change: 1 addition & 0 deletions cpp/CMakeLists.txt
@@ -370,6 +370,7 @@ add_library(cugraph SHARED
src/experimental/coarsen_graph.cu
src/experimental/renumber_edgelist.cu
src/experimental/relabel.cu
src/experimental/induced_subgraph.cu
src/experimental/bfs.cu
src/experimental/sssp.cu
src/experimental/pagerank.cu
44 changes: 44 additions & 0 deletions cpp/include/experimental/graph_functions.hpp
@@ -243,5 +243,49 @@ void relabel(raft::handle_t const& handle,
vertex_t num_labels,
bool do_expensive_check = false);

/**
* @brief extract induced subgraph(s).
*
* @tparam vertex_t Type of vertex identifiers. Needs to be an integral type.
* @tparam edge_t Type of edge identifiers. Needs to be an integral type.
* @tparam weight_t Type of edge weights.
* @tparam store_transposed Flag indicating whether to store the graph adjacency matrix as is or as
* transposed.
* @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false)
* or multi-GPU (true).
* @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
* handles to various CUDA libraries) to run graph algorithms.
* @param graph_view Graph view object, we extract induced subgraphs from @p graph_view.
* @param subgraph_offsets Pointer to subgraph vertex offsets (size == @p num_subgraphs + 1).
* @param subgraph_vertices Pointer to subgraph vertices (size == @p subgraph_offsets[@p
* num_subgraphs]). The elements of @p subgraph_vertices for each subgraph should be sorted in
* ascending order and unique.
* @param num_subgraphs Number of induced subgraphs to extract.
* @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`).
* @return std::tuple&lt;rmm::device_uvector&lt;vertex_t&gt;, rmm::device_uvector&lt;vertex_t&gt;,
* rmm::device_uvector&lt;weight_t&gt;, rmm::device_uvector&lt;size_t&gt;&gt; Quadruplet of edge major (destination
* if @p store_transposed is true, source otherwise) vertices, edge minor (source if @p
* store_transposed is true, destination otherwise) vertices, edge weights, and edge offsets for
* each induced subgraph (size == num_subgraphs + 1). The sizes of the edge major & minor vertices
* are edge_offsets[num_subgraphs]. The size of the edge weights is either
* edge_offsets[num_subgraphs] (if @p graph_view is weighted) or 0 (if @p graph_view is unweighted).
*/
template <typename vertex_t,
typename edge_t,
typename weight_t,
bool store_transposed,
bool multi_gpu>
std::tuple<rmm::device_uvector<vertex_t>,
rmm::device_uvector<vertex_t>,
rmm::device_uvector<weight_t>,
rmm::device_uvector<size_t>>
extract_induced_subgraphs(
raft::handle_t const& handle,
graph_view_t<vertex_t, edge_t, weight_t, store_transposed, multi_gpu> const& graph_view,
size_t const* subgraph_offsets /* size == num_subgraphs + 1 */,
vertex_t const* subgraph_vertices /* size == subgraph_offsets[num_subgraphs] */,
size_t num_subgraphs,
bool do_expensive_check = false);

} // namespace experimental
} // namespace cugraph
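
For context, a minimal sketch of how the new API might be called (single-GPU, non-transposed). The demo function, include paths, and host-to-device staging below are illustrative assumptions, not part of this diff:

#include <experimental/graph_functions.hpp>
#include <raft/cudart_utils.h>
#include <raft/handle.hpp>
#include <rmm/device_uvector.hpp>
#include <vector>

void demo(raft::handle_t const& handle,
          cugraph::experimental::graph_view_t<int32_t, int32_t, float, false, false> const&
            graph_view)
{
  // Two subgraphs packed CSR-style: subgraph 0 = {0, 2, 5}, subgraph 1 = {1, 3}.
  // Vertices within each subgraph must be sorted in ascending order and unique.
  std::vector<size_t> h_offsets{0, 3, 5};
  std::vector<int32_t> h_vertices{0, 2, 5, 1, 3};

  rmm::device_uvector<size_t> d_offsets(h_offsets.size(), handle.get_stream());
  rmm::device_uvector<int32_t> d_vertices(h_vertices.size(), handle.get_stream());
  raft::update_device(d_offsets.data(), h_offsets.data(), h_offsets.size(), handle.get_stream());
  raft::update_device(
    d_vertices.data(), h_vertices.data(), h_vertices.size(), handle.get_stream());

  auto result = cugraph::experimental::extract_induced_subgraphs(
    handle, graph_view, d_offsets.data(), d_vertices.data(), size_t{2}, true);
  // std::get<3>(result) holds the per-subgraph edge offsets: the edges of
  // subgraph i occupy [offsets[i], offsets[i+1]) in the returned major vertex,
  // minor vertex, and (if weighted) weight arrays.
}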
20 changes: 0 additions & 20 deletions cpp/include/patterns/copy_to_adj_matrix_row_col.cuh
@@ -175,12 +175,6 @@ void copy_to_matrix_major(raft::handle_t const& handle,
map_first,
matrix_major_value_output_first);
}

CUDA_TRY(cudaStreamSynchronize(
handle.get_stream())); // this is as necessary rx_tmp_buffer will become out-of-scope
// once control flow exits this block (FIXME: we can reduce stream
// synchronization if we compute the maximum rx_counts and
// allocate rx_tmp_buffer outside the loop)
}
}
} else {
@@ -370,10 +364,6 @@ void copy_to_matrix_minor(raft::handle_t const& handle,
rx_count,
comm_src_rank,
handle.get_stream());

CUDA_TRY(cudaStreamSynchronize(
handle.get_stream())); // this is as necessary src_tmp_buffer will become out-of-scope
// once control flow exits this block
}

// FIXME: now we can clear tx_tmp_buffer
@@ -424,17 +414,7 @@
map_first,
matrix_minor_value_output_first);
}

CUDA_TRY(cudaStreamSynchronize(
handle.get_stream())); // this is as necessary rx_tmp_buffer will become out-of-scope
// once control flow exits this block (FIXME: we can reduce stream
// synchronization if we compute the maximum rx_counts and
// allocate rx_tmp_buffer outside the loop)
}

CUDA_TRY(cudaStreamSynchronize(
handle.get_stream())); // this is as necessary dst_tmp_buffer will become out-of-scope once
// control flow exits this block
}
} else {
assert(graph_view.get_number_of_local_vertices() ==
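
A note on the deletions in this file (the same pattern recurs in several of the files below): each removed CUDA_TRY(cudaStreamSynchronize(...)) guarded a temporary device buffer that goes out of scope while asynchronous work using it may still be pending on the stream. A minimal sketch of that hazard, using hypothetical names and the RAPIDS-0.18-era rmm::exec_policy style seen elsewhere in this diff:

#include <raft/handle.hpp>
#include <rmm/device_uvector.hpp>
#include <rmm/thrust_rmm_allocator.h>  // rmm::exec_policy
#include <thrust/copy.h>
#include <cstddef>

void copy_through_tmp(raft::handle_t const& handle, int const* d_src, int* d_dst, std::size_t n)
{
  {
    rmm::device_uvector<int> tmp(n, handle.get_stream());
    // Both copies are merely enqueued on handle.get_stream(); the host does not wait.
    thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
                 d_src, d_src + n, tmp.begin());
    thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
                 tmp.begin(), tmp.end(), d_dst);
    // Before this commit a CUDA_TRY(cudaStreamSynchronize(handle.get_stream()))
    // sat here so tmp could not be freed while the copies were still in flight.
  }  // ~device_uvector() releases tmp's memory on handle.get_stream(); with a
     // stream-ordered memory resource the free is ordered after the copies above,
     // which is presumably what makes the explicit synchronizations removable.
}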
10 changes: 0 additions & 10 deletions cpp/include/patterns/copy_v_transform_reduce_in_out_nbr.cuh
@@ -525,12 +525,6 @@ void copy_v_transform_reduce_nbr(raft::handle_t const& handle,
handle.get_stream());
}
}

CUDA_TRY(cudaStreamSynchronize(
handle.get_stream())); // this is as necessary major_tmp_buffer will become out-of-scope once
// control flow exits this block (FIXME: we can reduce stream
// synchronization if we compute the maximum major_tmp_buffer_size and
// allocate major_tmp_buffer outside the loop)
}

if (GraphViewType::is_multi_gpu && (in != GraphViewType::is_adj_matrix_transposed)) {
@@ -592,10 +586,6 @@
}
}
}

CUDA_TRY(cudaStreamSynchronize(
handle.get_stream())); // this is as necessary minor_tmp_buffer will become out-of-scope once
// control flow exits this block
}

} // namespace detail
cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh
@@ -383,9 +383,6 @@ void copy_v_transform_reduce_key_aggregated_out_nbr(
tmp_major_vertices = std::move(rx_major_vertices);
tmp_minor_keys = std::move(rx_minor_keys);
tmp_key_aggregated_edge_weights = std::move(rx_key_aggregated_edge_weights);

CUDA_TRY(
cudaStreamSynchronize(handle.get_stream())); // tx_value_counts will become out-of-scope
}

auto tmp_e_op_result_buffer =
@@ -464,18 +461,9 @@
major_vertices = std::move(rx_major_vertices);
e_op_result_buffer = std::move(rx_tmp_e_op_result_buffer);
}

CUDA_TRY(cudaStreamSynchronize(
handle
.get_stream())); // tmp_minor_keys, tmp_key_aggregated_edge_weights, rx_major_vertices,
// and rx_tmp_e_op_result_buffer will become out-of-scope
} else {
major_vertices = std::move(tmp_major_vertices);
e_op_result_buffer = std::move(tmp_e_op_result_buffer);

CUDA_TRY(cudaStreamSynchronize(
handle.get_stream())); // tmp_minor_keys and tmp_key_aggregated_edge_weights will become
// out-of-scope
}
}

cpp/include/patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh
@@ -254,10 +254,6 @@ transform_reduce_by_adj_matrix_row_col_key_e(

tmp_keys = std::move(rx_unique_keys);
tmp_value_buffer = std::move(rx_value_for_unique_key_buffer);

CUDA_TRY(cudaStreamSynchronize(
handle
.get_stream())); // unique_keys & value_for_unique_key_buffer will become out-of-scope
}

auto cur_size = keys.size();
9 changes: 5 additions & 4 deletions cpp/include/patterns/update_frontier_v_push_if_out_nbr.cuh
@@ -156,7 +156,7 @@ size_t reduce_buffer_elements(raft::handle_t const& handle,
// FIXME: actually, we can find how many unique keys are here by now.
// FIXME: if GraphViewType::is_multi_gpu is true, this should be executed on the GPU holding the
// vertex unless reduce_op is a pure function.
rmm::device_vector<key_t> keys(num_buffer_elements);
rmm::device_uvector<key_t> keys(num_buffer_elements, handle.get_stream());
rmm::device_vector<payload_t> values(num_buffer_elements);
auto it = thrust::reduce_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
buffer_key_output_first,
@@ -176,9 +176,10 @@
values.begin(),
values.begin() + num_reduced_buffer_elements,
buffer_payload_output_first);
CUDA_TRY(cudaStreamSynchronize(
handle.get_stream())); // this is necessary as kyes & values will become out-of-scope once
// this function returns
// FIXME: this is unnecessary if we use a tuple of rmm::device_uvector objects for values
CUDA_TRY(
cudaStreamSynchronize(handle.get_stream())); // this is necessary as values will become
// out-of-scope once this function returns
return num_reduced_buffer_elements;
}
}
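
The switch above from rmm::device_vector to rmm::device_uvector for keys is more than a rename: device_vector is Thrust-backed and default-initializes every element on construction, while device_uvector performs a stream-ordered allocation and leaves the memory uninitialized, which is all the subsequent reduce_by_key needs. The final synchronization stays because values is still a device_vector that is destroyed on return, as the new FIXME notes. A schematic contrast (n and stream are hypothetical):

rmm::device_vector<int> v(n);           // Thrust-backed: allocates and default-
                                        // initializes all n elements
rmm::device_uvector<int> u(n, stream);  // stream-ordered allocation only; element
                                        // memory is left uninitialized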
13 changes: 0 additions & 13 deletions cpp/src/experimental/coarsen_graph.cu
@@ -120,10 +120,6 @@ void sort_and_coarsen_edgelist(rmm::device_uvector<vertex_t> &edgelist_major_vertices,
tmp_edgelist_weights.begin());
number_of_edges = thrust::distance(tmp_edgelist_weights.begin(), thrust::get<1>(it));

CUDA_TRY(cudaStreamSynchronize(
stream)); // memory blocks owned by edgelist_(major_vertices,minor_vertices,weights) will be
// freed after the assignments below

edgelist_major_vertices = std::move(tmp_edgelist_major_vertices);
edgelist_minor_vertices = std::move(tmp_edgelist_minor_vertices);
edgelist_weights = std::move(tmp_edgelist_weights);
@@ -313,10 +309,6 @@ coarsen_graph(
src_edge_first + edgelist_major_vertices.size(),
dst_edge_first);
}

CUDA_TRY(cudaStreamSynchronize(
handle.get_stream())); // edgelist_(major_vertices,minor_vertices,weights)
// will become out-of-scope
}

sort_and_coarsen_edgelist(coarsened_edgelist_major_vertices,
@@ -355,11 +347,6 @@ coarsen_graph(
rx_edgelist_weights,
handle.get_stream());

CUDA_TRY(cudaStreamSynchronize(
handle.get_stream())); // memory blocks owned by
// coarsened_edgelist_(major_vertices,minor_vertices,weights)
// will be freed after the assignments below

coarsened_edgelist_major_vertices = std::move(rx_edgelist_major_vertices);
coarsened_edgelist_minor_vertices = std::move(rx_edgelist_minor_vertices);
coarsened_edgelist_weights = std::move(rx_edgelist_weights);
4 changes: 1 addition & 3 deletions cpp/src/experimental/graph.cu
@@ -470,9 +470,7 @@ graph_t<vertex_t, edge_t, weight_t, store_transposed, multi_gpu, std::enable_if_
segment_offsets_.data(), segment_offsets.data(), segment_offsets.size(), default_stream);

CUDA_TRY(cudaStreamSynchronize(
default_stream)); // this is necessary as d_thresholds and segment_offsets will become
// out-of-scpe once control flow exits this block and segment_offsets_ can
// be used right after return.
default_stream)); // this is necessary as segment_offsets_ can be used right after return.
}

// optional expensive checks (part 3/3)