Skip to content

Commit

Permalink
Put in fix of rebuild_indices() method, findMatches tests now working…
Browse files Browse the repository at this point in the history
… for cuda 10.2
  • Loading branch information
jwyles committed Apr 10, 2020
1 parent c7867ef commit 647bb17
Show file tree
Hide file tree
Showing 4 changed files with 51 additions and 143 deletions.
70 changes: 30 additions & 40 deletions cpp/src/db/db_object.cu
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include <db/db_object.cuh>
#include <cub/device/device_run_length_encode.cuh>
#include <sstream>
#include <thrust/binary_search.h>

namespace cugraph {
namespace db {
Expand Down Expand Up @@ -214,6 +215,28 @@ idx_t db_column_index<idx_t>::getIndirectionSize() {
return indirection_size;
}

/**
 * Builds a human readable dump of this index. For debugging purposes only.
 *
 * The offsets array and the indirection array are each copied into a
 * temporary host buffer with cudaMemcpy (cudaMemcpyDefault) and written out
 * as space-separated values. If the host allocation or the copy fails, a
 * short diagnostic is written in place of the values so that the contents of
 * an uninitialized buffer are never printed.
 *
 * @return Human readable representation
 */
template<typename idx_t>
std::string db_column_index<idx_t>::toString(){
  std::stringstream ss;

  // Copies `count` elements starting at `src` into a temporary host buffer
  // and appends them to `ss`, each followed by a single space.
  auto appendValues = [&ss](const idx_t* src, idx_t count) {
    if (count <= 0) {
      return;  // Nothing to copy or print
    }
    const size_t bytes = sizeof(idx_t) * static_cast<size_t>(count);
    idx_t* hostValues = static_cast<idx_t*>(malloc(bytes));
    if (hostValues == nullptr) {
      ss << "<host allocation failed>";
      return;
    }
    // Only print the buffer when the copy succeeded; otherwise it still
    // holds whatever malloc returned.
    const cudaError_t status = cudaMemcpy(hostValues, src, bytes, cudaMemcpyDefault);
    if (status == cudaSuccess) {
      for (idx_t i = 0; i < count; i++) {
        ss << hostValues[i] << " ";
      }
    } else {
      ss << "<cudaMemcpy failed: " << cudaGetErrorString(status) << ">";
    }
    free(hostValues);
  };

  ss << "db_column_index:\n";
  ss << "Offsets: ";
  appendValues(offsets, offsets_size);
  ss << "\nIndirection: ";
  appendValues(indirection, indirection_size);
  ss << "\n";
  return ss.str();
}

template class db_column_index<int32_t>;
template class db_column_index<int64_t>;

Expand Down Expand Up @@ -373,50 +396,17 @@ void db_table<idx_t>::rebuildIndices() {
// Compute offsets array based on sorted column
idx_t maxId;
cudaMemcpy(&maxId, tempColumn + size - 1, sizeof(idx_t), cudaMemcpyDefault);
idx_t *unique, *counts, *runCount;
ALLOC_TRY(&unique, (maxId + 1) * sizeof(idx_t), nullptr);
ALLOC_TRY(&counts, (maxId + 1) * sizeof(idx_t), nullptr);
ALLOC_TRY(&runCount, sizeof(idx_t), nullptr);
void* tmpStorage = nullptr;
size_t tmpBytes = 0;
cub::DeviceRunLengthEncode::Encode(tmpStorage,
tmpBytes,
tempColumn,
unique,
counts,
runCount,
size);
ALLOC_TRY(&tmpStorage, tmpBytes, nullptr);
cub::DeviceRunLengthEncode::Encode(tmpStorage,
tmpBytes,
tempColumn,
unique,
counts,
runCount,
size);
ALLOC_FREE_TRY(tmpStorage, nullptr);
idx_t runCount_h;
cudaMemcpy(&runCount_h, runCount, sizeof(idx_t), cudaMemcpyDefault);
idx_t* offsets;

// Allocating the new offsets array
ALLOC_TRY(&offsets, (maxId + 2) * sizeof(idx_t), nullptr);
thrust::lower_bound(rmm::exec_policy(nullptr)->on(nullptr),
tempColumn,
tempColumn + size,
thrust::counting_iterator<idx_t>(0),
thrust::counting_iterator<idx_t>(maxId + 2),
offsets);

// Filling values in offsets array from the encoded run lengths
int threadsPerBlock = 1024;
int numBlocks = (runCount_h + threadsPerBlock - 1) / threadsPerBlock;
offsetsKernel<<<numBlocks, threadsPerBlock>>>(runCount_h, unique, counts, offsets);
CUDA_CHECK_LAST();

// Taking the exclusive scan of the run lengths to get the final offsets.
thrust::exclusive_scan(rmm::exec_policy(nullptr)->on(nullptr),
offsets,
offsets + maxId + 2,
offsets);
// Clean up temporary allocations
ALLOC_FREE_TRY(tempColumn, nullptr);
ALLOC_FREE_TRY(unique, nullptr);
ALLOC_FREE_TRY(counts, nullptr);
ALLOC_FREE_TRY(runCount, nullptr);

// Assign new offsets array and indirection vector to index
indices[i].resetData(offsets, maxId + 2, indirection, size);
Expand Down
6 changes: 6 additions & 0 deletions cpp/src/db/db_object.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,12 @@ namespace db {
idx_t getOffsetsSize();
idx_t* getIndirection();
idx_t getIndirectionSize();

/**
* For debugging purposes only.
* @return Human readable representation
*/
std::string toString();
};

/**
Expand Down
46 changes: 1 addition & 45 deletions cpp/src/db/db_operators.cu
Original file line number Diff line number Diff line change
Expand Up @@ -149,10 +149,6 @@ namespace cugraph {
idx_t valB = columnB[row_id];
idx_t valC = columnC[row_id];

// Debugging output
// printf("RowId: %d, valA: %d, valB: %d, valC: %d\n", row_id, valA, valB, valC);
// printf("PatternA: %d, PatternB: %d, PatternC: %d\n", patternA, patternB, patternC);

// Compare the row values with constants in the pattern
bool matchA = outputA != nullptr ? true : patternA == valA;
bool matchB = outputB != nullptr ? true : patternB == valB;
Expand Down Expand Up @@ -249,9 +245,6 @@ namespace cugraph {
idx_t output_size;
cudaMemcpy(&output_size, &exsum_degree[frontierSize], sizeof(idx_t), cudaMemcpyDefault);

// Debugging output
// std::cout << "OutputSize = " << output_size << "\n";

idx_t num_blocks = (output_size + FIND_MATCHES_BLOCK_SIZE - 1) / FIND_MATCHES_BLOCK_SIZE;
idx_t *block_bucket_offsets = nullptr;
ALLOC_TRY(&block_bucket_offsets, sizeof(idx_t) * (num_blocks + 1), nullptr);
Expand Down Expand Up @@ -320,44 +313,6 @@ namespace cugraph {
patternB,
patternC);

// Debugging output
// if (outputA != nullptr) {
// idx_t* outputA_h = (idx_t*)malloc(sizeof(idx_t) * output_size);
// cudaMemcpy(outputA_h, outputA, sizeof(idx_t)*output_size, cudaMemcpyDefault);
// std::cout << "OutputA: ";
// for (int i = 0; i < output_size; i++)
// std::cout << outputA_h[i] << " ";
// std::cout << "\n";
// free(outputA_h);
// }
// if (outputB != nullptr) {
// idx_t* outputB_h = (idx_t*) malloc(sizeof(idx_t) * output_size);
// cudaMemcpy(outputB_h, outputB, sizeof(idx_t) * output_size, cudaMemcpyDefault);
// std::cout << "OutputB: ";
// for (int i = 0; i < output_size; i++)
// std::cout << outputB_h[i] << " ";
// std::cout << "\n";
// free(outputB_h);
// }
// if (outputC != nullptr) {
// idx_t* outputC_h = (idx_t*) malloc(sizeof(idx_t) * output_size);
// cudaMemcpy(outputC_h, outputC, sizeof(idx_t) * output_size, cudaMemcpyDefault);
// std::cout << "OutputC: ";
// for (int i = 0; i < output_size; i++)
// std::cout << outputC_h[i] << " ";
// std::cout << "\n";
// free(outputC_h);
// }
// if (outputD != nullptr) {
// idx_t* outputD_h = (idx_t*) malloc(sizeof(idx_t) * output_size);
// cudaMemcpy(outputD_h, outputD, sizeof(idx_t) * output_size, cudaMemcpyDefault);
// std::cout << "OutputD: ";
// for (int i = 0; i < output_size; i++)
// std::cout << outputD_h[i] << " ";
// std::cout << "\n";
// free(outputD_h);
// }

// Get the non-null output columns
std::vector<idx_t*> columns;
std::vector<std::string> names;
Expand Down Expand Up @@ -409,6 +364,7 @@ namespace cugraph {
output_size);
idx_t compactSize_h;
cudaMemcpy(&compactSize_h, compactSize_d, sizeof(idx_t), cudaMemcpyDefault);

for (size_t i = 1; i < columns.size(); i++) {
col_ptr = columns[i];
cub::DeviceSelect::Flagged(tempSpace,
Expand Down
72 changes: 14 additions & 58 deletions cpp/tests/db/find_matches_test.cu
Original file line number Diff line number Diff line change
Expand Up @@ -53,62 +53,14 @@ public:
};

// Prints the contents of the table's three column indices to stdout.
// This test contains no assertions; it only produces output for inspection.
TEST_F(Test_FindMatches, verifyIndices) {
// Index 0: fetch the offsets/indirection pointers and their sizes, copy both
// arrays into freshly allocated host buffers, and print them.
int32_t* offsets_d = reinterpret_cast<int32_t*>(table.getIndex(0).getOffsets());
int32_t offsetsSize = table.getIndex(0).getOffsetsSize();
int32_t* indirection_d = reinterpret_cast<int32_t*>(table.getIndex(0).getIndirection());
int32_t indirectionSize = table.getIndex(0).getIndirectionSize();
int32_t* offsets_h = new int32_t[offsetsSize];
int32_t* indirection_h = new int32_t[indirectionSize];
// NOTE(review): the cudaMemcpy return codes are not checked here.
cudaMemcpy(offsets_h, offsets_d, sizeof(int32_t) * offsetsSize, cudaMemcpyDefault);
cudaMemcpy(indirection_h, indirection_d, sizeof(int32_t) * indirectionSize, cudaMemcpyDefault);
std::cout << "Offsets[0]: ";
for (int i = 0; i < offsetsSize; i++)
std::cout << offsets_h[i] << " ";
std::cout << "\n";
std::cout << "Indirection[0]: ";
for (int i = 0; i < indirectionSize; i++)
std::cout << indirection_h[i] << " ";
std::cout << "\n";
// Release the host copies before the variables are reused for the next index.
delete[] offsets_h;
delete[] indirection_h;

// Index 1: same copy-and-print sequence, reusing the variables declared above.
offsets_d = reinterpret_cast<int32_t*>(table.getIndex(1).getOffsets());
offsetsSize = table.getIndex(1).getOffsetsSize();
indirection_d = reinterpret_cast<int32_t*>(table.getIndex(1).getIndirection());
indirectionSize = table.getIndex(1).getIndirectionSize();
offsets_h = new int32_t[offsetsSize];
indirection_h = new int32_t[indirectionSize];
cudaMemcpy(offsets_h, offsets_d, sizeof(int32_t) * offsetsSize, cudaMemcpyDefault);
cudaMemcpy(indirection_h, indirection_d, sizeof(int32_t) * indirectionSize, cudaMemcpyDefault);
std::cout << "Offsets[1]: ";
for (int i = 0; i < offsetsSize; i++)
std::cout << offsets_h[i] << " ";
std::cout << "\n";
std::cout << "Indirection[1]: ";
for (int i = 0; i < indirectionSize; i++)
std::cout << indirection_h[i] << " ";
std::cout << "\n";
delete[] offsets_h;
delete[] indirection_h;

// Index 2: same copy-and-print sequence.
offsets_d = reinterpret_cast<int32_t*>(table.getIndex(2).getOffsets());
offsetsSize = table.getIndex(2).getOffsetsSize();
indirection_d = reinterpret_cast<int32_t*>(table.getIndex(2).getIndirection());
indirectionSize = table.getIndex(2).getIndirectionSize();
offsets_h = new int32_t[offsetsSize];
indirection_h = new int32_t[indirectionSize];
cudaMemcpy(offsets_h, offsets_d, sizeof(int32_t) * offsetsSize, cudaMemcpyDefault);
cudaMemcpy(indirection_h, indirection_d, sizeof(int32_t) * indirectionSize, cudaMemcpyDefault);
std::cout << "Offsets[2]: ";
for (int i = 0; i < offsetsSize; i++)
std::cout << offsets_h[i] << " ";
std::cout << "\n";
std::cout << "Indirection[2]: ";
for (int i = 0; i < indirectionSize; i++)
std::cout << indirection_h[i] << " ";
std::cout << "\n";
delete[] offsets_h;
delete[] indirection_h;
// insertConstantEntry is defined outside this view; presumably it queues a
// row for insertion into the table, which flush_input() then applies.
// TODO confirm against the fixture.
insertConstantEntry(0, 1, 1);
insertConstantEntry(2, 0, 1);
table.flush_input();

// Print the table, then each of its three indices through toString().
std::cout << table.toString();
std::cout << "Index[0]: " << table.getIndex(0).toString();
std::cout << "Index[1]: " << table.getIndex(1).toString();
std::cout << "Index[2]: " << table.getIndex(2).toString();
}

TEST_F(Test_FindMatches, firstTest){
Expand Down Expand Up @@ -138,6 +90,10 @@ TEST_F(Test_FindMatches, secondTest) {
insertConstantEntry(2, 0, 1);
table.flush_input();

std::cout << table.toString() << "\n\n";

std::cout << table.getIndex(2).toString() << "\n";

cugraph::db::db_pattern<int32_t> q;
cugraph::db::db_pattern_entry<int32_t> q1(0);
cugraph::db::db_pattern_entry<int32_t> q2("a");
Expand All @@ -148,14 +104,14 @@ TEST_F(Test_FindMatches, secondTest) {

cugraph::db::db_result<int32_t> result = cugraph::db::findMatches(q, table, nullptr, 2);

std::cout << result.toString();

ASSERT_EQ(result.getSize(), 2);
int32_t* resultA = new int32_t[result.getSize()];
int32_t* resultB = new int32_t[result.getSize()];
cudaMemcpy(resultA, result.getData("a"), sizeof(int32_t) * result.getSize(), cudaMemcpyDefault);
cudaMemcpy(resultB, result.getData("b"), sizeof(int32_t) * result.getSize(), cudaMemcpyDefault);

std::cout << result.toString();

ASSERT_EQ(resultA[0], 1);
ASSERT_EQ(resultB[0], 1);
ASSERT_EQ(resultA[1], 1);
Expand Down

0 comments on commit 647bb17

Please sign in to comment.