Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

bug fix and major refactor of code #21

Merged
merged 21 commits into from
Jun 1, 2017
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
e32ee27
assorted clean-ups
drossetti May 19, 2017
12178fd
add flags param to gds_stream_post_descriptors
drossetti May 19, 2017
d7609f5
consolidate APIs into apis.cpp
drossetti May 19, 2017
6df673c
use descriptor APIs
drossetti May 19, 2017
9b9df07
remove useless code
drossetti May 19, 2017
54e0b97
right type for write descriptor
Akshay-Venkatesh May 19, 2017
9bb7447
move desc as 1st param in gds_prepare_wait|write_value32()
drossetti May 22, 2017
9b00f7b
fix bug: make sanity checkers conform to C++ conventions
drossetti May 22, 2017
567e37a
use descriptor APIs for write32 and wait32
drossetti May 22, 2017
626882e
add descriptor APIs support (-U) to gds kernel latency tests
drossetti May 22, 2017
40f993b
send updated + rollback function (old changes applied to new branch)
e-ago May 22, 2017
32c504c
change extended param name for -U, add support for USE_GPU
drossetti May 22, 2017
1a92e95
Warnings fixed
e-ago May 22, 2017
b030bc7
use exp_ verbs names instead of custom abstracted names
drossetti May 22, 2017
aaf966f
minor clean-ups in test/
drossetti May 22, 2017
0cdf58d
get rid of gds_register_peer()
drossetti May 23, 2017
951813f
move APIs into separate sections and add initial documentation
drossetti May 23, 2017
ac773e3
fix build problem with device.cuh
drossetti May 26, 2017
3ca6da9
protect from inclusion in C modules. fix build error.
drossetti May 26, 2017
e512e98
include device.cuh as sanity check
drossetti May 26, 2017
a7e902a
propagate CPPFLAGS to CUDA C builds
drossetti May 27, 2017
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
add descriptor APIs support (-U) to gds kernel latency tests
  • Loading branch information
drossetti committed May 27, 2017
commit 626882e03d010e8c5841b06c9a805984762c816a
148 changes: 117 additions & 31 deletions tests/gds_kernel_latency.c
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,7 @@ struct pingpong_context {
int peersync;
int peersync_gpu_cq;
int consume_rx_cqe;
int use_desc_apis;
};

static int my_rank, comm_size;
Expand Down Expand Up @@ -207,7 +208,8 @@ static struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev, int size,
int peersync_gpu_cq,
int peersync_gpu_dbrec,
int consume_rx_cqe,
int sched_mode)
int sched_mode,
int use_desc_apis)
{
struct pingpong_context *ctx;

Expand All @@ -224,6 +226,7 @@ static struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev, int size,
ctx->calc_size = calc_size;
ctx->rx_depth = rx_depth;
ctx->gpu_id = gpu_id;
ctx->use_desc_apis = use_desc_apis;

size_t alloc_size = 3 * align_to(size + 40, page_size);
if (ctx->gpu_id >= 0)
Expand Down Expand Up @@ -508,10 +511,47 @@ static int pp_post_gpu_send(struct pingpong_context *ctx, uint32_t qpn)
ewr.next = NULL;
}
struct ibv_exp_send_wr *bad_ewr;
//printf("gpu_post_send_on_stream\n");
return gds_stream_queue_send(gpu_stream, ctx->gds_qp, &ewr, &bad_ewr);
}

/*
 * Build a single-SGE send work request for the whole tx buffer and hand it
 * to gds_prepare_send(), which fills *req with a descriptor that can later
 * be posted on a CUDA stream via gds_stream_post_descriptors().
 *
 * ctx: connection state; reads ctx->txbuf, ctx->size, ctx->mr->lkey,
 *      ctx->ah and ctx->gds_qp.
 * qpn: remote QP number (used only for the UD addressing fields).
 * req: out-parameter receiving the prepared send descriptor.
 *
 * Returns 0 on success, non-zero error code from gds_prepare_send().
 */
static int pp_prepare_gpu_send(struct pingpong_context *ctx, uint32_t qpn, gds_send_request_t *req)
{
        struct ibv_sge list = {
                .addr   = (uintptr_t) ctx->txbuf,
                .length = ctx->size,
                .lkey   = ctx->mr->lkey
        };
        /* Initializer covers the UD case, including the per-destination
         * address handle and remote QPN/QKey. */
        struct ibv_exp_send_wr ewr = {
                .wr_id = PINGPONG_SEND_WRID,
                .sg_list = &list,
                .num_sge = 1,
                .exp_opcode = IBV_EXP_WR_SEND,
                .exp_send_flags = IBV_EXP_SEND_SIGNALED,
                .wr = {
                        .ud = {
                                .ah          = ctx->ah,
                                .remote_qpn  = qpn,
                                .remote_qkey = 0x11111111
                        }
                },
                .comp_mask = 0
        };

        if (IBV_QPT_UD != gds_qpt) {
                /* Connected QP types carry no per-WR addressing: rebuild the
                 * WR from scratch so the .wr.ud fields are zeroed out. */
                memset(&ewr, 0, sizeof(ewr));
                ewr.num_sge = 1;
                ewr.exp_send_flags = IBV_EXP_SEND_SIGNALED;
                ewr.exp_opcode = IBV_EXP_WR_SEND;
                ewr.wr_id = PINGPONG_SEND_WRID;
                ewr.sg_list = &list;
                ewr.next = NULL;
        }
        struct ibv_exp_send_wr *bad_ewr;
        return gds_prepare_send(ctx->gds_qp, &ewr, &bad_ewr, req);
}


static int pp_post_work(struct pingpong_context *ctx, int n_posts, int rcnt, uint32_t qpn, int is_client)
{
int i, ret = 0;
Expand All @@ -536,25 +576,57 @@ static int pp_post_work(struct pingpong_context *ctx, int n_posts, int rcnt, uin

PROF(&prof, prof_idx++);

gds_send_request_t send_rq[posted_recv];
gds_wait_request_t wait_tx_rq[posted_recv];
gds_wait_request_t wait_rx_rq[posted_recv];
gds_descriptor_t descs[2];

for (i = 0; i < posted_recv; ++i) {
if (is_client) {

if (gds_enable_event_prof && (event_idx < MAX_EVENTS)) {
cudaEventRecord(start_time[event_idx], gpu_stream);
}
ret = pp_post_gpu_send(ctx, qpn);
if (ret) {
fprintf(stderr,"ERROR: can't post GPU send (%d) posted_recv=%d posted_so_far=%d is_client=%d \n",
ret, posted_recv, i, is_client);
i = -ret;
break;
}
if (ctx->use_desc_apis) {
int k = 0;
ret = pp_prepare_gpu_send(ctx, qpn, &send_rq[i]);
if (ret) {
i = -ret;
break;
}
descs[k].tag = GDS_TAG_SEND;
descs[k].send = &send_rq[i];
++k;

ret = gds_stream_wait_cq(gpu_stream, &ctx->gds_qp->recv_cq, ctx->consume_rx_cqe);
if (ret) {
fprintf(stderr,"ERROR: error in gpu_post_poll_cq (%d)\n", ret);
i = -ret;
break;
ret = gds_prepare_wait_cq(&ctx->gds_qp->recv_cq, &wait_rx_rq[i], 0);
if (ret) {
i = -ret;
break;
}
descs[k].tag = GDS_TAG_WAIT;
descs[k].wait = &wait_rx_rq[i];
++k;

ret = gds_stream_post_descriptors(gpu_stream, k, descs, 0);
if (ret) {
i = -ret;
break;
}
} else {
ret = pp_post_gpu_send(ctx, qpn);
if (ret) {
fprintf(stderr,"ERROR: can't post GPU send (%d) posted_recv=%d posted_so_far=%d is_client=%d \n",
ret, posted_recv, i, is_client);
i = -ret;
break;
}

ret = gds_stream_wait_cq(gpu_stream, &ctx->gds_qp->recv_cq, ctx->consume_rx_cqe);
if (ret) {
// TODO: rollback gpu send and wait send_cq
fprintf(stderr,"ERROR: error in gpu_post_poll_cq (%d)\n", ret);
i = -ret;
break;
}
}
if (gds_enable_event_prof && (event_idx < MAX_EVENTS)) {
cudaEventRecord(stop_time[event_idx], gpu_stream);
Expand All @@ -563,7 +635,8 @@ static int pp_post_work(struct pingpong_context *ctx, int n_posts, int rcnt, uin
if (ctx->calc_size)
gpu_launch_kernel(ctx->calc_size, ctx->peersync);
} else {

// no point in using descriptor APIs here, as kernel launch
// would be sitting in between
ret = gds_stream_wait_cq(gpu_stream, &ctx->gds_qp->recv_cq, ctx->consume_rx_cqe);
if (ret) {
fprintf(stderr, "ERROR: error in gpu_post_poll_cq (%d)\n", ret);
Expand All @@ -578,6 +651,7 @@ static int pp_post_work(struct pingpong_context *ctx, int n_posts, int rcnt, uin
}
ret = pp_post_gpu_send(ctx, qpn);
if (ret) {
// TODO: rollback gpu send and kernel launch
fprintf(stderr, "ERROR: can't post GPU send\n");
i = -ret;
break;
Expand Down Expand Up @@ -657,6 +731,7 @@ int main(int argc, char *argv[])
int gds_qp_type = 1;
int sched_mode = CU_CTX_SCHED_AUTO;
int ret = 0;
int use_desc_apis = 0;

MPI_CHECK(MPI_Init(&argc, &argv));
MPI_CHECK(MPI_Comm_size(MPI_COMM_WORLD, &comm_size));
Expand Down Expand Up @@ -706,10 +781,11 @@ int main(int argc, char *argv[])
{ .name = "time-gds-ops", .has_arg = 0, .val = 'T' },
{ .name = "qp-kind", .has_arg = 1, .val = 'K' },
{ .name = "gpu-sched-mode", .has_arg = 1, .val = 'M' },
{ .name = "use-desc-api", .has_arg = 0, .val = 'U' },
{ 0 }
};

c = getopt_long(argc, argv, "p:d:i:s:r:n:l:eg:G:K:S:B:PCDQTM:", long_options, NULL);
c = getopt_long(argc, argv, "p:d:i:s:r:n:l:eg:G:K:S:B:PCDQTM:U", long_options, NULL);
if (c == -1)
break;

Expand Down Expand Up @@ -821,6 +897,11 @@ int main(int argc, char *argv[])
}
break;

case 'U':
use_desc_apis = 1;
printf("INFO: use_desc_apis=%d\n", use_desc_apis);
break;

default:
usage(argv[0]);
return 1;
Expand Down Expand Up @@ -897,7 +978,7 @@ int main(int argc, char *argv[])
}
}

ctx = pp_init_ctx(ib_dev, size, calc_size, rx_depth, ib_port, 0, gpu_id, peersync, peersync_gpu_cq, peersync_gpu_dbrec, consume_rx_cqe, sched_mode);
ctx = pp_init_ctx(ib_dev, size, calc_size, rx_depth, ib_port, 0, gpu_id, peersync, peersync_gpu_cq, peersync_gpu_dbrec, consume_rx_cqe, sched_mode, use_desc_apis);
if (!ctx)
return 1;

Expand Down Expand Up @@ -1226,20 +1307,25 @@ int main(int argc, char *argv[])
ret = 1;
}

{
float usec = (end.tv_sec - start.tv_sec) * 1000000 +
(end.tv_usec - start.tv_usec) + pre_post_us;
long long bytes = (long long) size * iters * 2;

printf("[%d] %lld bytes in %.2f seconds = %.2f Mbit/sec\n",
my_rank, bytes, usec / 1000000., bytes * 8. / usec);
printf("[%d] %d iters in %.2f seconds = %.2f usec/iter\n",
my_rank, iters, usec / 1000000., usec / iters);
}

if (prof_enabled(&prof)) {
printf("dumping prof\n");
prof_dump(&prof);
int rid;
for (rid = 0; rid < comm_size; ++rid) {
MPI_Barrier(MPI_COMM_WORLD);
if (my_rank == rid) {
float usec = (end.tv_sec - start.tv_sec) * 1000000 +
(end.tv_usec - start.tv_usec) + pre_post_us;
long long bytes = (long long) size * iters * 2;

printf("[%d] %lld bytes in %.2f seconds = %.2f Mbit/sec\n",
my_rank, bytes, usec / 1000000., bytes * 8. / usec);
printf("[%d] %d iters in %.2f seconds = %.2f usec/iter\n",
my_rank, iters, usec / 1000000., usec / iters);

if (prof_enabled(&prof)) {
printf("[%d] dumping prof\n", my_rank);
prof_dump(&prof);
}
}
}

//ibv_ack_cq_events(ctx->cq, num_cq_events);
Expand Down
Loading