From 083a8ca09967c0b1cde3e44f50fd238d8a03d0e4 Mon Sep 17 00:00:00 2001 From: ronnywang Date: Thu, 2 Feb 2023 15:19:42 +0800 Subject: [PATCH] [CustomDevice] refine custom device api (#5604) * [CustomDevice] refine custom device api * update --- .../custom_device_docs/ccl_api_cn.md | 2 +- .../custom_device_docs/custom_runtime_cn.rst | 292 +++++++++--------- .../custom_device_docs/custom_runtime_en.rst | 256 +++++++++------ .../custom_device_docs/memory_api_cn.md | 10 +- .../custom_device_docs/memory_api_en.md | 10 +- .../custom_device_docs/stream_api_cn.md | 8 +- .../custom_device_docs/stream_api_en.md | 8 +- 7 files changed, 319 insertions(+), 267 deletions(-) diff --git a/docs/dev_guides/custom_device_docs/ccl_api_cn.md b/docs/dev_guides/custom_device_docs/ccl_api_cn.md index 5abdf8f8084..bd757990b03 100644 --- a/docs/dev_guides/custom_device_docs/ccl_api_cn.md +++ b/docs/dev_guides/custom_device_docs/ccl_api_cn.md @@ -222,7 +222,7 @@ C_Status (*xccl_group_end)() ### 接口说明 -结束集合通迅操作聚合。 +停止集合通迅操作聚合。 ## xccl_send 【optional】 diff --git a/docs/dev_guides/custom_device_docs/custom_runtime_cn.rst b/docs/dev_guides/custom_device_docs/custom_runtime_cn.rst index 5604769597e..1bb55535ddc 100644 --- a/docs/dev_guides/custom_device_docs/custom_runtime_cn.rst +++ b/docs/dev_guides/custom_device_docs/custom_runtime_cn.rst @@ -16,177 +16,177 @@ Device 接口 ############ -+------------------------+--------------------------+ -| 接口名称 | 功能简介 | -+========================+==========================+ -| initialize | 初始化硬件后端。 | -+------------------------+--------------------------+ -| finalize | 去初始化硬件后端。 | -+------------------------+--------------------------+ -| init_device | 初始化指定硬件设备。 | -+------------------------+--------------------------+ -| deinit_device | 去初始化指定硬件设备。 | -+------------------------+--------------------------+ -| set_device | 设置当前使用的硬件设备。 | -+------------------------+--------------------------+ -| get_device | 获取当前使用的硬件设备。 | 
-+------------------------+--------------------------+ -| synchronize_device | 同步指定的硬件设备。 | -+------------------------+--------------------------+ -| get_device_count | 查询可用设备数量。 | -+------------------------+--------------------------+ -| get_device_list | 查询可用设备号。 | -+------------------------+--------------------------+ -| get_compute_capability | 查询设备算力。 | -+------------------------+--------------------------+ -| get_runtime_version | 查询运行时版本号。 | -+------------------------+--------------------------+ -| get_driver_version | 查询驱动版本号。 | -+------------------------+--------------------------+ ++------------------------+--------------------------+------+ +| 接口名称 | 功能简介 | 必选 | ++========================+==========================+======+ +| initialize | 初始化硬件后端。 | N | ++------------------------+--------------------------+------+ +| finalize | 去初始化硬件后端。 | N | ++------------------------+--------------------------+------+ +| init_device | 初始化指定硬件设备。 | N | ++------------------------+--------------------------+------+ +| deinit_device | 去初始化指定硬件设备。 | N | ++------------------------+--------------------------+------+ +| set_device | 设置当前使用的硬件设备。 | Y | ++------------------------+--------------------------+------+ +| get_device | 获取当前使用的硬件设备。 | Y | ++------------------------+--------------------------+------+ +| synchronize_device | 同步指定的硬件设备。 | Y | ++------------------------+--------------------------+------+ +| get_device_count | 查询可用设备数量。 | Y | ++------------------------+--------------------------+------+ +| get_device_list | 查询可用设备号。 | Y | ++------------------------+--------------------------+------+ +| get_compute_capability | 查询设备算力。 | Y | ++------------------------+--------------------------+------+ +| get_runtime_version | 查询运行时版本号。 | Y | ++------------------------+--------------------------+------+ +| get_driver_version | 查询驱动版本号。 | Y | ++------------------------+--------------------------+------+ Memory 接口 ############ 
-+---------------------------+------------------------------+ -| 接口名称 | 功能简介 | -+===========================+==============================+ -| device_memory_allocate | 分配设备内存。 | -+---------------------------+------------------------------+ -| device_memory_deallocate | 释放设备内存。 | -+---------------------------+------------------------------+ -| host_memory_allocate | 分配主机锁页内存。 | -+---------------------------+------------------------------+ -| host_memory_deallocate | 释放主机锁页内存。 | -+---------------------------+------------------------------+ -| unified_memory_allocate | 分配统一地址空间内存。 | -+---------------------------+------------------------------+ -| unified_memory_deallocate | 释放统一地址空间内存。 | -+---------------------------+------------------------------+ -| memory_copy_h2d | 主机到设备的同步内存拷贝。 | -+---------------------------+------------------------------+ -| memory_copy_d2h | 设备到主机的同步内存拷贝。 | -+---------------------------+------------------------------+ -| memory_copy_d2d | 设备内同步内存拷贝。 | -+---------------------------+------------------------------+ -| memory_copy_p2d | 设备间同步内存拷贝。 | -+---------------------------+------------------------------+ -| async_memory_copy_h2d | 主机到设备异步内存拷贝。 | -+---------------------------+------------------------------+ -| async_memory_copy_d2h | 设备到主机异步内存拷贝。 | -+---------------------------+------------------------------+ -| async_memory_copy_d2d | 设备内异步内存拷贝。 | -+---------------------------+------------------------------+ -| async_memory_copy_p2d | 设备间异步内存拷贝。 | -+---------------------------+------------------------------+ -| device_memory_set | 填充设备内存。 | -+---------------------------+------------------------------+ -| device_memory_stats | 设备内存使用统计。 | -+---------------------------+------------------------------+ -| device_min_chunk_size | 查询设备内存最小块大小。 | -+---------------------------+------------------------------+ -| device_max_chunk_size | 查询设备内存最大块大小。 | -+---------------------------+------------------------------+ -| device_max_alloc_size | 
查询设备最大可分配内存大小。 | -+---------------------------+------------------------------+ -| device_extra_padding_size | 查询设备内存额外填充大小。 | -+---------------------------+------------------------------+ -| device_init_alloc_size | 查询设备初始化分配内存大小。 | -+---------------------------+------------------------------+ -| device_realloc_size | 查询设备重分配内存大小。 | -+---------------------------+------------------------------+ ++---------------------------+------------------------------+------+ +| 接口名称 | 功能简介 | 必选 | ++===========================+==============================+======+ +| device_memory_allocate | 分配设备内存。 | Y | ++---------------------------+------------------------------+------+ +| device_memory_deallocate | 释放设备内存。 | Y | ++---------------------------+------------------------------+------+ +| host_memory_allocate | 分配主机锁页内存。 | N | ++---------------------------+------------------------------+------+ +| host_memory_deallocate | 释放主机锁页内存。 | N | ++---------------------------+------------------------------+------+ +| unified_memory_allocate | 分配统一地址空间内存。 | N | ++---------------------------+------------------------------+------+ +| unified_memory_deallocate | 释放统一地址空间内存。 | N | ++---------------------------+------------------------------+------+ +| memory_copy_h2d | 主机到设备的同步内存拷贝。 | N | ++---------------------------+------------------------------+------+ +| memory_copy_d2h | 设备到主机的同步内存拷贝。 | N | ++---------------------------+------------------------------+------+ +| memory_copy_d2d | 设备内同步内存拷贝。 | N | ++---------------------------+------------------------------+------+ +| memory_copy_p2d | 设备间同步内存拷贝。 | N | ++---------------------------+------------------------------+------+ +| async_memory_copy_h2d | 主机到设备异步内存拷贝。 | N | ++---------------------------+------------------------------+------+ +| async_memory_copy_d2h | 设备到主机异步内存拷贝。 | N | ++---------------------------+------------------------------+------+ +| async_memory_copy_d2d | 设备内异步内存拷贝。 | N | 
++---------------------------+------------------------------+------+ +| async_memory_copy_p2d | 设备间异步内存拷贝。 | N | ++---------------------------+------------------------------+------+ +| device_memory_set | 填充设备内存。 | N | ++---------------------------+------------------------------+------+ +| device_memory_stats | 设备内存使用统计。 | N | ++---------------------------+------------------------------+------+ +| device_min_chunk_size | 查询设备内存最小块大小。 | N | ++---------------------------+------------------------------+------+ +| device_max_chunk_size | 查询设备内存最大块大小。 | N | ++---------------------------+------------------------------+------+ +| device_max_alloc_size | 查询设备最大可分配内存大小。 | N | ++---------------------------+------------------------------+------+ +| device_extra_padding_size | 查询设备内存额外填充大小。 | N | ++---------------------------+------------------------------+------+ +| device_init_alloc_size | 查询设备初始化分配内存大小。 | N | ++---------------------------+------------------------------+------+ +| device_realloc_size | 查询设备重分配内存大小。 | N | ++---------------------------+------------------------------+------+ Stream 接口 ############ -+---------------------+-------------------------------------------+ -| 接口名称 | 功能简介 | -+=====================+===========================================+ -| create_stream | 创建一个 stream 对象。 | -+---------------------+-------------------------------------------+ -| destroy_stream | 销毁一个 stream 对象。 | -+---------------------+-------------------------------------------+ -| query_stream | 查询 stream 上任务是否完成。 | -+---------------------+-------------------------------------------+ -| synchronize_stream | 同步 stream,等待 stream 上所有任务完成。 | -+---------------------+-------------------------------------------+ -| stream_add_callback | 添加一个主机回调到 stream 上。 | -+---------------------+-------------------------------------------+ -| stream_wait_event | 等待 stream 上的一个 event 完成。 | -+---------------------+-------------------------------------------+ 
++---------------------+-------------------------------------------+------+ +| 接口名称 | 功能简介 | 必选 | ++=====================+===========================================+======+ +| create_stream | 创建一个 stream 对象。 | N | ++---------------------+-------------------------------------------+------+ +| destroy_stream | 销毁一个 stream 对象。 | N | ++---------------------+-------------------------------------------+------+ +| query_stream | 查询 stream 上任务是否完成。 | N | ++---------------------+-------------------------------------------+------+ +| synchronize_stream | 同步 stream,等待 stream 上所有任务完成。 | N | ++---------------------+-------------------------------------------+------+ +| stream_add_callback | 添加一个主机回调到 stream 上。 | N | ++---------------------+-------------------------------------------+------+ +| stream_wait_event | 等待 stream 上的一个 event 完成。 | N | ++---------------------+-------------------------------------------+------+ Event 接口 ############ -+-------------------+-------------------------------+ -| 接口名称 | 功能简介 | -+===================+===============================+ -| create_event | 创建一个 event 对象。 | -+-------------------+-------------------------------+ -| destroy_event | 销毁一个 event 对象。 | -+-------------------+-------------------------------+ -| record_event | 在 stream 上记录 event。 | -+-------------------+-------------------------------+ -| query_event | 查询 event 是否完成。 | -+-------------------+-------------------------------+ -| synchronize_event | 同步 event,等待 event 完成。 | -+-------------------+-------------------------------+ ++-------------------+-------------------------------+------+ +| 接口名称 | 功能简介 | 必选 | ++===================+===============================+======+ +| create_event | 创建一个 event 对象。 | Y | ++-------------------+-------------------------------+------+ +| destroy_event | 销毁一个 event 对象。 | Y | ++-------------------+-------------------------------+------+ +| record_event | 在 stream 上记录 event。 | Y | ++-------------------+-------------------------------+------+ +| 
query_event | 查询 event 是否完成。 | N | ++-------------------+-------------------------------+------+ +| synchronize_event | 同步 event,等待 event 完成。 | Y | ++-------------------+-------------------------------+------+ 集合通讯接口 ############ -+-------------------------+-------------------------------+ -| 接口名称 | 功能简介 | -+=========================+===============================+ -| xccl_get_unique_id_size | 获取 unique_id 对象的大小。 | -+-------------------------+-------------------------------+ -| xccl_get_unique_id | 获取 unique_id 对象。 | -+-------------------------+-------------------------------+ -| xccl_comm_init_rank | 初始化 communicator。 | -+-------------------------+-------------------------------+ -| xccl_destroy_comm | 销毁 communicator。 | -+-------------------------+-------------------------------+ -| xccl_all_reduce | 集合通讯 AllReduce 操作。 | -+-------------------------+-------------------------------+ -| xccl_broadcast | 集合通讯 Broadcast 操作。 | -+-------------------------+-------------------------------+ -| xccl_reduce | 集合通讯 Reduce 操作。 | -+-------------------------+-------------------------------+ -| xccl_all_gather | 集合通讯 AllGather 操作。 | -+-------------------------+-------------------------------+ -| xccl_reduce_scatter | 集合通讯 ReduceScatter 操作。 | -+-------------------------+-------------------------------+ -| xccl_group_start | 集合通讯 AllReduce 操作。 | -+-------------------------+-------------------------------+ -| xccl_group_end | 集合通讯 AllReduce 操作。 | -+-------------------------+-------------------------------+ -| xccl_send | 集合通讯 Send 操作。 | -+-------------------------+-------------------------------+ -| xccl_recv | 集合通讯 Recv 操作。 | -+-------------------------+-------------------------------+ ++-------------------------+-------------------------------+------+ +| 接口名称 | 功能简介 | 必选 | ++=========================+===============================+======+ +| xccl_get_unique_id_size | 获取 unique_id 对象的大小。 | N | ++-------------------------+-------------------------------+------+ +| 
xccl_get_unique_id | 获取 unique_id 对象。 | N | ++-------------------------+-------------------------------+------+ +| xccl_comm_init_rank | 初始化 communicator。 | N | ++-------------------------+-------------------------------+------+ +| xccl_destroy_comm | 销毁 communicator。 | N | ++-------------------------+-------------------------------+------+ +| xccl_all_reduce | 集合通讯 AllReduce 操作。 | N | ++-------------------------+-------------------------------+------+ +| xccl_broadcast | 集合通讯 Broadcast 操作。 | N | ++-------------------------+-------------------------------+------+ +| xccl_reduce | 集合通讯 Reduce 操作。 | N | ++-------------------------+-------------------------------+------+ +| xccl_all_gather | 集合通讯 AllGather 操作。 | N | ++-------------------------+-------------------------------+------+ +| xccl_reduce_scatter | 集合通讯 ReduceScatter 操作。 | N | ++-------------------------+-------------------------------+------+ +| xccl_group_start | 开始集合通讯操作聚合。 | N | ++-------------------------+-------------------------------+------+ +| xccl_group_end | 停止集合通讯操作聚合。 | N | ++-------------------------+-------------------------------+------+ +| xccl_send | 集合通讯 Send 操作。 | N | ++-------------------------+-------------------------------+------+ +| xccl_recv | 集合通讯 Recv 操作。 | N | ++-------------------------+-------------------------------+------+ Profiler 接口 ############ -+-----------------------------+-----------------------+ -| 接口名称 | 功能简介 | -+=============================+=======================+ -| profiler_initialize | 初始化硬件 Profiler | -+-----------------------------+-----------------------+ -| profiler_finalize | 去初始化硬件 Profiler | -+-----------------------------+-----------------------+ -| profiler_prepare_tracing | Profiler 准备收集数据 | -+-----------------------------+-----------------------+ -| profiler_start_tracing | Profiler 开始收集数据 | -+-----------------------------+-----------------------+ -| profiler_stop_tracing | Profiler 停止收集数据 | -+-----------------------------+-----------------------+ -| 
profiler_collect_trace_data | Profiler 数据转换 | -+-----------------------------+-----------------------+ ++-----------------------------+-----------------------+------+ +| 接口名称 | 功能简介 | 必选 | ++=============================+=======================+======+ +| profiler_initialize | 初始化硬件 Profiler | N | ++-----------------------------+-----------------------+------+ +| profiler_finalize | 去初始化硬件 Profiler | N | ++-----------------------------+-----------------------+------+ +| profiler_prepare_tracing | Profiler 准备收集数据 | N | ++-----------------------------+-----------------------+------+ +| profiler_start_tracing | Profiler 开始收集数据 | N | ++-----------------------------+-----------------------+------+ +| profiler_stop_tracing | Profiler 停止收集数据 | N | ++-----------------------------+-----------------------+------+ +| profiler_collect_trace_data | Profiler 数据转换 | N | ++-----------------------------+-----------------------+------+ .. toctree:: diff --git a/docs/dev_guides/custom_device_docs/custom_runtime_en.rst b/docs/dev_guides/custom_device_docs/custom_runtime_en.rst index e15ddea1005..ec9e423b1cf 100644 --- a/docs/dev_guides/custom_device_docs/custom_runtime_en.rst +++ b/docs/dev_guides/custom_device_docs/custom_runtime_en.rst @@ -14,124 +14,176 @@ Custom Runtime offers a new method to register the runtime of new devices via pl Device APIs ############ -+------------------------+----------------------------------------+ -| API | Function | -+========================+========================================+ -| initialize | To initialize the device backend | -+------------------------+----------------------------------------+ -| finalize | To de-initialize the device backend | -+------------------------+----------------------------------------+ -| init_device | To initialize the designated device | -+------------------------+----------------------------------------+ -| deinit_device | To de-initialize the designated device | 
-+------------------------+----------------------------------------+ -| set_device | To set the current device | -+------------------------+----------------------------------------+ -| get_device | To get the current device | -+------------------------+----------------------------------------+ -| synchronize_device | To synchronize the desginated device | -+------------------------+----------------------------------------+ -| get_device_count | To count available devices | -+------------------------+----------------------------------------+ -| get_device_list | To get the list of available devices | -+------------------------+----------------------------------------+ -| get_compute_capability | To get computing capability of devices | -+------------------------+----------------------------------------+ -| get_runtime_version | To get the runtime version | -+------------------------+----------------------------------------+ -| get_driver_version | To get the driver version | -+------------------------+----------------------------------------+ ++------------------------+----------------------------------------+----------+ +| API | Function | Required | ++========================+========================================+==========+ +| initialize | To initialize the device backend | N | ++------------------------+----------------------------------------+----------+ +| finalize | To de-initialize the device backend | N | ++------------------------+----------------------------------------+----------+ +| init_device | To initialize the designated device | N | ++------------------------+----------------------------------------+----------+ +| deinit_device | To de-initialize the designated device | N | ++------------------------+----------------------------------------+----------+ +| set_device | To set the current device | Y | ++------------------------+----------------------------------------+----------+ +| get_device | To get the current device | Y | 
++------------------------+----------------------------------------+----------+ +| synchronize_device | To synchronize the designated device | Y | ++------------------------+----------------------------------------+----------+ +| get_device_count | To count available devices | Y | ++------------------------+----------------------------------------+----------+ +| get_device_list | To get the list of available devices | Y | ++------------------------+----------------------------------------+----------+ +| get_compute_capability | To get computing capability of devices | Y | ++------------------------+----------------------------------------+----------+ +| get_runtime_version | To get the runtime version | Y | ++------------------------+----------------------------------------+----------+ +| get_driver_version | To get the driver version | Y | ++------------------------+----------------------------------------+----------+ Memory APIs ############ -+---------------------------+-------------------------------------------------------------------+ -| API | Function | -+===========================+===================================================================+ -| device_memory_allocate | To allocate the device memory | -+---------------------------+-------------------------------------------------------------------+ -| device_memory_deallocate | To deallocate the device memory | -+---------------------------+-------------------------------------------------------------------+ -| host_memory_allocate | To allocate pinned host memory | -+---------------------------+-------------------------------------------------------------------+ -| host_memory_deallocate | To deallocate pinned host memory | -+---------------------------+-------------------------------------------------------------------+ -| unified_memory_allocate | To allocated unified memory | -+---------------------------+-------------------------------------------------------------------+ -| 
unified_memory_deallocate | To deallocate unified memory | -+---------------------------+-------------------------------------------------------------------+ -| memory_copy_h2d | To copy synchronous memory from host to device | -+---------------------------+-------------------------------------------------------------------+ -| memory_copy_d2h | To copy synchronous memory from device to host | -+---------------------------+-------------------------------------------------------------------+ -| memory_copy_d2d | To copy synchronous memory in the device | -+---------------------------+-------------------------------------------------------------------+ -| memory_copy_p2d | To copy synchronous memory between devices | -+---------------------------+-------------------------------------------------------------------+ -| async_memory_copy_h2d | To copy asynchronous memory from host to device | -+---------------------------+-------------------------------------------------------------------+ -| async_memory_copy_d2h | To copy asynchronous memory from device to host | -+---------------------------+-------------------------------------------------------------------+ -| async_memory_copy_d2d | To copy asynchronous memory in the device | -+---------------------------+-------------------------------------------------------------------+ -| async_memory_copy_p2d | To copy asynchronous memory between devices | -+---------------------------+-------------------------------------------------------------------+ -| device_memory_set | To fill the device memory | -+---------------------------+-------------------------------------------------------------------+ -| device_memory_stats | To measure device memory utilization | -+---------------------------+-------------------------------------------------------------------+ -| device_min_chunk_size | To check the minimum size of device memory chunks | 
-+---------------------------+-------------------------------------------------------------------+ -| device_max_chunk_size | To check the maximum size of device memory chunks | -+---------------------------+-------------------------------------------------------------------+ -| device_max_alloc_size | To check the maximum size of allocatable device memory | -+---------------------------+-------------------------------------------------------------------+ -| device_extra_padding_size | To check the extra padding size of device memory | -+---------------------------+-------------------------------------------------------------------+ -| device_init_alloc_size | To check the size of allocated device memory after initialization | -+---------------------------+-------------------------------------------------------------------+ -| device_realloc_size | To check the size of reallocated device memory | -+---------------------------+-------------------------------------------------------------------+ ++---------------------------+-------------------------------------------------------------------+----------+ +| API | Function | Required | ++===========================+===================================================================+==========+ +| device_memory_allocate | To allocate the device memory | Y | ++---------------------------+-------------------------------------------------------------------+----------+ +| device_memory_deallocate | To deallocate the device memory | Y | ++---------------------------+-------------------------------------------------------------------+----------+ +| host_memory_allocate | To allocate pinned host memory | N | ++---------------------------+-------------------------------------------------------------------+----------+ +| host_memory_deallocate | To deallocate pinned host memory | N | ++---------------------------+-------------------------------------------------------------------+----------+ +| unified_memory_allocate | To 
allocate unified memory | N | ++---------------------------+-------------------------------------------------------------------+----------+ +| unified_memory_deallocate | To deallocate unified memory | N | ++---------------------------+-------------------------------------------------------------------+----------+ +| memory_copy_h2d | To copy synchronous memory from host to device | N | ++---------------------------+-------------------------------------------------------------------+----------+ +| memory_copy_d2h | To copy synchronous memory from device to host | N | ++---------------------------+-------------------------------------------------------------------+----------+ +| memory_copy_d2d | To copy synchronous memory in the device | N | ++---------------------------+-------------------------------------------------------------------+----------+ +| memory_copy_p2d | To copy synchronous memory between devices | N | ++---------------------------+-------------------------------------------------------------------+----------+ +| async_memory_copy_h2d | To copy asynchronous memory from host to device | N | ++---------------------------+-------------------------------------------------------------------+----------+ +| async_memory_copy_d2h | To copy asynchronous memory from device to host | N | ++---------------------------+-------------------------------------------------------------------+----------+ +| async_memory_copy_d2d | To copy asynchronous memory in the device | N | ++---------------------------+-------------------------------------------------------------------+----------+ +| async_memory_copy_p2d | To copy asynchronous memory between devices | N | ++---------------------------+-------------------------------------------------------------------+----------+ +| device_memory_set | To fill the device memory | N | ++---------------------------+-------------------------------------------------------------------+----------+ +| device_memory_stats | To measure 
device memory utilization | N | ++---------------------------+-------------------------------------------------------------------+----------+ +| device_min_chunk_size | To check the minimum size of device memory chunks | N | ++---------------------------+-------------------------------------------------------------------+----------+ +| device_max_chunk_size | To check the maximum size of device memory chunks | N | ++---------------------------+-------------------------------------------------------------------+----------+ +| device_max_alloc_size | To check the maximum size of allocatable device memory | N | ++---------------------------+-------------------------------------------------------------------+----------+ +| device_extra_padding_size | To check the extra padding size of device memory | N | ++---------------------------+-------------------------------------------------------------------+----------+ +| device_init_alloc_size | To check the size of allocated device memory after initialization | N | ++---------------------------+-------------------------------------------------------------------+----------+ +| device_realloc_size | To check the size of reallocated device memory | N | ++---------------------------+-------------------------------------------------------------------+----------+ Stream APIs ############ -+---------------------+-------------------------------------------------------------------+ -| API | Function | -+=====================+===================================================================+ -| create_stream | To create a stream object | -+---------------------+-------------------------------------------------------------------+ -| destroy_stream | To destroy a stream object | -+---------------------+-------------------------------------------------------------------+ -| query_stream | To query whether all the tasks on the stream are done | -+---------------------+-------------------------------------------------------------------+ 
-| synchronize_stream | To synchronize the stream and wait for the completion of all tasks| -+---------------------+-------------------------------------------------------------------+ -| stream_add_callback | To add a host and call it back on the stream | -+---------------------+-------------------------------------------------------------------+ -| stream_wait_event | To wait for the completion of an event on the stream | -+---------------------+-------------------------------------------------------------------+ ++---------------------+--------------------------------------------------------------------+----------+ +| API | Function | Required | ++=====================+====================================================================+==========+ +| create_stream | To create a stream object | N | ++---------------------+--------------------------------------------------------------------+----------+ +| destroy_stream | To destroy a stream object | N | ++---------------------+--------------------------------------------------------------------+----------+ +| query_stream | To query whether all the tasks on the stream are done | N | ++---------------------+--------------------------------------------------------------------+----------+ +| synchronize_stream | To synchronize the stream and wait for the completion of all tasks | N | ++---------------------+--------------------------------------------------------------------+----------+ +| stream_add_callback | To add a host callback to the stream | N | ++---------------------+--------------------------------------------------------------------+----------+ +| stream_wait_event | To wait for the completion of an event on the stream | N | ++---------------------+--------------------------------------------------------------------+----------+ Event APIs ############ -+-------------------+---------------------------------------------------------+ -| API | Function | 
-+===================+=========================================================+ -| create_event | To create an event | -+-------------------+---------------------------------------------------------+ -| destroy_event | To destroy an event | -+-------------------+---------------------------------------------------------+ -| record_event | To record an event on the stream | -+-------------------+---------------------------------------------------------+ -| query_event | To query whether the event is done | -+-------------------+---------------------------------------------------------+ -| synchronize_event | To synchronize the event and wait for its completion | -+-------------------+---------------------------------------------------------+ ++-------------------+------------------------------------------------------+----------+ +| API | Function | Required | ++===================+======================================================+==========+ +| create_event | To create an event | Y | ++-------------------+------------------------------------------------------+----------+ +| destroy_event | To destroy an event | Y | ++-------------------+------------------------------------------------------+----------+ +| record_event | To record an event on the stream | Y | ++-------------------+------------------------------------------------------+----------+ +| query_event | To query whether the event is done | N | ++-------------------+------------------------------------------------------+----------+ +| synchronize_event | To synchronize the event and wait for its completion | Y | ++-------------------+------------------------------------------------------+----------+ + +Collective communication APIs +############################# + ++-------------------------+---------------------------------------------------------+----------+ +| API | Function | Required | ++=========================+=========================================================+==========+ +| xccl_get_unique_id_size | Get 
the size of unique_id object | N | ++-------------------------+---------------------------------------------------------+----------+ +| xccl_get_unique_id | Get unique_id object | N | ++-------------------------+---------------------------------------------------------+----------+ +| xccl_comm_init_rank | To initialize communicator | N | ++-------------------------+---------------------------------------------------------+----------+ +| xccl_destroy_comm | To destroy communicator | N | ++-------------------------+---------------------------------------------------------+----------+ +| xccl_all_reduce | Collective communication AllReduce operation | N | ++-------------------------+---------------------------------------------------------+----------+ +| xccl_broadcast | Collective communication Broadcast operation | N | ++-------------------------+---------------------------------------------------------+----------+ +| xccl_reduce | Collective communication Reduce operation | N | ++-------------------------+---------------------------------------------------------+----------+ +| xccl_all_gather | Collective communication AllGather operation | N | ++-------------------------+---------------------------------------------------------+----------+ +| xccl_reduce_scatter | Collective communication ReduceScatter operation | N | ++-------------------------+---------------------------------------------------------+----------+ +| xccl_group_start | Begin aggregation of collective communication operation | N | ++-------------------------+---------------------------------------------------------+----------+ +| xccl_group_end | Stop aggregation of collective communication operation | N | ++-------------------------+---------------------------------------------------------+----------+ +| xccl_send | Collective communication Send operation | N | ++-------------------------+---------------------------------------------------------+----------+ +| xccl_recv | Collective
communication Recv operation | N | ++-------------------------+---------------------------------------------------------+----------+ + + +Profiler APIs +############# ++-----------------------------+-----------------------------------+----------+ +| API | Function | Required | ++=============================+===================================+==========+ +| profiler_initialize | To initialize profiler | N | ++-----------------------------+-----------------------------------+----------+ +| profiler_finalize | To de-initialize profiler | N | ++-----------------------------+-----------------------------------+----------+ +| profiler_prepare_tracing | Prepare to collect profiling data | N | ++-----------------------------+-----------------------------------+----------+ +| profiler_start_tracing | Start collecting profiling data | N | ++-----------------------------+-----------------------------------+----------+ +| profiler_stop_tracing | Stop collecting profiling data | N | ++-----------------------------+-----------------------------------+----------+ +| profiler_collect_trace_data | Profiler data conversion | N | ++-----------------------------+-----------------------------------+----------+ ..
toctree:: :hidden: diff --git a/docs/dev_guides/custom_device_docs/memory_api_cn.md b/docs/dev_guides/custom_device_docs/memory_api_cn.md index b808a2d7f94..1996f4c661b 100644 --- a/docs/dev_guides/custom_device_docs/memory_api_cn.md +++ b/docs/dev_guides/custom_device_docs/memory_api_cn.md @@ -120,7 +120,7 @@ ptr - 需要释放的统一地址空间内存地址。 size - 需要释放的内存大小(字节形式)。 -## memory_copy_h2d 【required】 +## memory_copy_h2d 【optional】 ### 接口定义 @@ -142,7 +142,7 @@ src - 源主机内存地址。 size - 需要拷贝的内存大小(字节形式)。 -## memory_copy_d2h 【required】 +## memory_copy_d2h 【optional】 ### 接口定义 @@ -164,7 +164,7 @@ src - 源设备内存地址。 size - 需要拷贝的内存大小(字节形式)。 -## memory_copy_d2d 【required】 +## memory_copy_d2d 【optional】 ### 接口定义 @@ -330,7 +330,7 @@ value - 填充值。 size - 填充大小(字节形式)。 -## device_memory_stats 【required】 +## device_memory_stats 【optional】 ### 接口定义 @@ -350,7 +350,7 @@ total_memory - 总内存(字节形式)。 free_memory - 剩余可用内存(字节形式)。 -## device_min_chunk_size 【required】 +## device_min_chunk_size 【optional】 ### 接口定义 diff --git a/docs/dev_guides/custom_device_docs/memory_api_en.md b/docs/dev_guides/custom_device_docs/memory_api_en.md index 876d0cba3aa..0d6595ae4db 100644 --- a/docs/dev_guides/custom_device_docs/memory_api_en.md +++ b/docs/dev_guides/custom_device_docs/memory_api_en.md @@ -120,7 +120,7 @@ ptr - the address of unified memory needed to be deallocated size - the size of memory needed to be deallocated (in byte) -## memory_copy_h2d 【required】 +## memory_copy_h2d 【optional】 ### Definition @@ -142,7 +142,7 @@ src - the address of the source host memory size - the size of memory needed to be copied (in byte) -## memory_copy_d2h 【required】 +## memory_copy_d2h 【optional】 ### Definition @@ -164,7 +164,7 @@ src - the address of the source device memory size - the size of memory needed to be copied (in byte) -## memory_copy_d2d 【required】 +## memory_copy_d2d 【optional】 ### Definition @@ -330,7 +330,7 @@ value - padded value size - padding size (in byte) -## device_memory_stats 【required】 +## device_memory_stats 
【optional】 ### Definition @@ -350,7 +350,7 @@ total_memory - total memory (in byte) free_memory - free memory (in byte) -## device_min_chunk_size 【required】 +## device_min_chunk_size 【optional】 ### Definition diff --git a/docs/dev_guides/custom_device_docs/stream_api_cn.md b/docs/dev_guides/custom_device_docs/stream_api_cn.md index 639da5b2365..e1b741831de 100644 --- a/docs/dev_guides/custom_device_docs/stream_api_cn.md +++ b/docs/dev_guides/custom_device_docs/stream_api_cn.md @@ -1,6 +1,6 @@ # Stream 接口 -## create_stream 【required】 +## create_stream 【optional】 ### 接口定义 @@ -18,7 +18,7 @@ device - 使用的设备。 stream - 存储创建的 stream 对象。 -## destroy_stream 【required】 +## destroy_stream 【optional】 ### 接口定义 @@ -54,7 +54,7 @@ device - 使用的设备。 stream - 需要查询的 stream。 -## synchronize_stream 【required】 +## synchronize_stream 【optional】 ### 接口定义 @@ -94,7 +94,7 @@ callback - 回调函数。 user_data - 回调函数的参数。 -## stream_wait_event 【required】 +## stream_wait_event 【optional】 ### 接口定义 diff --git a/docs/dev_guides/custom_device_docs/stream_api_en.md b/docs/dev_guides/custom_device_docs/stream_api_en.md index d0be594acdf..9c854052773 100644 --- a/docs/dev_guides/custom_device_docs/stream_api_en.md +++ b/docs/dev_guides/custom_device_docs/stream_api_en.md @@ -1,6 +1,6 @@ # Stream APIs -## create_stream 【required】 +## create_stream 【optional】 ### Definition @@ -18,7 +18,7 @@ device - the device to be used stream - the created stream -## destroy_stream 【required】 +## destroy_stream 【optional】 ### Definition @@ -54,7 +54,7 @@ device - the device to be used stream - the stream required to be queried. -## synchronize_stream 【required】 +## synchronize_stream 【optional】 ### Definition @@ -94,7 +94,7 @@ callback - the callback function user_data - parameters of the function -## stream_wait_event 【required】 +## stream_wait_event 【optional】 ### Definition