GPU StorageBoost
GPU StorageBoost™ (GSB) optimizes data transfers between NVMe storage and GPUs for maximum performance and efficiency. It eliminates CPU involvement in I/O operations, allowing more CPU cores to focus on application performance.
File APIs are provided as an abstraction layer for using GSB features in existing AI frameworks (e.g., Microsoft DeepSpeed).
Example
Here is an example of using GSB APIs.
#include <libmango.h>
/*
 * Example: read file data into GPU memory using the GSB file APIs.
 *
 * The sequence is: create and enable a GSB handle, open a GPU and allocate
 * device memory, register that memory with GSB, issue reads, then tear
 * everything down in reverse order.
 *
 * Every `...` below is a placeholder that the application must fill in.
 */
int main(void)
{
    // create a GSB handle
    mango_file_gsb_h gsb;
    mango_file_gsb_create(&gsb);

    // enable GSB
    mango_file_gsb_enable(gsb);

    // create a GPU handle with the first AMD GPU device
    mango_gpu_h gpu;
    mango_gpu_create(&gpu);
    mango_gpu_open(gpu, MANGO_GPU_AMD, 0);

    // allocate GPU memory
    size_t size = ...;
    void *gpu_buf;
    mango_gpu_alloc_data(gpu, size, &gpu_buf);

    // register GPU memory
    mango_file_gmem_h gmem;
    mango_file_register_gmem(gsb, gpu_buf, size, &gmem);

    // read from file to GPU memory
    int fd = open(...);
    loff_t file_offset = ...;
    loff_t gpu_offset = ...;
    while (1) {
        mango_file_read(gsb, fd, gpu_buf, size, file_offset, gpu_offset);
        // application-specific logic goes here (placeholder)
        ...
    }

    // unregister and free GPU memory
    mango_file_unregister_gmem(gmem);
    mango_gpu_free_data(gpu, gpu_buf);
    mango_gpu_destroy(gpu);

    // disable GSB
    mango_file_gsb_disable(gsb);
    mango_file_gsb_delete(gsb);

    return 0;
}
Datatypes
mango_file_gmem_h
typedef void * mango_file_gmem_h;
A handle of mango file gpu memory.
mango_file_gsb_h
typedef void * mango_file_gsb_h;
A handle of mango gpu storage boost.
mango_file_stream_h
typedef void * mango_file_stream_h;
A handle of mango file stream.
Functions
mango_file_create_stream
mango_status_e mango_file_create_stream(mango_file_stream_h *stream,
unsigned depth)
Create a stream for async operations of mango file.
Parameters
- [out] stream: The mango file stream.
- [in] depth: The stream depth.
Returns
0 on success; otherwise, a negative error value.
mango_file_delete_stream
mango_status_e mango_file_delete_stream(mango_file_stream_h stream)
Delete a stream for async operations of mango file.
Parameters
- [in] stream: The mango file stream.
Returns
0 on success; otherwise, a negative error value.
mango_file_gsb_create
mango_status_e mango_file_gsb_create(mango_file_gsb_h *gsb)
Create GSB handle.
Parameters
- [out] gsb: The GPU storage boost handle.
Returns
0 on success; otherwise, a negative error value.
mango_file_gsb_delete
mango_status_e mango_file_gsb_delete(mango_file_gsb_h gsb)
Delete GSB handle.
Parameters
- [in] gsb: The GPU storage boost handle.
Returns
0 on success; otherwise, a negative error value.
Close the FD.
mango_file_gsb_disable
mango_status_e mango_file_gsb_disable(mango_file_gsb_h gsb)
Disable GPU Storage Boost.
Parameters
- [in] gsb: The GPU storage boost handle.
Returns
0 on success; otherwise, a negative error value.
Close the FD of mango fs.
mango_file_gsb_enable
mango_status_e mango_file_gsb_enable(mango_file_gsb_h gsb)
Enable GPU Storage Boost.
Parameters
- [in] gsb: The GPU storage boost handle.
Returns
0 on success; otherwise, a negative error value.
Open the FD of mango fs.
mango_file_read
size_t mango_file_read(mango_file_gsb_h gsb,
int fd,
void *buf_base_addr,
size_t size,
loff_t file_offset,
loff_t buf_offset)
Read from a file to GPU memory.
Parameters
- [in] gsb: The GPU storage boost handle.
- [in] fd: The file descriptor of the target file.
- [in] buf_base_addr: The base address of the GPU memory buffer.
- [in] size: The size of the GSB operation.
- [in] file_offset: The file offset.
- [in] buf_offset: The buffer offset.
Returns
The size of the data read on success; otherwise, a negative error value.
mango_file_read_async
mango_status_e mango_file_read_async(mango_file_gsb_h gsb,
int fd,
loff_t file_offset,
void *gpuvaddr,
size_t size,
long **bytes_read_p,
mango_file_stream_h stream)
Asynchronously read from a file to GPU memory.
Parameters
- [in] gsb: The GPU storage boost handle.
- [in] fd: The file descriptor of the target file.
- [in] file_offset: The file offset.
- [in] gpuvaddr: The GPU virtual address.
- [in] size: The size of the GSB operation.
- [in] bytes_read_p: The pointer to the number of bytes read.
- [in] stream: The GSB stream.
Returns
0 on success; otherwise, a negative error value.
mango_file_register_gmem
mango_status_e mango_file_register_gmem(mango_file_gsb_h gsb,
const void *gpuvaddr,
size_t size,
mango_file_gmem_h *gmem)
Register GPU memory and pin it.
Parameters
- [in] gsb: The GPU storage boost handle.
- [in] gpuvaddr: The base GPU virtual address.
- [in] size: The GPU memory size.
- [out] gmem: The GPU memory handle.
Returns
0 on success; otherwise, a negative error value.
mango_file_stream_sync
mango_status_e mango_file_stream_sync(mango_file_stream_h stream,
useconds_t max_wait,
useconds_t interval)
Synchronize all async operations on the stream.
Parameters
- [in] stream: The mango file stream.
- [in] max_wait: The maximum time to wait, in microseconds.
- [in] interval: The wait interval, in microseconds.
Returns
0 on success; otherwise, a negative error value.
mango_file_unregister_gmem
mango_status_e mango_file_unregister_gmem(mango_file_gmem_h gmem)
Unregister GPU memory and unpin it.
Parameters
- [in] gmem: The GPU memory handle.
Returns
0 on success; otherwise, a negative error value.
mango_file_write
size_t mango_file_write(mango_file_gsb_h gsb,
int fd,
void *buf_base_addr,
size_t size,
loff_t file_offset,
loff_t buf_offset)
Write from GPU memory to a file.
Parameters
- [in] gsb: The GPU storage boost handle.
- [in] fd: The file descriptor of the target file.
- [in] buf_base_addr: The base address of the GPU memory buffer.
- [in] size: The size of the GSB operation.
- [in] file_offset: The file offset.
- [in] buf_offset: The buffer offset.
Returns
The size of the data written on success; otherwise, a negative error value.
mango_file_write_async
mango_status_e mango_file_write_async(mango_file_gsb_h gsb,
int fd,
loff_t file_offset,
void *gpuvaddr,
size_t size,
long **bytes_write_p,
mango_file_stream_h stream)
Asynchronously write from GPU memory to a file.
Parameters
- [in] gsb: The GPU storage boost handle.
- [in] fd: The file descriptor of the target file.
- [in] file_offset: The file offset.
- [in] gpuvaddr: The GPU virtual address.
- [in] size: The size of the GSB operation.
- [in] bytes_write_p: The pointer to the number of bytes written.
- [in] stream: The GSB stream.
Returns
0 on success; otherwise, a negative error value.