Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
ENH: Add the __dlpack__ and __dlpack_device__ methods to ndarray.
- Loading branch information
1 parent
3dcd924
commit 830903a
Showing
3 changed files
with
352 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,188 @@ | ||
/*! | ||
* Copyright (c) 2017 by Contributors | ||
* \file dlpack.h | ||
* \brief The common header of DLPack. | ||
*/ | ||
#ifndef DLPACK_DLPACK_H_ | ||
#define DLPACK_DLPACK_H_ | ||
|
||
#ifdef __cplusplus | ||
#define DLPACK_EXTERN_C extern "C" | ||
#else | ||
#define DLPACK_EXTERN_C | ||
#endif | ||
|
||
/*! \brief The current version of dlpack */ | ||
#define DLPACK_VERSION 050 | ||
|
||
/*! \brief DLPACK_DLL prefix for windows */ | ||
#ifdef _WIN32 | ||
#ifdef DLPACK_EXPORTS | ||
#define DLPACK_DLL __declspec(dllexport) | ||
#else | ||
#define DLPACK_DLL __declspec(dllimport) | ||
#endif | ||
#else | ||
#define DLPACK_DLL | ||
#endif | ||
|
||
#include <stdint.h> | ||
#include <stddef.h> | ||
|
||
#ifdef __cplusplus | ||
extern "C" { | ||
#endif | ||
/*! | ||
* \brief The device type in DLDevice. | ||
*/ | ||
typedef enum { | ||
/*! \brief CPU device */ | ||
kDLCPU = 1, | ||
/*! \brief CUDA GPU device */ | ||
kDLCUDA = 2, | ||
/*! | ||
* \brief Pinned CUDA CPU memory by cudaMallocHost | ||
*/ | ||
kDLCUDAHost = 3, | ||
/*! \brief OpenCL devices. */ | ||
kDLOpenCL = 4, | ||
/*! \brief Vulkan buffer for next generation graphics. */ | ||
kDLVulkan = 7, | ||
/*! \brief Metal for Apple GPU. */ | ||
kDLMetal = 8, | ||
/*! \brief Verilog simulator buffer */ | ||
kDLVPI = 9, | ||
/*! \brief ROCm GPUs for AMD GPUs */ | ||
kDLROCM = 10, | ||
/*! | ||
* \brief Reserved extension device type, | ||
* used for quickly test extension device | ||
* The semantics can differ depending on the implementation. | ||
*/ | ||
kDLExtDev = 12, | ||
} DLDeviceType; | ||
|
||
/*! | ||
* \brief A Device for Tensor and operator. | ||
*/ | ||
typedef struct { | ||
/*! \brief The device type used in the device. */ | ||
DLDeviceType device_type; | ||
/*! \brief The device index */ | ||
int device_id; | ||
} DLDevice; | ||
|
||
/*! | ||
* \brief The type code options DLDataType. | ||
*/ | ||
typedef enum { | ||
/*! \brief signed integer */ | ||
kDLInt = 0U, | ||
/*! \brief unsigned integer */ | ||
kDLUInt = 1U, | ||
/*! \brief IEEE floating point */ | ||
kDLFloat = 2U, | ||
/*! | ||
* \brief Opaque handle type, reserved for testing purposes. | ||
* Frameworks need to agree on the handle data type for the exchange to be well-defined. | ||
*/ | ||
kDLOpaqueHandle = 3U, | ||
/*! \brief bfloat16 */ | ||
kDLBfloat = 4U, | ||
/*! | ||
* \brief complex number | ||
* (C/C++/Python layout: compact struct per complex number) | ||
*/ | ||
kDLComplex = 5U, | ||
} DLDataTypeCode; | ||
|
||
/*! | ||
* \brief The data type the tensor can hold. | ||
* | ||
* Examples | ||
* - float: type_code = 2, bits = 32, lanes=1 | ||
* - float4(vectorized 4 float): type_code = 2, bits = 32, lanes=4 | ||
* - int8: type_code = 0, bits = 8, lanes=1 | ||
* - std::complex<float>: type_code = 5, bits = 64, lanes = 1 | ||
*/ | ||
typedef struct { | ||
/*! | ||
* \brief Type code of base types. | ||
* We keep it uint8_t instead of DLDataTypeCode for minimal memory | ||
* footprint, but the value should be one of DLDataTypeCode enum values. | ||
* */ | ||
uint8_t code; | ||
/*! | ||
* \brief Number of bits, common choices are 8, 16, 32. | ||
*/ | ||
uint8_t bits; | ||
/*! \brief Number of lanes in the type, used for vector types. */ | ||
uint16_t lanes; | ||
} DLDataType; | ||
|
||
/*! | ||
* \brief Plain C Tensor object, does not manage memory. | ||
*/ | ||
typedef struct { | ||
/*! | ||
* \brief The opaque data pointer points to the allocated data. This will be | ||
* CUDA device pointer or cl_mem handle in OpenCL. This pointer is always | ||
* aligned to 256 bytes as in CUDA. | ||
* | ||
* For given DLTensor, the size of memory required to store the contents of | ||
* data is calculated as follows: | ||
* | ||
* \code{.c} | ||
* static inline size_t GetDataSize(const DLTensor* t) { | ||
* size_t size = 1; | ||
* for (tvm_index_t i = 0; i < t->ndim; ++i) { | ||
* size *= t->shape[i]; | ||
* } | ||
* size *= (t->dtype.bits * t->dtype.lanes + 7) / 8; | ||
* return size; | ||
* } | ||
* \endcode | ||
*/ | ||
void* data; | ||
/*! \brief The device of the tensor */ | ||
DLDevice device; | ||
/*! \brief Number of dimensions */ | ||
int ndim; | ||
/*! \brief The data type of the pointer*/ | ||
DLDataType dtype; | ||
/*! \brief The shape of the tensor */ | ||
int64_t* shape; | ||
/*! | ||
* \brief strides of the tensor (in number of elements, not bytes) | ||
* can be NULL, indicating tensor is compact and row-majored. | ||
*/ | ||
int64_t* strides; | ||
/*! \brief The offset in bytes to the beginning pointer to data */ | ||
uint64_t byte_offset; | ||
} DLTensor; | ||
|
||
/*! | ||
* \brief C Tensor object, manage memory of DLTensor. This data structure is | ||
* intended to facilitate the borrowing of DLTensor by another framework. It is | ||
* not meant to transfer the tensor. When the borrowing framework doesn't need | ||
* the tensor, it should call the deleter to notify the host that the resource | ||
* is no longer needed. | ||
*/ | ||
typedef struct DLManagedTensor { | ||
/*! \brief DLTensor which is being memory managed */ | ||
DLTensor dl_tensor; | ||
/*! \brief the context of the original host framework of DLManagedTensor in | ||
* which DLManagedTensor is used in the framework. It can also be NULL. | ||
*/ | ||
void * manager_ctx; | ||
/*! \brief Destructor signature void (*)(void*) - this should be called | ||
* to destruct manager_ctx which holds the DLManagedTensor. It can be NULL | ||
* if there is no way for the caller to provide a reasonable destructor. | ||
* The destructors deletes the argument self as well. | ||
*/ | ||
void (*deleter)(struct DLManagedTensor * self); | ||
} DLManagedTensor; | ||
#ifdef __cplusplus | ||
} // DLPACK_EXTERN_C | ||
#endif | ||
#endif // DLPACK_DLPACK_H_ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters