ENH: Add the __dlpack__ and __dlpack_device__ methods to ndarray.
hameerabbasi committed May 24, 2021
1 parent 3dcd924 commit 830903a
Showing 3 changed files with 352 additions and 0 deletions.
6 changes: 6 additions & 0 deletions numpy/__init__.pyi
@@ -1644,6 +1644,10 @@ _ArrayComplex_co = NDArray[Union[bool_, integer[Any], floating[Any], complexfloa
_ArrayNumber_co = NDArray[Union[bool_, number[Any]]]
_ArrayTD64_co = NDArray[Union[bool_, integer[Any], timedelta64]]

# `builtins.PyCapsule` unfortunately lacks annotations at the moment;
# use `Any` as a stopgap measure
_PyCapsule = Any

class _SupportsItem(Protocol[_T_co]):
def item(self, __args: Any) -> _T_co: ...

@@ -2809,6 +2813,8 @@ class ndarray(_ArrayOrScalarCommon, Generic[_ShapeType, _DType_co]):
def __ior__(self: NDArray[object_], other: Any) -> NDArray[object_]: ...
@overload
def __ior__(self: NDArray[_ScalarType], other: _RecursiveSequence) -> NDArray[_ScalarType]: ...
def __dlpack__(self: NDArray[number[Any]], *, stream: None = ...) -> _PyCapsule: ...
def __dlpack_device__(self) -> Tuple[L[1], L[0]]: ...

# Keep `dtype` at the bottom to avoid name conflicts with `np.dtype`
@property
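For context, here is a minimal usage sketch (not part of this commit) of the two methods the stubs above describe; the return values follow the C implementation added further down, and any consumer that imports the capsule (for example another library's from_dlpack) is outside the scope of this change:

import numpy as np

x = np.arange(12, dtype=np.float64).reshape(3, 4)

# __dlpack_device__ reports (device_type, device_id); the implementation
# below always returns (1, 0), i.e. kDLCPU with device index 0.
assert x.__dlpack_device__() == (1, 0)

# __dlpack__ returns an opaque PyCapsule wrapping a DLManagedTensor;
# only stream=None is accepted.
capsule = x.__dlpack__()
print(type(capsule))  # <class 'PyCapsule'>
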
188 changes: 188 additions & 0 deletions numpy/core/include/numpy/dlpack/dlpack.h
@@ -0,0 +1,188 @@
/*!
* Copyright (c) 2017 by Contributors
* \file dlpack.h
* \brief The common header of DLPack.
*/
#ifndef DLPACK_DLPACK_H_
#define DLPACK_DLPACK_H_

#ifdef __cplusplus
#define DLPACK_EXTERN_C extern "C"
#else
#define DLPACK_EXTERN_C
#endif

/*! \brief The current version of dlpack */
#define DLPACK_VERSION 050

/*! \brief DLPACK_DLL prefix for windows */
#ifdef _WIN32
#ifdef DLPACK_EXPORTS
#define DLPACK_DLL __declspec(dllexport)
#else
#define DLPACK_DLL __declspec(dllimport)
#endif
#else
#define DLPACK_DLL
#endif

#include <stdint.h>
#include <stddef.h>

#ifdef __cplusplus
extern "C" {
#endif
/*!
* \brief The device type in DLDevice.
*/
typedef enum {
/*! \brief CPU device */
kDLCPU = 1,
/*! \brief CUDA GPU device */
kDLCUDA = 2,
/*!
* \brief Pinned CUDA CPU memory by cudaMallocHost
*/
kDLCUDAHost = 3,
/*! \brief OpenCL devices. */
kDLOpenCL = 4,
/*! \brief Vulkan buffer for next generation graphics. */
kDLVulkan = 7,
/*! \brief Metal for Apple GPU. */
kDLMetal = 8,
/*! \brief Verilog simulator buffer */
kDLVPI = 9,
/*! \brief ROCm GPUs for AMD GPUs */
kDLROCM = 10,
/*!
* \brief Reserved extension device type,
* used to quickly test extension devices.
* The semantics can differ depending on the implementation.
*/
kDLExtDev = 12,
} DLDeviceType;

/*!
* \brief A Device for Tensor and operator.
*/
typedef struct {
/*! \brief The device type used in the device. */
DLDeviceType device_type;
/*! \brief The device index */
int device_id;
} DLDevice;

/*!
* \brief The type code options of DLDataType.
*/
typedef enum {
/*! \brief signed integer */
kDLInt = 0U,
/*! \brief unsigned integer */
kDLUInt = 1U,
/*! \brief IEEE floating point */
kDLFloat = 2U,
/*!
* \brief Opaque handle type, reserved for testing purposes.
* Frameworks need to agree on the handle data type for the exchange to be well-defined.
*/
kDLOpaqueHandle = 3U,
/*! \brief bfloat16 */
kDLBfloat = 4U,
/*!
* \brief complex number
* (C/C++/Python layout: compact struct per complex number)
*/
kDLComplex = 5U,
} DLDataTypeCode;

/*!
* \brief The data type the tensor can hold.
*
* Examples
* - float: type_code = 2, bits = 32, lanes=1
* - float4(vectorized 4 float): type_code = 2, bits = 32, lanes=4
* - int8: type_code = 0, bits = 8, lanes=1
* - std::complex<float>: type_code = 5, bits = 64, lanes = 1
*/
typedef struct {
/*!
* \brief Type code of base types.
* We keep it uint8_t instead of DLDataTypeCode for minimal memory
* footprint, but the value should be one of DLDataTypeCode enum values.
* */
uint8_t code;
/*!
* \brief Number of bits, common choices are 8, 16, 32.
*/
uint8_t bits;
/*! \brief Number of lanes in the type, used for vector types. */
uint16_t lanes;
} DLDataType;

/*!
* \brief Plain C Tensor object, does not manage memory.
*/
typedef struct {
/*!
* \brief The opaque data pointer points to the allocated data. This will be
* CUDA device pointer or cl_mem handle in OpenCL. This pointer is always
* aligned to 256 bytes as in CUDA.
*
* For given DLTensor, the size of memory required to store the contents of
* data is calculated as follows:
*
* \code{.c}
* static inline size_t GetDataSize(const DLTensor* t) {
* size_t size = 1;
* for (tvm_index_t i = 0; i < t->ndim; ++i) {
* size *= t->shape[i];
* }
* size *= (t->dtype.bits * t->dtype.lanes + 7) / 8;
* return size;
* }
* \endcode
*/
void* data;
/*! \brief The device of the tensor */
DLDevice device;
/*! \brief Number of dimensions */
int ndim;
/*! \brief The data type of the pointer*/
DLDataType dtype;
/*! \brief The shape of the tensor */
int64_t* shape;
/*!
* \brief strides of the tensor (in number of elements, not bytes)
* can be NULL, indicating the tensor is compact and row-major.
*/
int64_t* strides;
/*! \brief The offset in bytes to the beginning pointer to data */
uint64_t byte_offset;
} DLTensor;

/*!
* \brief C Tensor object, manage memory of DLTensor. This data structure is
* intended to facilitate the borrowing of DLTensor by another framework. It is
* not meant to transfer the tensor. When the borrowing framework doesn't need
* the tensor, it should call the deleter to notify the host that the resource
* is no longer needed.
*/
typedef struct DLManagedTensor {
/*! \brief DLTensor which is being memory managed */
DLTensor dl_tensor;
/*! \brief the context of the original host framework of DLManagedTensor in
* which DLManagedTensor is used in the framework. It can also be NULL.
*/
void * manager_ctx;
/*! \brief Destructor signature void (*)(void*) - this should be called
* to destruct manager_ctx which holds the DLManagedTensor. It can be NULL
* if there is no way for the caller to provide a reasonable destructor.
* The destructor deletes the argument self as well.
*/
void (*deleter)(struct DLManagedTensor * self);
} DLManagedTensor;
#ifdef __cplusplus
} // DLPACK_EXTERN_C
#endif
#endif // DLPACK_DLPACK_H_
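As an illustrative aside (not part of the header), the GetDataSize example above and the dtype mapping used in methods.c below translate to roughly the following Python sketch; the helper names are made up for this example and assume single-lane dtypes:

import numpy as np

# DLDataTypeCode values from the header: kDLInt=0, kDLUInt=1, kDLFloat=2, kDLComplex=5.
_KIND_TO_DLPACK_CODE = {"i": 0, "u": 1, "f": 2, "c": 5}

def dl_data_type(dtype):
    """Return (code, bits, lanes) for a supported NumPy dtype."""
    dtype = np.dtype(dtype)
    return _KIND_TO_DLPACK_CODE[dtype.kind], 8 * dtype.itemsize, 1

def get_data_size(shape, dtype):
    """Python analogue of the GetDataSize snippet in the header comment."""
    code, bits, lanes = dl_data_type(dtype)
    size = 1
    for dim in shape:
        size *= dim
    return size * ((bits * lanes + 7) // 8)

# float64, shape (3, 4): code=2 (kDLFloat), bits=64, lanes=1, 12 * 8 = 96 bytes.
assert dl_data_type(np.float64) == (2, 64, 1)
assert get_data_size((3, 4), np.float64) == 96
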
158 changes: 158 additions & 0 deletions numpy/core/src/multiarray/methods.c
@@ -30,6 +30,8 @@
#include "methods.h"
#include "alloc.h"

#include "numpy/dlpack/dlpack.h"


/* NpyArg_ParseKeywords
*
@@ -2694,6 +2696,152 @@ array_complex(PyArrayObject *self, PyObject *NPY_UNUSED(args))
return c;
}

#define NPY_DLPACK_CAPSULE_NAME "NumPy DLPack Wrapper"

static void array_dlpack_capsule_deleter(PyObject *self)
{
DLManagedTensor *managed =
(DLManagedTensor *)PyCapsule_GetPointer(self, NPY_DLPACK_CAPSULE_NAME);
managed->deleter(managed);
}

static void array_dlpack_deleter(DLManagedTensor *self)
{
PyArrayObject *array = (PyArrayObject *)self->manager_ctx;
free(self->dl_tensor.shape);
free(self->dl_tensor.strides);
free(self);

PyArray_XDECREF(array);
}

static PyObject *
array_dlpack(PyArrayObject *self,
PyObject *const *args, Py_ssize_t len_args, PyObject *kwnames)
{
PyObject *stream = Py_None;
NPY_PREPARE_ARGPARSER;
if (npy_parse_arguments("__dlpack__", args, len_args, kwnames,
"$stream", NULL, &stream,
NULL, NULL, NULL))
{
return NULL;
}

if (stream != Py_None)
{
PyErr_SetString(PyExc_RuntimeError, "NumPy only supports stream=None.");
return NULL;
}

npy_intp itemsize = PyArray_ITEMSIZE(self);
int ndim = PyArray_NDIM(self);
npy_intp *strides = PyArray_STRIDES(self);
npy_intp *shape = PyArray_SHAPE(self);

for (int i = 0; i < ndim; ++i)
{
if (strides[i] % itemsize != 0) {
PyErr_SetString(PyExc_RuntimeError,
"DLPack only supports strides which are a multiple of itemsize.");
return NULL;
}
}

DLDataType managed_dtype;
PyArray_Descr *dtype = PyArray_DESCR(self);

managed_dtype.bits = 8 * itemsize;
managed_dtype.lanes = 1;
if (PyDataType_ISSIGNED(dtype))
{
managed_dtype.code = kDLInt;
}
else if (PyDataType_ISUNSIGNED(dtype))
{
managed_dtype.code = kDLUInt;
}
else if (PyDataType_ISFLOAT(dtype))
{
managed_dtype.code = kDLFloat;
}
else if (PyDataType_ISCOMPLEX(dtype))
{
managed_dtype.code = kDLComplex;
}
else
{
PyErr_SetString(PyExc_TypeError,
"DLPack only supports signed/unsigned integers, float and complex dtypes.");
return NULL;
}

DLManagedTensor *managed = malloc(sizeof(DLManagedTensor));
if (managed == NULL)
{
PyErr_SetString(PyExc_MemoryError,
"Could not allocate the DLManagedTensor struct.");
return NULL;
}

managed->dl_tensor.data = PyArray_DATA(self);
managed->dl_tensor.device.device_type = kDLCPU;
managed->dl_tensor.device.device_id = 0;
managed->dl_tensor.dtype = managed_dtype;


int64_t *managed_shape = malloc(sizeof(int64_t) * ndim);
if (managed_shape == NULL)
{
PyErr_SetString(PyExc_MemoryError,
"Could not allocate the DLManagedTensor struct shape.");
free(managed);
return NULL;
}

int64_t *managed_strides = malloc(sizeof(int64_t) * ndim);
if (managed_strides == NULL)
{
PyErr_SetString(PyExc_MemoryError,
"Could not allocate the DLManagedTensor struct strides.");
free(managed);
free(managed_shape);
return NULL;
}

for (int i = 0; i < ndim; ++i)
{
managed_shape[i] = shape[i];
managed_strides[i] = strides[i];
}

managed->dl_tensor.ndim = ndim;
managed->dl_tensor.shape = managed_shape;
managed->dl_tensor.strides = managed_strides;
managed->dl_tensor.byte_offset = 0;
managed->manager_ctx = self;
managed->deleter = array_dlpack_deleter;

PyObject *capsule = PyCapsule_New(managed, NPY_DLPACK_CAPSULE_NAME, array_dlpack_capsule_deleter);
if (capsule != NULL)
{
PyArray_INCREF(self);
}
else
{
free(managed);
free(managed_shape);
free(managed_strides);
}
return capsule;
}

static PyObject *
array_dlpack_device(PyArrayObject *self, PyObject *args)
{
return Py_BuildValue("ii", 1, 0);
}

NPY_NO_EXPORT PyMethodDef array_methods[] = {

/* for subtypes */
@@ -2914,5 +3062,15 @@ NPY_NO_EXPORT PyMethodDef array_methods[] = {
{"view",
(PyCFunction)array_view,
METH_FASTCALL | METH_KEYWORDS, NULL},

// For data interchange between libraries
{"__dlpack__",
(PyCFunction)array_dlpack,
METH_FASTCALL | METH_KEYWORDS, NULL},

{"__dlpack_device__",
(PyCFunction)array_dlpack_device,
METH_NOARGS, NULL},

{NULL, NULL, 0, NULL} /* sentinel */
};
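
A rough sketch (assumed behaviour derived from the error paths above, not a test shipped with this commit) of the inputs __dlpack__ rejects:

import numpy as np

x = np.ones((4, 4))

# Only stream=None is supported; anything else hits the RuntimeError above.
try:
    x.__dlpack__(stream=0)
except RuntimeError as exc:
    print(exc)  # NumPy only supports stream=None.

# dtypes other than signed/unsigned integers, float and complex take the
# TypeError branch of the dtype switch.
try:
    np.array(["spam"]).__dlpack__()
except TypeError as exc:
    print(exc)

# Strides that are not a multiple of the itemsize are rejected; an
# as_strided view with a 1-byte stride over float64 data is one way to hit this.
odd = np.lib.stride_tricks.as_strided(x, shape=(2,), strides=(1,))
try:
    odd.__dlpack__()
except RuntimeError as exc:
    print(exc)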
