diff --git a/numpy/__init__.pyi b/numpy/__init__.pyi
index ac37eb8ad40..2621cd2f141 100644
--- a/numpy/__init__.pyi
+++ b/numpy/__init__.pyi
@@ -1644,6 +1644,10 @@ _ArrayComplex_co = NDArray[Union[bool_, integer[Any], floating[Any], complexfloating[Any, Any]]]
 _ArrayNumber_co = NDArray[Union[bool_, number[Any]]]
 _ArrayTD64_co = NDArray[Union[bool_, integer[Any], timedelta64]]
 
+# `builtins.PyCapsule` unfortunately lacks annotations at the moment;
+# use `Any` as a stopgap measure
+_PyCapsule = Any
+
 class _SupportsItem(Protocol[_T_co]):
     def item(self, __args: Any) -> _T_co: ...
 
@@ -2809,6 +2813,8 @@ class ndarray(_ArrayOrScalarCommon, Generic[_ShapeType, _DType_co]):
     def __ior__(self: NDArray[object_], other: Any) -> NDArray[object_]: ...
     @overload
     def __ior__(self: NDArray[_ScalarType], other: _RecursiveSequence) -> NDArray[_ScalarType]: ...
+    def __dlpack__(self: NDArray[number[Any]], *, stream: None = ...) -> _PyCapsule: ...
+    def __dlpack_device__(self) -> Tuple[L[1], L[0]]: ...
 
     # Keep `dtype` at the bottom to avoid name conflicts with `np.dtype`
     @property
diff --git a/numpy/core/include/numpy/dlpack/dlpack.h b/numpy/core/include/numpy/dlpack/dlpack.h
new file mode 100644
index 00000000000..84afca24829
--- /dev/null
+++ b/numpy/core/include/numpy/dlpack/dlpack.h
@@ -0,0 +1,188 @@
+/*!
+ *  Copyright (c) 2017 by Contributors
+ * \file dlpack.h
+ * \brief The common header of DLPack.
+ */
+#ifndef DLPACK_DLPACK_H_
+#define DLPACK_DLPACK_H_
+
+#ifdef __cplusplus
+#define DLPACK_EXTERN_C extern "C"
+#else
+#define DLPACK_EXTERN_C
+#endif
+
+/*! \brief The current version of dlpack */
+#define DLPACK_VERSION 050
+
+/*! \brief DLPACK_DLL prefix for windows */
+#ifdef _WIN32
+#ifdef DLPACK_EXPORTS
+#define DLPACK_DLL __declspec(dllexport)
+#else
+#define DLPACK_DLL __declspec(dllimport)
+#endif
+#else
+#define DLPACK_DLL
+#endif
+
+#include <stdint.h>
+#include <stddef.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+/*!
+ * \brief The device type in DLDevice.
+ */
+typedef enum {
+  /*! \brief CPU device */
+  kDLCPU = 1,
+  /*! \brief CUDA GPU device */
+  kDLCUDA = 2,
+  /*!
+   * \brief Pinned CUDA CPU memory by cudaMallocHost
+   */
+  kDLCUDAHost = 3,
+  /*! \brief OpenCL devices. */
+  kDLOpenCL = 4,
+  /*! \brief Vulkan buffer for next generation graphics. */
+  kDLVulkan = 7,
+  /*! \brief Metal for Apple GPU. */
+  kDLMetal = 8,
+  /*! \brief Verilog simulator buffer */
+  kDLVPI = 9,
+  /*! \brief ROCm GPUs for AMD GPUs */
+  kDLROCM = 10,
+  /*!
+   * \brief Reserved extension device type,
+   * used to quickly test extension devices.
+   * The semantics can differ depending on the implementation.
+   */
+  kDLExtDev = 12,
+} DLDeviceType;
+
+/*!
+ * \brief A Device for Tensor and operator.
+ */
+typedef struct {
+  /*! \brief The device type used in the device. */
+  DLDeviceType device_type;
+  /*! \brief The device index */
+  int device_id;
+} DLDevice;
+
+/*!
+ * \brief The type code options of DLDataType.
+ */
+typedef enum {
+  /*! \brief signed integer */
+  kDLInt = 0U,
+  /*! \brief unsigned integer */
+  kDLUInt = 1U,
+  /*! \brief IEEE floating point */
+  kDLFloat = 2U,
+  /*!
+   * \brief Opaque handle type, reserved for testing purposes.
+   * Frameworks need to agree on the handle data type for the exchange to be well-defined.
+   */
+  kDLOpaqueHandle = 3U,
+  /*! \brief bfloat16 */
+  kDLBfloat = 4U,
+  /*!
+   * \brief complex number
+   * (C/C++/Python layout: compact struct per complex number)
+   */
+  kDLComplex = 5U,
+} DLDataTypeCode;
+
+/*!
+ * \brief The data type the tensor can hold.
+ *
+ *  Examples
+ *   - float: type_code = 2, bits = 32, lanes=1
+ *   - float4(vectorized 4 float): type_code = 2, bits = 32, lanes=4
+ *   - int8: type_code = 0, bits = 8, lanes=1
+ *   - std::complex<float>: type_code = 5, bits = 64, lanes = 1
+ */
+typedef struct {
+  /*!
+   * \brief Type code of base types.
+   * We keep it uint8_t instead of DLDataTypeCode for minimal memory
+   * footprint, but the value should be one of DLDataTypeCode enum values.
+   */
+  uint8_t code;
+  /*!
+   * \brief Number of bits, common choices are 8, 16, 32.
+   */
+  uint8_t bits;
+  /*! \brief Number of lanes in the type, used for vector types. */
+  uint16_t lanes;
+} DLDataType;
+
+/*!
+ * \brief Plain C Tensor object, does not manage memory.
+ */
+typedef struct {
+  /*!
+   * \brief The opaque data pointer points to the allocated data. This will be
+   * a CUDA device pointer or cl_mem handle in OpenCL. This pointer is always
+   * aligned to 256 bytes as in CUDA.
+   *
+   * For a given DLTensor, the size of memory required to store the contents
+   * of data is calculated as follows:
+   *
+   * \code{.c}
+   * static inline size_t GetDataSize(const DLTensor* t) {
+   *   size_t size = 1;
+   *   for (tvm_index_t i = 0; i < t->ndim; ++i) {
+   *     size *= t->shape[i];
+   *   }
+   *   size *= (t->dtype.bits * t->dtype.lanes + 7) / 8;
+   *   return size;
+   * }
+   * \endcode
+   */
+  void* data;
+  /*! \brief The device of the tensor */
+  DLDevice device;
+  /*! \brief Number of dimensions */
+  int ndim;
+  /*! \brief The data type of the pointer */
+  DLDataType dtype;
+  /*! \brief The shape of the tensor */
+  int64_t* shape;
+  /*!
+   * \brief strides of the tensor (in number of elements, not bytes)
+   * can be NULL, indicating tensor is compact and row-major.
+   */
+  int64_t* strides;
+  /*! \brief The offset in bytes to the beginning pointer to data */
+  uint64_t byte_offset;
+} DLTensor;
+
+/*!
+ * \brief C Tensor object, manages memory of DLTensor. This data structure is
+ *  intended to facilitate the borrowing of DLTensor by another framework. It
+ *  is not meant to transfer the tensor. When the borrowing framework doesn't
+ *  need the tensor, it should call the deleter to notify the host that the
+ *  resource is no longer needed.
+ */
+typedef struct DLManagedTensor {
+  /*! \brief DLTensor which is being memory managed */
+  DLTensor dl_tensor;
+  /*! \brief the context of the original host framework in which
+   *   DLManagedTensor is used. It can also be NULL.
+   */
+  void * manager_ctx;
+  /*! \brief Destructor signature void (*)(void*) - this should be called
+   *   to destruct manager_ctx which holds the DLManagedTensor. It can be NULL
+   *   if there is no way for the caller to provide a reasonable destructor.
+   *   The destructor deletes the argument self as well.
+   */
+  void (*deleter)(struct DLManagedTensor * self);
+} DLManagedTensor;
+#ifdef __cplusplus
+}  // DLPACK_EXTERN_C
+#endif
+#endif  // DLPACK_DLPACK_H_
diff --git a/numpy/core/src/multiarray/methods.c b/numpy/core/src/multiarray/methods.c
index 251e527a6b9..fa043ac3129 100644
--- a/numpy/core/src/multiarray/methods.c
+++ b/numpy/core/src/multiarray/methods.c
@@ -30,6 +30,8 @@
 #include "methods.h"
 #include "alloc.h"
 
+#include "numpy/dlpack/dlpack.h"
+
 
 /* NpyArg_ParseKeywords
  *
@@ -2694,6 +2696,152 @@ array_complex(PyArrayObject *self, PyObject *NPY_UNUSED(args))
     return c;
 }
 
+#define NPY_DLPACK_CAPSULE_NAME "NumPy DLPack Wrapper"
+
+static void
+array_dlpack_capsule_deleter(PyObject *self)
+{
+    DLManagedTensor *managed =
+        (DLManagedTensor *)PyCapsule_GetPointer(self, NPY_DLPACK_CAPSULE_NAME);
+    if (managed == NULL) {
+        /* Not our capsule; don't let an exception escape the deleter. */
+        PyErr_WriteUnraisable(self);
+        return;
+    }
+    managed->deleter(managed);
+}
+
+static void
+array_dlpack_deleter(DLManagedTensor *self)
+{
+    PyArrayObject *array = (PyArrayObject *)self->manager_ctx;
+    free(self->dl_tensor.shape);
+    free(self->dl_tensor.strides);
+    free(self);
+
+    /* Release the reference the capsule took on the exporting array. */
+    Py_XDECREF(array);
+}
+
+static PyObject *
+array_dlpack(PyArrayObject *self,
+        PyObject *const *args, Py_ssize_t len_args, PyObject *kwnames)
+{
+    PyObject *stream = Py_None;
+    NPY_PREPARE_ARGPARSER;
+    if (npy_parse_arguments("__dlpack__", args, len_args, kwnames,
+            "$stream", NULL, &stream,
+            NULL, NULL, NULL) < 0) {
+        return NULL;
+    }
+
+    if (stream != Py_None) {
+        PyErr_SetString(PyExc_RuntimeError,
+                "NumPy only supports stream=None.");
+        return NULL;
+    }
+
+    npy_intp itemsize = PyArray_ITEMSIZE(self);
+    int ndim = PyArray_NDIM(self);
+    npy_intp *strides = PyArray_STRIDES(self);
+    npy_intp *shape = PyArray_SHAPE(self);
+
+    for (int i = 0; i < ndim; ++i) {
+        if (strides[i] % itemsize != 0) {
+            PyErr_SetString(PyExc_RuntimeError,
+                    "DLPack only supports strides which are a multiple "
+                    "of itemsize.");
+            return NULL;
+        }
+    }
+
+    DLDataType managed_dtype;
+    PyArray_Descr *dtype = PyArray_DESCR(self);
+
+    managed_dtype.bits = 8 * itemsize;
+    managed_dtype.lanes = 1;
+    if (PyDataType_ISSIGNED(dtype)) {
+        managed_dtype.code = kDLInt;
+    }
+    else if (PyDataType_ISUNSIGNED(dtype)) {
+        managed_dtype.code = kDLUInt;
+    }
+    else if (PyDataType_ISFLOAT(dtype)) {
+        managed_dtype.code = kDLFloat;
+    }
+    else if (PyDataType_ISCOMPLEX(dtype)) {
+        managed_dtype.code = kDLComplex;
+    }
+    else {
+        PyErr_SetString(PyExc_TypeError,
+                "DLPack only supports signed/unsigned integers, float "
+                "and complex dtypes.");
+        return NULL;
+    }
+
+    DLManagedTensor *managed = malloc(sizeof(DLManagedTensor));
+    if (managed == NULL) {
+        PyErr_SetString(PyExc_MemoryError,
+                "Could not allocate the DLManagedTensor struct.");
+        return NULL;
+    }
+
+    managed->dl_tensor.data = PyArray_DATA(self);
+    managed->dl_tensor.device.device_type = kDLCPU;
+    managed->dl_tensor.device.device_id = 0;
+    managed->dl_tensor.dtype = managed_dtype;
+
+    int64_t *managed_shape = malloc(sizeof(int64_t) * ndim);
+    if (managed_shape == NULL) {
+        PyErr_SetString(PyExc_MemoryError,
+                "Could not allocate the DLManagedTensor shape.");
+        free(managed);
+        return NULL;
+    }
+
+    int64_t *managed_strides = malloc(sizeof(int64_t) * ndim);
+    if (managed_strides == NULL) {
+        PyErr_SetString(PyExc_MemoryError,
+                "Could not allocate the DLManagedTensor strides.");
+        free(managed);
+        free(managed_shape);
+        return NULL;
+    }
+
+    for (int i = 0; i < ndim; ++i) {
+        managed_shape[i] = shape[i];
+        /* DLPack strides are in elements; NumPy strides are in bytes. */
+        managed_strides[i] = strides[i] / itemsize;
+    }
+
+    managed->dl_tensor.ndim = ndim;
+    managed->dl_tensor.shape = managed_shape;
+    managed->dl_tensor.strides = managed_strides;
+    managed->dl_tensor.byte_offset = 0;
+    managed->manager_ctx = self;
+    managed->deleter = array_dlpack_deleter;
+
+    PyObject *capsule = PyCapsule_New(managed, NPY_DLPACK_CAPSULE_NAME,
+            array_dlpack_capsule_deleter);
+    if (capsule != NULL) {
+        /* The capsule keeps the exporting array alive until it is deleted. */
+        Py_INCREF(self);
+    }
+    else {
+        free(managed);
+        free(managed_shape);
+        free(managed_strides);
+    }
+    return capsule;
+}
+
+static PyObject *
+array_dlpack_device(PyArrayObject *NPY_UNUSED(self), PyObject *NPY_UNUSED(args))
+{
+    /* NumPy arrays always live on the CPU: (kDLCPU, 0). */
+    return Py_BuildValue("ii", 1, 0);
+}
+
 
 NPY_NO_EXPORT PyMethodDef array_methods[] = {
 
     /* for subtypes */
@@ -2914,5 +3062,15 @@ NPY_NO_EXPORT PyMethodDef array_methods[] = {
     {"view",
         (PyCFunction)array_view,
         METH_FASTCALL | METH_KEYWORDS, NULL},
+
+    // For data interchange between libraries
+    {"__dlpack__",
+        (PyCFunction)array_dlpack,
+        METH_FASTCALL | METH_KEYWORDS, NULL},
+
+    {"__dlpack_device__",
+        (PyCFunction)array_dlpack_device,
+        METH_NOARGS, NULL},
+
     {NULL, NULL, 0, NULL}           /* sentinel */
 };
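
For context, below is a minimal consumer-side sketch (not part of the patch) of how another extension could unwrap the capsule that __dlpack__ returns. The helper name inspect_dlpack_capsule is hypothetical; only the capsule name must match NPY_DLPACK_CAPSULE_NAME above. Note that in this version the capsule destructor itself invokes managed->deleter, so a consumer just keeps the capsule alive while using the data and must not call the deleter again.

#include <Python.h>
#include <stdio.h>
#include "numpy/dlpack/dlpack.h"

/* Hypothetical consumer-side helper: unwrap and inspect the
 * DLManagedTensor carried by the capsule from ndarray.__dlpack__(). */
static int
inspect_dlpack_capsule(PyObject *capsule)
{
    DLManagedTensor *managed = (DLManagedTensor *)PyCapsule_GetPointer(
            capsule, "NumPy DLPack Wrapper");
    if (managed == NULL) {
        return -1;  /* wrong capsule name; an exception is set */
    }

    DLTensor *t = &managed->dl_tensor;
    printf("ndim=%d code=%u bits=%u lanes=%u\n",
           t->ndim, (unsigned)t->dtype.code,
           (unsigned)t->dtype.bits, (unsigned)t->dtype.lanes);
    for (int i = 0; i < t->ndim; ++i) {
        printf("shape[%d]=%lld\n", i, (long long)t->shape[i]);
    }
    /* Do not call managed->deleter here: the capsule's destructor
     * (array_dlpack_capsule_deleter) does so when the capsule dies. */
    return 0;
}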