complete video field

opendatalab · Apr 11, 2023 · d075e04 · d075e04
1 parent a3f93e5
commit d075e04
Show file tree

Hide file tree

Showing 9 changed files with 837 additions and 11 deletions.
diff --git a/dsdl/fields/__init__.py b/dsdl/fields/__init__.py
@@ -19,7 +19,7 @@
     "UniqueID",
     "Date",
     "Time",
-    "BBox3D"
+    "BBox3D",
 ]
 
 __generic_fields__ = [
@@ -28,14 +28,15 @@
     "Num",
     "Str",
     "Dict",
-    "List"
+    "List",
 ]
 
 __unstructure_fields__ = [
     "Image",
     "LabelMap",
     "InstanceMap",
-    "PointCloud"
+    "PointCloud",
+    "Video",
 ]
 
 __all__ = __unstructure_fields__ + __generic_fields__ + __special_fields__ + ["Struct"]
diff --git a/dsdl/fields/special.py b/dsdl/fields/special.py
@@ -314,14 +314,98 @@ def load_value(self, value):
 
 
 class BBox3D(BaseField):
+    default_args = {"mode": "auto-drive"}
+
+    args_schema = {
+        "type": "object",
+        "properties": {
+            "mode": {"type": "string", "enum": ["auto-drive", "indoor"]}
+        },
+        "minProperties": 1,
+        "maxProperties": 1,
+        "required": ["mode"]
+    }
+
     data_schema = {
         "$id": "/special/bbox3d",
         "title": "BBox3DField",
         "description": "BBox3D Field in dsdl.",
         "type": "array",
-        "items": {"type": "number"},
-        "minItems": 7,
-        "maxItems": 7,
+        "oneOf": [
+            {"minItems": 7, "maxItems": 7,
+             "items": [{"type": "number"},
+                       {"type": "number"},
+                       {"type": "number"},
+                       {"type": "number", "minimum": 0},
+                       {"type": "number", "minimum": 0},
+                       {"type": "number", "minimum": 0},
+                       {"type": "number"}]},
+            {"minItems": 9, "maxItems": 9,
+             "items": [{"type": "number"},
+                       {"type": "number"},
+                       {"type": "number"},
+                       {"type": "number", "minimum": 0},
+                       {"type": "number", "minimum": 0},
+                       {"type": "number", "minimum": 0},
+                       {"type": "number"},
+                       {"type": "number"},
+                       {"type": "number"}]}
+        ]
+    }
+
+    whole_schema = {
+        "type": "object",
+        "oneOf": [
+            {
+                "properties": {
+                    "args": {
+                        "type": "object",
+                        "properties": {
+                            "mode": {"type": "string", "enum": ["auto-drive"]}
+                        },
+                        "minProperties": 1,
+                        "maxProperties": 1,
+                        "required": ["mode"]
+                    },
+                    "value": {
+                        "type": "array",
+                        "minItems": 7, "maxItems": 7,
+                        "items": [{"type": "number"},
+                                  {"type": "number"},
+                                  {"type": "number"},
+                                  {"type": "number", "minimum": 0},
+                                  {"type": "number", "minimum": 0},
+                                  {"type": "number", "minimum": 0},
+                                  {"type": "number"}]
+                    }
+                }
+            },
+
+            {
+                "properties": {
+                    "args": {"type": "object",
+                             "properties": {
+                                 "mode": {"type": "string", "enum": ["indoor"]}
+                             },
+                             "minProperties": 1,
+                             "maxProperties": 1,
+                             "required": ["mode"]},
+                    "value": {
+                        "type": "array",
+                        "minItems": 9, "maxItems": 9,
+                        "items": [{"type": "number"},
+                                  {"type": "number"},
+                                  {"type": "number"},
+                                  {"type": "number", "minimum": 0},
+                                  {"type": "number", "minimum": 0},
+                                  {"type": "number", "minimum": 0},
+                                  {"type": "number"},
+                                  {"type": "number"},
+                                  {"type": "number"}]}
+                }
+            }
+        ],
+        "required": ["args", "value"]
     }
 
     geometry_class = "BBox3D"

diff --git a/dsdl/fields/unstructure.py b/dsdl/fields/unstructure.py
@@ -55,10 +55,21 @@ class PointCloud(UnstructuredObjectField):
     }
 
     data_schema = {
-        "$id": "/special/pointcloud",
+        "$id": "/unstructure/pointcloud",
         "title": "PointCloudField",
         "description": "PointCloud Field in dsdl.",
         "type": "string"
     }
 
     geometry_class = "PointCloud"
+
+
+class Video(UnstructuredObjectField):
+    """Unstructured field describing a video sample.
+
+    The raw field value is described by ``data_schema`` as a plain string.
+    NOTE(review): presumably that string is the video's location/path, as
+    for the other unstructured fields -- confirm against
+    ``UnstructuredObjectField``.
+    """
+
+    # JSON-schema style description of the raw value: it must be a string.
+    data_schema = {
+        "$id": "/unstructure/video",
+        "title": "VideoField",
+        "description": "Video field in dsdl.",
+        "type": "string",
+    }
+
+    # Name of the geometry class paired with this field.
+    geometry_class = "Video"
diff --git a/dsdl/geometry/__init__.py b/dsdl/geometry/__init__.py
@@ -14,6 +14,7 @@
 from .classdomain import ClassDomain, ClassDomainMeta
 from .box3d import BBox3D
 from .pointcloud import PointCloud
+from .video import Video
 
 __all__ = [
     "BBox",
@@ -42,4 +43,5 @@
     "ClassDomainMeta",
     "BBox3D",
     "PointCloud",
+    "Video",
 ]
diff --git a/dsdl/geometry/box3d.py b/dsdl/geometry/box3d.py
@@ -1,9 +1,30 @@
 from .base_geometry import BaseGeometry
+import numpy as np
 
 
 class BBox3D(BaseGeometry):
-    def __init__(self, value):
-        self._data = value
+    def __init__(self, value, mode):
+        """Store the box parameters together with their mode.
+
+        Args:
+            value: sequence of box parameters; the accessors of this class
+                read indices 0-6 for every mode and indices 7 and 8
+                (pitch, roll) as well.
+            mode: either ``"indoor"`` or ``"auto-drive"``.
+
+        Raises:
+            AssertionError: if ``mode`` is not one of the two known modes.
+        """
+        assert mode in ("indoor", "auto-drive")
+        self._mode = mode
+        if mode == "auto-drive":
+            # Pad two zeros so the pitch/roll slots (indices 7 and 8)
+            # always exist, even though this mode does not supply them.
+            self._data = list(value) + [0., 0.]
+        else:
+            # "indoor": keep the values as given. Previously ``_data`` was
+            # never assigned on this path, so every accessor raised
+            # AttributeError for indoor boxes.
+            self._data = list(value)
+
+    def to_array(self):
+        """Return the box parameters as a numpy array.
+
+        In ``"auto-drive"`` mode only the first seven stored values are
+        returned; in any other mode all stored values are returned.
+        """
+        values = self._data[:7] if self.mode == "auto-drive" else self._data
+        return np.array(values)
+
+    @property
+    def data(self):
+        """Stored box values, limited to the first seven in auto-drive mode.
+
+        For any other mode the internal list itself is returned (not a
+        copy).
+        """
+        if self.mode != "auto-drive":
+            return self._data
+        return self._data[:7]
+
+    @property
+    def mode(self):
+        """Mode string stored by the constructor ("indoor" or "auto-drive")."""
+        return self._mode
 
     @property
     def x(self):
@@ -30,9 +51,17 @@ def height(self):
         return self._data[5]
 
     @property
-    def alpha(self):
+    def yaw(self):
         return self._data[6]
 
+    @property
+    def pitch(self):
+        """Value stored at index 7 of the box data.
+
+        For boxes built in "auto-drive" mode this slot holds the 0. padding
+        appended by the constructor.
+        """
+        return self._data[7]
+
+    @property
+    def roll(self):
+        """Value stored at index 8 of the box data.
+
+        For boxes built in "auto-drive" mode this slot holds the 0. padding
+        appended by the constructor.
+        """
+        return self._data[8]
+
     @property
     def xmin(self):
         return self.x - self.length / 2
@@ -60,3 +89,6 @@ def zmax(self):
     @property
     def volumn(self):
         return self.length * self.width * self.height
+
+    def __repr__(self):
+        """Describe the box by its min/max extents along x, y and z."""
+        extents = (
+            ("xmin", self.xmin),
+            ("ymin", self.ymin),
+            ("zmin", self.zmin),
+            ("xmax", self.xmax),
+            ("ymax", self.ymax),
+            ("zmax", self.zmax),
+        )
+        inner = ", ".join(f"{label}={amount}" for label, amount in extents)
+        return f"BoundingBox3D({inner})"
diff --git a/dsdl/geometry/pointcloud.py b/dsdl/geometry/pointcloud.py
@@ -26,5 +26,5 @@ def to_array(self):
         points = np.frombuffer(self.to_bytes().read(), dtype=np.float32)
         return points.reshape(-1, self.load_dim)
 
-    def __repr(self):
+    def __repr__(self):
         return f"point cloud path: {self.location}"
diff --git a/dsdl/geometry/utils.py b/dsdl/geometry/utils.py
@@ -66,3 +66,114 @@ def bytes_to_numpy(bytes_: io.BytesIO) -> np.ndarray:  # type: ignore[type-arg]
         raise FileReadError("Currently unsupported image type")
     image_ = np.array(image.getdata(), dtype=dtype).reshape(*shape)
     return image_
+
+
+def video_encode(backend: str, bytes_: io.BytesIO, **kwargs):
+    """Open a video held in memory with the requested backend.
+
+    Despite its name, nothing is encoded here: the function only builds a
+    reader object over ``bytes_`` and reports how many frames it has.
+
+    Args:
+        backend: ``"decord"``, ``"pyav"`` or ``"pims"`` (case-insensitive).
+        bytes_: the video content, handed to the backend unchanged.
+        **kwargs: ``num_threads`` (decord only, default 1) and ``mode``
+            (pims only, ``"accurate"`` or ``"efficient"``, default
+            ``"accurate"``).
+
+    Returns:
+        A ``(video_reader, num_frames)`` tuple.
+
+    Raises:
+        AssertionError: for an unknown backend or an unknown pims mode.
+        ImportError: when the library of the chosen backend is missing.
+    """
+    backend = backend.lower()
+    assert backend in ("decord", "pyav", "pims")
+
+    if backend == "decord":
+        try:
+            import decord
+        except ImportError:
+            raise ImportError('Please run "pip install decord" to install Decord first.')
+        thread_count = kwargs.get("num_threads", 1)
+        reader = decord.VideoReader(bytes_, num_threads=thread_count)
+        return reader, len(reader)
+
+    if backend == "pyav":
+        try:
+            import av
+        except ImportError:
+            raise ImportError('Please run "conda install av -c conda-forge" '
+                              'or "pip install av" to install PyAV first.')
+        reader = av.open(bytes_)
+        # PyAV reports the frame count on the first video stream.
+        return reader, reader.streams.video[0].frames
+
+    # Only the pims backend is left; the mode is checked before importing.
+    mode = kwargs.get("mode", "accurate")
+    assert mode in ['accurate', 'efficient']
+    try:
+        import pims
+    except ImportError:
+        raise ImportError('Please run "conda install pims -c conda-forge" '
+                          'or "pip install pims" to install pims first.')
+    reader_cls = pims.PyAVReaderIndexed if mode == 'accurate' else pims.PyAVReaderTimed
+    reader = reader_cls(bytes_)
+    return reader, len(reader)
+
+
+def video_decode(backend: str, video_reader, frame_inds: np.ndarray, **kwargs):
+    """Fetch the frames at ``frame_inds`` from an already opened reader.
+
+    Args:
+        backend: ``"decord"``, ``"pyav"`` or ``"pims"`` (case-insensitive);
+            it has to match the library that created ``video_reader``.
+        video_reader: reader object of the chosen backend.
+        frame_inds: indices of the wanted frames. Inputs that are not 1-D
+            are squeezed; a single index always ends up as a 1-element
+            array.
+        **kwargs: ``mode`` (``"accurate"`` or ``"efficient"``, default
+            ``"accurate"``; decord and pyav only) and ``multi_thread``
+            (pyav only, default False).
+
+    Returns:
+        A list with one frame per requested index, in request order.
+
+    Raises:
+        AssertionError: for an unknown backend or mode.
+    """
+    backend = backend.lower()
+    assert backend in ("decord", "pyav", "pims")
+    frame_inds = np.asarray(frame_inds)
+    if frame_inds.ndim != 1:
+        # atleast_1d: squeezing a single index (e.g. shape (1, 1)) yields a
+        # 0-d array, which the loops below cannot iterate over.
+        frame_inds = np.atleast_1d(np.squeeze(frame_inds))
+
+    if backend == "pims":
+        # pims readers are indexable; no mode handling for this backend.
+        return [video_reader[idx] for idx in frame_inds]
+
+    mode = kwargs.get("mode", "accurate")
+    assert mode in ("accurate", "efficient")
+
+    if backend == "decord":
+        if mode == "accurate":
+            return list(video_reader.get_batch(frame_inds).asnumpy())
+        # efficient: seek to every requested index and read one frame there
+        video_reader.seek(0)
+        imgs = []
+        for idx in frame_inds:
+            video_reader.seek(idx)
+            imgs.append(video_reader.next().asnumpy())
+        return imgs
+
+    # pyav backend
+    imgs = []
+    if kwargs.get("multi_thread", False):
+        video_reader.streams.video[0].thread_type = 'AUTO'
+
+    if mode == "accurate":
+        # Decode from the start and stop shortly after the largest wanted
+        # index. The bound must be a scalar: comparing the counter with the
+        # whole index array raised ValueError for more than one index.
+        max_ind = int(frame_inds.max()) if frame_inds.size else -1
+        for count, frame in enumerate(video_reader.decode(video=0)):
+            if count > max_ind + 1:
+                break
+            imgs.append(frame.to_rgb().to_ndarray())
+        # the available frame in pyav may be less than its length,
+        # so out-of-range indices wrap around instead of raising
+        return [imgs[i % len(imgs)] for i in frame_inds]
+
+    # mode == 'efficient'
+    def first_decoded_frame(container, stream):
+        """Return the first frame decodable from the current position."""
+        for packet in container.demux(stream):
+            for frame in packet.decode():
+                if frame:
+                    return frame.to_rgb().to_ndarray()
+        return None
+
+    # Fallback for seeks that yield nothing. Stored as an ndarray so every
+    # returned item has the same type (it used to be a raw PyAV frame, and
+    # was unbound when the container decoded no frame at all).
+    backup_frame = None
+    for frame in video_reader.decode(video=0):
+        backup_frame = frame.to_rgb().to_ndarray()
+        break
+    stream = video_reader.streams.video[0]
+    for idx in frame_inds:
+        pts_scale = stream.average_rate * stream.time_base
+        frame_pts = int(idx / pts_scale)
+        video_reader.seek(
+            frame_pts, any_frame=False, backward=True, stream=stream)
+        frame = first_decoded_frame(video_reader, stream)
+        if frame is not None:
+            imgs.append(frame)
+            backup_frame = frame
+        else:
+            imgs.append(backup_frame)
+    return imgs