# coding: utf-8
from __future__ import annotations
import json
import uuid
from copy import deepcopy
from typing import Dict, List, Optional, Tuple
from uuid import UUID
from supervisely._utils import take_with_default
from supervisely.project.project_meta import ProjectMeta
from supervisely.video_annotation.constants import (
DESCRIPTION,
FRAMES,
FRAMES_COUNT,
IMG_SIZE,
IMG_SIZE_HEIGHT,
IMG_SIZE_WIDTH,
KEY,
OBJECTS,
TAGS,
VIDEO_ID,
)
from supervisely.video_annotation.frame_collection import FrameCollection
from supervisely.video_annotation.key_id_map import KeyIdMap
from supervisely.video_annotation.video_figure import VideoFigure
from supervisely.video_annotation.video_object_collection import VideoObjectCollection
from supervisely.video_annotation.video_tag_collection import VideoTagCollection
[docs]class VideoAnnotation:
"""
VideoAnnotation for a single video. :class:`VideoAnnotation<VideoAnnotation>` object is immutable.
:param img_size: Size of the image (height, width).
:type img_size: Tuple[int, int] or List[int, int]
:param frames_count: Number of frames in VideoAnnotation.
:type frames_count: int
:param objects: VideoObjectCollection object.
:type objects: VideoObjectCollection, optional
:param frames: FrameCollection object.
:type frames: FrameCollection, optional
:param tags: VideoTagCollection object.
:type tags: VideoTagCollection, optional
:param description: Video description.
:type description: str, optional
:param key: UUID object.
:type key: UUID, optional
:raises: :class:`TypeError`, if img_size is not tuple or list
:Usage example:
.. code-block:: python
import supervisely as sly
# Simple VideoAnnotation example
height, width = 500, 700
frames_count = 10
video_ann = sly.VideoAnnotation((height, width), frames_count)
print(video_ann.to_json())
# Output: {
# "size": {
# "height": 500,
# "width": 700
# },
# "description": "",
# "key": "abef780b01ad4063b4b961ab2ba2f410",
# "tags": [],
# "objects": [],
# "frames": [],
# "framesCount": 10
# }
# More complex VideoAnnotation example
height, width = 500, 700
frames_count = 1
# VideoObjectCollection
obj_class_car = sly.ObjClass('car', sly.Rectangle)
video_obj_car = sly.VideoObject(obj_class_car)
objects = sly.VideoObjectCollection([video_obj_car])
# FrameCollection
fr_index = 7
geometry = sly.Rectangle(0, 0, 100, 100)
video_figure_car = sly.VideoFigure(video_obj_car, geometry, fr_index)
frame = sly.Frame(fr_index, figures=[video_figure_car])
frames = sly.FrameCollection([frame])
# VideoTagCollection
meta_car = sly.TagMeta('car_tag', sly.TagValueType.ANY_STRING)
from supervisely.video_annotation.video_tag import VideoTag
vid_tag = VideoTag(meta_car, value='acura')
from supervisely.video_annotation.video_tag_collection import VideoTagCollection
video_tags = VideoTagCollection([vid_tag])
# Description
descr = 'car example'
video_ann = sly.VideoAnnotation((height, width), frames_count, objects, frames, video_tags, descr)
print(video_ann.to_json())
# Output: {
# "size": {
# "height": 500,
# "width": 700
# },
# "description": "car example",
# "key": "a85b282e5e174e7ebad6f878b6919244",
# "tags": [
# {
# "name": "car_tag",
# "value": "acura",
# "key": "540a8212b0344788953996cea220ea8b"
# }
# ],
# "objects": [
# {
# "key": "7c74b8a495044ea0ac127f32751c8f5c",
# "classTitle": "car",
# "tags": []
# }
# ],
# "frames": [
# {
# "index": 7,
# "figures": [
# {
# "key": "82dcbf2e3c5f42a99eeea2ad34173793",
# "objectKey": "7c74b8a495044ea0ac127f32751c8f5c",
# "geometryType": "rectangle",
# "geometry": {
# "points": {
# "exterior": [
# [
# 0,
# 0
# ],
# [
# 100,
# 100
# ]
# ],
# "interior": []
# }
# }
# }
# ]
# }
# ],
# "framesCount": 1
# }
"""
def __init__(
self,
img_size: Tuple[int, int],
frames_count: int,
objects: Optional[VideoObjectCollection] = None,
frames: Optional[FrameCollection] = None,
tags: Optional[VideoTagCollection] = None,
description: Optional[str] = "",
key: Optional[UUID] = None,
):
if not isinstance(img_size, (tuple, list)):
raise TypeError(
'{!r} has to be a tuple or a list. Given type "{}".'.format(
"img_size", type(img_size)
)
)
self._img_size = tuple(img_size)
self._frames_count = frames_count
self._description = description
self._tags = take_with_default(tags, VideoTagCollection())
self._objects = take_with_default(objects, VideoObjectCollection())
self._frames = take_with_default(frames, FrameCollection())
self._key = take_with_default(key, uuid.uuid4())
self.validate_figures_bounds()
@property
def img_size(self) -> Tuple[int, int]:
"""
Size of the image (height, width).
:return: Image size
:rtype: :class:`Tuple[int, int]`
:Usage example:
.. code-block:: python
import supervisely as sly
height, width = 500, 700
frames_count = 1
video_ann = sly.VideoAnnotation((height, width), frames_count)
print(video_ann.img_size)
# Output: (500, 700)
"""
return deepcopy(self._img_size)
@property
def frames_count(self) -> int:
"""
Number of frames.
:return: Frames count
:rtype: :class:`int`
:Usage example:
.. code-block:: python
import supervisely as sly
height, width = 500, 700
frames_count = 15
video_ann = sly.VideoAnnotation((height, width), frames_count)
print(video_ann.frames_count)
# Output: 15
"""
return self._frames_count
@property
def objects(self) -> VideoObjectCollection:
"""
VideoAnnotation objects.
:return: VideoObjectCollection object
:rtype: :class:`VideoObjectCollection`
:Usage example:
.. code-block:: python
import supervisely as sly
height, width = 500, 700
frames_count = 1
# VideoObjectCollection
obj_class_car = sly.ObjClass('car', sly.Rectangle)
video_obj_car = sly.VideoObject(obj_class_car)
objects = sly.VideoObjectCollection([video_obj_car])
video_ann = sly.VideoAnnotation((height, width), frames_count, objects)
print(video_ann.objects.to_json())
# Output: [
# {
# "key": "79fc07a4a6ca4b2796279bc033b9ec9a",
# "classTitle": "car",
# "tags": []
# }
# ]
"""
return self._objects
@property
def frames(self) -> FrameCollection:
"""
VideoAnnotation frames.
:return: FrameCollection object
:rtype: :class:`FrameCollection`
:Usage example:
.. code-block:: python
import supervisely as sly
height, width = 500, 700
frames_count = 1
obj_class_car = sly.ObjClass('car', sly.Rectangle)
video_obj_car = sly.VideoObject(obj_class_car)
objects = sly.VideoObjectCollection([video_obj_car])
fr_index = 7
geometry = sly.Rectangle(0, 0, 100, 100)
video_figure_car = sly.VideoFigure(video_obj_car, geometry, fr_index)
frame = sly.Frame(fr_index, figures=[video_figure_car])
frames = sly.FrameCollection([frame])
video_ann = sly.VideoAnnotation((height, width), frames_count, objects, frames)
print(video_ann.frames.to_json())
# Output: [
# {
# "index": 7,
# "figures": [
# {
# "key": "2842f561b1924f6abd6ab6f696ed9b65",
# "objectKey": "7f30fa9b78444ad69e02b37edbf9a902",
# "geometryType": "rectangle",
# "geometry": {
# "points": {
# "exterior": [
# [
# 0,
# 0
# ],
# [
# 100,
# 100
# ]
# ],
# "interior": []
# }
# }
# }
# ]
# }
# ]
"""
return self._frames
@property
def figures(self) -> List[VideoFigure]:
"""
VideoAnnotation figures.
:return: List of VideoFigures from all frames in VideoAnnotation
:rtype: :class:`List[VideoFigure]<supervisely.video_annotation.video_figure.VideoFigure>`
:Usage example:
.. code-block:: python
import supervisely as sly
height, width = 500, 700
frames_count = 1
obj_class_car = sly.ObjClass('car', sly.Rectangle)
video_obj_car = sly.VideoObject(obj_class_car)
objects = sly.VideoObjectCollection([video_obj_car])
fr_index = 7
geometry = sly.Rectangle(0, 0, 100, 100)
video_figure_car = sly.VideoFigure(video_obj_car, geometry, fr_index)
frame = sly.Frame(fr_index, figures=[video_figure_car])
frames = sly.FrameCollection([frame])
video_ann = sly.VideoAnnotation((height, width), frames_count, objects, frames)
print(len(video_ann.figures)) # 1
"""
return self.frames.figures
@property
def tags(self) -> VideoTagCollection:
"""
VideoAnnotation tags.
:return: VideoTagCollection object
:rtype: :class:`VideoTagCollection`
:Usage example:
.. code-block:: python
import supervisely as sly
height, width = 500, 700
frames_count = 1
obj_class_car = sly.ObjClass('car', sly.Rectangle)
video_obj_car = sly.VideoObject(obj_class_car)
objects = sly.VideoObjectCollection([video_obj_car])
fr_index = 7
geometry = sly.Rectangle(0, 0, 100, 100)
video_figure_car = sly.VideoFigure(video_obj_car, geometry, fr_index)
frame = sly.Frame(fr_index, figures=[video_figure_car])
frames = sly.FrameCollection([frame])
meta_car = sly.TagMeta('car_tag', sly.TagValueType.ANY_STRING)
from supervisely.video_annotation.video_tag import VideoTag
vid_tag = VideoTag(meta_car, value='acura')
from supervisely.video_annotation.video_tag_collection import VideoTagCollection
tags = VideoTagCollection([vid_tag])
video_ann = sly.VideoAnnotation((height, width), frames_count, objects, frames, tags)
print(video_ann.tags.to_json())
# Output: [
# {
# "name": "car_tag",
# "value": "acura",
# "key": "c63e8259589a4fa5b4fb15a48c1f6a63"
# }
# ]
"""
return self._tags
[docs] def key(self) -> UUID:
"""
Annotation key value.
:returns: Key value of annotation object.
:rtype: str
:Usage example:
.. code-block:: python
import supervisely as sly
height, width = 500, 700
frames_count = 1
# VideoObjectCollection
obj_class_car = sly.ObjClass('car', sly.Rectangle)
video_obj_car = sly.VideoObject(obj_class_car)
objects = sly.VideoObjectCollection([video_obj_car])
video_ann = sly.VideoAnnotation((height, width), frames_count, objects)
print(video_ann.key())
# Output: 6e5bd622-4d7b-45ee-8bc5-807d5a5e2134
"""
return self._key
@property
def description(self) -> str:
"""
Video description.
:return: Video description
:rtype: :class:`str`
:Usage example:
.. code-block:: python
import supervisely as sly
height, width = 500, 700
frames_count = 1
descr = 'example'
video_ann = sly.VideoAnnotation((height, width), frames_count, description=descr)
print(video_ann.description) # example
"""
return self._description
# def get_tags_on_frame(self, frame_index: int) -> VideoTagCollection:
# """
# Get all existing video tags from frame of video.
# :param frame_index: Video frame index.
# :type frame_index: :class:`int`
# :return: Tags from the given frame.
# :rtype: :class:`VideoTagCollection<supervisely.video_annotation.video_tag_collection.VideoTagCollection>`
# :Usage example:
# .. code-block:: python
# import supervisely as sly
# height, width = 50, 700
# frames_count = 1
# obj_class_car = sly.ObjClass('car', sly.Rectangle)
# video_obj_car = sly.VideoObject(obj_class_car)
# objects = sly.VideoObjectCollection([video_obj_car])
# fr_index = 7
# geometry = sly.Rectangle(10, 10, 40, 40)
# video_figure_car = sly.VideoFigure(video_obj_car, geometry, fr_index)
# frame = sly.Frame(fr_index, figures=[video_figure_car])
# frames = sly.FrameCollection([frame])
# meta_car = sly.TagMeta('car', sly.TagValueType.NONE)
# tag_car = sly.VideoTag(meta_car, frame_range=(fr_index, fr_index))
# tags = sly.VideoTagCollection([tag_car])
# video_ann = sly.VideoAnnotation((height, width), frames_count, objects, frames, tags)
# tags_on_frame = video_ann.get_tags_on_frame(fr_index)
# print(len(tags_on_frame))
# # Output: 1
# """
# tags = []
# for tag in self._tags:
# if frame_index >= tag.frame_range[0] and frame_index <= tag.frame_range[1]:
# tags.append(tag)
# return VideoTagCollection(tags)
# def get_objects_on_frame(self, frame_index: int) -> VideoObjectCollection:
# """
# Get all existing video objects from frame of video.
# :param frame_index: Video frame index.
# :type frame_index: :class:`int`
# :return: Objects from the given frame.
# :rtype: :class:`VideoObjectCollection<supervisely.video_annotation.video_object_collection.VideoObjectCollection>`
# :Usage example:
# .. code-block:: python
# import supervisely as sly
# height, width = 50, 700
# frames_count = 1
# obj_class_car = sly.ObjClass('car', sly.Rectangle)
# video_obj_car = sly.VideoObject(obj_class_car)
# objects = sly.VideoObjectCollection([video_obj_car])
# fr_index = 7
# geometry = sly.Rectangle(10, 10, 40, 40)
# video_figure_car = sly.VideoFigure(video_obj_car, geometry, fr_index)
# frame = sly.Frame(fr_index, figures=[video_figure_car])
# frames = sly.FrameCollection([frame])
# video_ann = sly.VideoAnnotation((height, width), frames_count, objects, frames)
# objs_on_frame = video_ann.get_objects_on_frame(fr_index)
# print(len(objs_on_frame))
# # Output: 1
# """
# frame = self._frames.get(frame_index, None)
# if frame is None:
# raise ValueError(f"No frame with index {frame_index} in annotation.")
# frame_objects = {}
# for fig in frame.figures:
# if fig.parent_object.key() not in frame_objects.keys():
# frame_objects[fig.parent_object.key()] = fig.parent_object
# return VideoObjectCollection(list(frame_objects.values()))
def to_json(self, key_id_map: Optional[KeyIdMap] = None) -> Dict:
    """
    Serialize the VideoAnnotation into a json dict. Read more about `Supervisely format <https://docs.supervise.ly/data-organization/00_ann_format_navi>`_.

    The video id entry is added only when ``key_id_map`` is given and
    returns a non-None id for this annotation's key.

    :param key_id_map: KeyIdMap object, forwarded to the tags, objects and frames serializers.
    :type key_id_map: KeyIdMap, optional
    :return: Json format as a dict
    :rtype: :class:`dict`
    :Usage example:

     .. code-block:: python

        import supervisely as sly

        video_ann = sly.VideoAnnotation((500, 700), 10)
        print(video_ann.to_json())
        # Output: {
        #     "size": {
        #         "height": 500,
        #         "width": 700
        #     },
        #     "description": "",
        #     "key": "abef780b01ad4063b4b961ab2ba2f410",
        #     "tags": [],
        #     "objects": [],
        #     "frames": [],
        #     "framesCount": 10
        # }
    """
    size = self.img_size
    annotation_key = self.key()

    # Height comes first in img_size, width second.
    size_json = {
        IMG_SIZE_HEIGHT: int(size[0]),
        IMG_SIZE_WIDTH: int(size[1]),
    }
    payload = {
        IMG_SIZE: size_json,
        DESCRIPTION: self.description,
        KEY: annotation_key.hex,
        TAGS: self.tags.to_json(key_id_map),
        OBJECTS: self.objects.to_json(key_id_map),
        FRAMES: self.frames.to_json(key_id_map),
        FRAMES_COUNT: self.frames_count,
    }

    if key_id_map is None:
        return payload

    video_id = key_id_map.get_video_id(annotation_key)
    if video_id is not None:
        payload[VIDEO_ID] = video_id
    return payload
@classmethod
def from_json(
    cls, data: Dict, project_meta: ProjectMeta, key_id_map: Optional[KeyIdMap] = None
) -> VideoAnnotation:
    """
    Convert a json dict to VideoAnnotation. Read more about `Supervisely format <https://docs.supervise.ly/data-organization/00_ann_format_navi>`_.

    The size, frames count, tags, objects and frames entries are read by
    subscription and therefore must be present in ``data``. The key and
    description entries are optional: a missing key is replaced with a new
    ``uuid.uuid4()`` and a missing description with an empty string.

    :param data: Dict in json format.
    :type data: dict
    :param project_meta: Input :class:`ProjectMeta<supervisely.project.project_meta.ProjectMeta>`.
    :type project_meta: ProjectMeta
    :param key_id_map: KeyIdMap object. When given, the video key is registered in it together with the video id found in ``data`` (None if absent).
    :type key_id_map: KeyIdMap, optional
    :return: VideoAnnotation object
    :rtype: :class:`VideoAnnotation`
    :raises: :class:`KeyError`, if a required entry is missing from a dict ``data``
    :Usage example:

     .. code-block:: python

        import supervisely as sly

        video_ann_json = {
            "size": {
                "height": 500,
                "width": 700
            },
            "tags": [],
            "objects": [],
            "frames": [],
            "framesCount": 1
        }
        key_id_map = sly.KeyIdMap()
        meta = sly.ProjectMeta()

        video_ann = sly.VideoAnnotation.from_json(video_ann_json, meta, key_id_map)
    """
    # Reuse the serialized key when there is one, otherwise mint a new key.
    video_key = uuid.UUID(data[KEY]) if KEY in data else uuid.uuid4()

    if key_id_map is not None:
        key_id_map.add_video(video_key, data.get(VIDEO_ID, None))

    img_size_dict = data[IMG_SIZE]
    img_size = (img_size_dict[IMG_SIZE_HEIGHT], img_size_dict[IMG_SIZE_WIDTH])

    description = data.get(DESCRIPTION, "")
    frames_count = data[FRAMES_COUNT]

    # Objects are parsed before frames because the frame parser receives them.
    tags = VideoTagCollection.from_json(data[TAGS], project_meta.tag_metas, key_id_map)
    objects = VideoObjectCollection.from_json(data[OBJECTS], project_meta, key_id_map)
    frames = FrameCollection.from_json(data[FRAMES], objects, frames_count, key_id_map)

    return cls(
        img_size=img_size,
        frames_count=frames_count,
        objects=objects,
        frames=frames,
        tags=tags,
        description=description,
        key=video_key,
    )
[docs] @classmethod
def load_json_file(
cls, path: str, project_meta: ProjectMeta, key_id_map: Optional[KeyIdMap] = None
) -> VideoAnnotation:
"""
Loads json file and converts it to VideoAnnotation.
:param path: Path to the json file.
:type path: str
:param project_meta: Input :class:`ProjectMeta<supervisely.project.project_meta.ProjectMeta>`.
:type project_meta: ProjectMeta
:param key_id_map: KeyIdMap object.
:type key_id_map: KeyIdMap, optional
:return: VideoAnnotation object
:rtype: :class:`VideoAnnotation<VideoAnnotation>`
:Usage example:
.. code-block:: python
import supervisely as sly
address = 'https://app.supervise.ly/'
token = 'Your Supervisely API Token'
api = sly.Api(address, token)
team_name = 'Vehicle Detection'
workspace_name = 'Cities'
project_name = 'London'
team = api.team.get_info_by_name(team_name)
workspace = api.workspace.get_info_by_name(team.id, workspace_name)
project = api.project.get_info_by_name(workspace.id, project_name)
meta_json = api.project.get_meta(project.id)
meta = sly.ProjectMeta.from_json(meta_json)
# Load json file
path = "/home/admin/work/docs/my_dataset/ann/annotation.json"
ann = sly.VideoAnnotation.load_json_file(path, meta)
"""
with open(path) as fin:
data = json.load(fin)
return cls.from_json(data, project_meta, key_id_map)
def clone(
    self,
    img_size: Optional[Tuple[int, int]] = None,
    frames_count: Optional[int] = None,
    objects: Optional[VideoObjectCollection] = None,
    frames: Optional[FrameCollection] = None,
    tags: Optional[VideoTagCollection] = None,
    description: Optional[str] = None,
) -> VideoAnnotation:
    """
    Create a new VideoAnnotation from this one, replacing the fields that are passed in.

    Each argument is resolved through ``take_with_default`` with the
    corresponding field of this annotation as the default. No key is
    passed to the new annotation, so its key is chosen by the constructor.

    :param img_size: Size of the image (height, width).
    :type img_size: Tuple[int, int], optional
    :param frames_count: Number of frames in VideoAnnotation.
    :type frames_count: int, optional
    :param objects: VideoObjectCollection object.
    :type objects: VideoObjectCollection, optional
    :param frames: FrameCollection object.
    :type frames: FrameCollection, optional
    :param tags: VideoTagCollection object.
    :type tags: VideoTagCollection, optional
    :param description: Video description.
    :type description: str, optional
    :return: New VideoAnnotation object
    :rtype: :class:`VideoAnnotation`
    :raises: :class:`TypeError`, if img_size is not tuple or list
    :Usage example:

     .. code-block:: python

        import supervisely as sly

        video_ann = sly.VideoAnnotation((500, 700), 1)

        obj_class_car = sly.ObjClass('car', sly.Rectangle)
        video_obj_car = sly.VideoObject(obj_class_car)
        new_objects = sly.VideoObjectCollection([video_obj_car])

        new_video_ann = video_ann.clone(objects=new_objects)
        print(new_video_ann.to_json())
        # Output: {
        #     "size": {
        #         "height": 500,
        #         "width": 700
        #     },
        #     "description": "",
        #     "key": "37f7d267864c4fd8b1a1a32f67e37f7d",
        #     "tags": [],
        #     "objects": [
        #         {
        #             "key": "27d4ba1aaee64930b2d0bfb7e8b53493",
        #             "classTitle": "car",
        #             "tags": []
        #         }
        #     ],
        #     "frames": [],
        #     "framesCount": 1
        # }
    """
    new_img_size = take_with_default(img_size, self.img_size)
    new_frames_count = take_with_default(frames_count, self.frames_count)
    new_objects = take_with_default(objects, self.objects)
    new_frames = take_with_default(frames, self.frames)
    new_tags = take_with_default(tags, self.tags)
    new_description = take_with_default(description, self.description)

    return VideoAnnotation(
        img_size=new_img_size,
        frames_count=new_frames_count,
        objects=new_objects,
        frames=new_frames,
        tags=new_tags,
        description=new_description,
    )
[docs] def is_empty(self) -> bool:
"""
Check whether video annotation contains objects or tags, or not.
:returns: True if video annotation is empty, False otherwise.
:rtype: :class:`bool`
:Usage example:
.. code-block:: python
import supervisely as sly
from supervisely.video_annotation.key_id_map import KeyIdMap
address = 'https://app.supervise.ly/'
token = 'Your Supervisely API Token'
api = sly.Api(address, token)
project_id = 17208
video_id = 19371139
key_id_map = KeyIdMap()
meta_json = api.project.get_meta(project_id)
meta = sly.ProjectMeta.from_json(meta_json)
ann_json = api.video.annotation.download(video_id)
ann = sly.VideoAnnotation.from_json(ann_json, meta, key_id_map)
print(ann.is_empty()) # False
"""
if len(self.objects) == 0 and len(self.tags) == 0:
return True
else:
return False