VideoAnnotation

class VideoAnnotation(img_size, frames_count, objects=None, frames=None, tags=None, description='', key=None)[source]

Bases: object

Annotation for a single video in Supervisely format.

Stores video-level tags, objects and per-frame figures, and supports JSON (de)serialization via to_json() / from_json().

Represents the annotation of a single video. VideoAnnotation objects are immutable: use clone() to create a modified copy.

Parameters:
img_size : Tuple[int, int] or List[int]

Size of the image (height, width).

frames_count : int

Number of frames in VideoAnnotation.

objects : VideoObjectCollection, optional

VideoObjectCollection object.

frames : FrameCollection, optional

FrameCollection object.

tags : VideoTagCollection, optional

VideoTagCollection object.

description : str, optional

Video description.

key : UUID, optional

UUID object.

Raises:

TypeError – if img_size is not a tuple or a list

Usage Example:
import supervisely as sly
from supervisely.video_annotation.video_tag import VideoTag
from supervisely.video_annotation.video_tag_collection import VideoTagCollection

# Simple VideoAnnotation example
height, width = 500, 700
frames_count = 10
video_ann = sly.VideoAnnotation((height, width), frames_count)
print(video_ann.to_json())
# Output: {
#     "size": {
#         "height": 500,
#         "width": 700
#     },
#     "description": "",
#     "key": "abef780b01ad4063b4b961ab2ba2f410",
#     "tags": [],
#     "objects": [],
#     "frames": [],
#     "framesCount": 10
# }

# More complex VideoAnnotation example
height, width = 500, 700
frames_count = 1
# VideoObjectCollection
obj_class_car = sly.ObjClass('car', sly.Rectangle)
video_obj_car = sly.VideoObject(obj_class_car)
objects = sly.VideoObjectCollection([video_obj_car])

# FrameCollection
fr_index = 7
geometry = sly.Rectangle(0, 0, 100, 100)
video_figure_car = sly.VideoFigure(video_obj_car, geometry, fr_index)
frame = sly.Frame(fr_index, figures=[video_figure_car])
frames = sly.FrameCollection([frame])

# VideoTagCollection
meta_car = sly.TagMeta('car_tag', sly.TagValueType.ANY_STRING)
vid_tag = VideoTag(meta_car, value='acura')
video_tags = VideoTagCollection([vid_tag])

# Description
descr = 'car example'

video_ann = sly.VideoAnnotation((height, width), frames_count, objects, frames, video_tags, descr)
print(video_ann.to_json())
# Output: {
#     "size": {
#         "height": 500,
#         "width": 700
#     },
#     "description": "car example",
#     "key": "a85b282e5e174e7ebad6f878b6919244",
#     "tags": [
#         {
#             "name": "car_tag",
#             "value": "acura",
#             "key": "540a8212b0344788953996cea220ea8b"
#         }
#     ],
#     "objects": [
#         {
#             "key": "7c74b8a495044ea0ac127f32751c8f5c",
#             "classTitle": "car",
#             "tags": []
#         }
#     ],
#     "frames": [
#         {
#             "index": 7,
#             "figures": [
#                 {
#                     "key": "82dcbf2e3c5f42a99eeea2ad34173793",
#                     "objectKey": "7c74b8a495044ea0ac127f32751c8f5c",
#                     "geometryType": "rectangle",
#                     "geometry": {
#                         "points": {
#                             "exterior": [
#                                 [
#                                     0,
#                                     0
#                                 ],
#                                 [
#                                     100,
#                                     100
#                                 ]
#                             ],
#                             "interior": []
#                         }
#                     }
#                 }
#             ]
#         }
#     ],
#     "framesCount": 1
# }

Methods

clone

Makes a copy of the VideoAnnotation, replacing any fields that are given; fields that are not given are taken from the original VideoAnnotation.

from_json

Convert a json dict to VideoAnnotation.

is_empty

Check whether the video annotation is empty, i.e. contains neither objects nor tags.

key

Annotation key value.

load_json_file

Loads json file and converts it to VideoAnnotation.

to_json

Convert the VideoAnnotation to a json dict.

validate_figures_bounds

Checks that the figures from all frames in the collection lie within the image bounds.

Attributes

description

Video description.

figures

VideoAnnotation figures.

frames

VideoAnnotation frames.

frames_count

Number of frames.

img_size

Size of the image (height, width).

objects

VideoAnnotation objects.

tags

VideoAnnotation tags.

classmethod from_json(data, project_meta, key_id_map=None, skip_corrupted=False)[source]

Convert a json dict to VideoAnnotation. Read more about Supervisely format.

Parameters:
data : dict

Dict in json format.

project_meta : ProjectMeta

Input ProjectMeta object.

key_id_map : KeyIdMap, optional

KeyIdMap object.

skip_corrupted : bool, optional

Skip corrupted items (currently only frames) during conversion.

Returns:

VideoAnnotation object

Return type:

VideoAnnotation

Usage Example:
import supervisely as sly

video_ann_json = {
    "size": {"height": 500, "width": 700},
    "tags": [],
    "objects": [],
    "frames": [],
    "framesCount": 1
}
key_id_map = sly.KeyIdMap()
meta = sly.ProjectMeta()

video_ann = sly.VideoAnnotation.from_json(video_ann_json, meta, key_id_map)
classmethod load_json_file(path, project_meta, key_id_map=None)[source]

Loads json file and converts it to VideoAnnotation.

Parameters:
path : str

Path to the json file.

project_meta : ProjectMeta

Input ProjectMeta object.

key_id_map : KeyIdMap, optional

KeyIdMap object.

Returns:

VideoAnnotation object

Return type:

VideoAnnotation

Usage Example:
import os
from dotenv import load_dotenv

import supervisely as sly

# Load secrets and create API object from .env file (recommended)
# Learn more here: https://developer.supervisely.com/getting-started/basics-of-authentication
if sly.is_development():
    load_dotenv(os.path.expanduser("~/supervisely.env"))

api = sly.Api.from_env()

team_name = 'Vehicle Detection'
workspace_name = 'Cities'
project_name =  'London'

team = api.team.get_info_by_name(team_name)
workspace = api.workspace.get_info_by_name(team.id, workspace_name)
project = api.project.get_info_by_name(workspace.id, project_name)

meta_json = api.project.get_meta(project.id)
meta = sly.ProjectMeta.from_json(meta_json)

# Load json file
path = "/home/admin/work/docs/my_dataset/ann/annotation.json"
ann = sly.VideoAnnotation.load_json_file(path, meta)
clone(img_size=None, frames_count=None, objects=None, frames=None, tags=None, description=None)[source]

Makes a copy of the VideoAnnotation, replacing any fields that are given; fields that are not given are taken from the original VideoAnnotation.

Parameters:
img_size : Tuple[int, int], optional

Size of the image (height, width).

frames_count : int, optional

Number of frames in VideoAnnotation.

objects : VideoObjectCollection, optional

VideoObjectCollection object.

frames : FrameCollection, optional

FrameCollection object.

tags : VideoTagCollection, optional

VideoTagCollection object.

description : str, optional

Video description.

Raises:

TypeError – if img_size is not a tuple or a list

Usage Example:
import supervisely as sly

height, width = 500, 700
frames_count = 1
video_ann = sly.VideoAnnotation((height, width), frames_count)

obj_class_car = sly.ObjClass('car', sly.Rectangle)
video_obj_car = sly.VideoObject(obj_class_car)
new_objects = sly.VideoObjectCollection([video_obj_car])
new_video_ann = video_ann.clone(objects=new_objects)
print(new_video_ann.to_json())
# Output: {
#     "size": {
#         "height": 500,
#         "width": 700
#     },
#     "description": "",
#     "key": "37f7d267864c4fd8b1a1a32f67e37f7d",
#     "tags": [],
#     "objects": [
#         {
#             "key": "27d4ba1aaee64930b2d0bfb7e8b53493",
#             "classTitle": "car",
#             "tags": []
#         }
#     ],
#     "frames": [],
#     "framesCount": 1
# }
is_empty()[source]

Check whether the video annotation is empty, i.e. contains neither objects nor tags.

Returns:

True if video annotation is empty, False otherwise.

Return type:

bool

Usage Example:
import os
from dotenv import load_dotenv

import supervisely as sly
from supervisely.video_annotation.key_id_map import KeyIdMap

# Load secrets and create API object from .env file (recommended)
# Learn more here: https://developer.supervisely.com/getting-started/basics-of-authentication
if sly.is_development():
    load_dotenv(os.path.expanduser("~/supervisely.env"))

api = sly.Api.from_env()

project_id = 17208
video_id = 19371139
key_id_map = KeyIdMap()
meta_json = api.project.get_meta(project_id)
meta = sly.ProjectMeta.from_json(meta_json)

ann_json = api.video.annotation.download(video_id)
ann = sly.VideoAnnotation.from_json(ann_json, meta, key_id_map)

print(ann.is_empty()) # False
key()[source]

Annotation key value.

Returns:

Key value of annotation object.

Return type:

UUID

Usage Example:
import supervisely as sly

height, width = 500, 700
frames_count = 1
# VideoObjectCollection
obj_class_car = sly.ObjClass('car', sly.Rectangle)
video_obj_car = sly.VideoObject(obj_class_car)
objects = sly.VideoObjectCollection([video_obj_car])
video_ann = sly.VideoAnnotation((height, width), frames_count, objects)

print(video_ann.key())
# Output: 6e5bd622-4d7b-45ee-8bc5-807d5a5e2134
to_json(key_id_map=None)[source]

Convert the VideoAnnotation to a json dict. Read more about Supervisely format.

Parameters:
key_id_map : KeyIdMap, optional

KeyIdMap object.

Returns:

VideoAnnotation in JSON format as a dict

Return type:

Dict

Usage Example:
import supervisely as sly

height, width = 500, 700
frames_count = 10
video_ann = sly.VideoAnnotation((height, width), frames_count)
print(video_ann.to_json())
# Output: {
#     "size": {
#         "height": 500,
#         "width": 700
#     },
#     "description": "",
#     "key": "abef780b01ad4063b4b961ab2ba2f410",
#     "tags": [],
#     "objects": [],
#     "frames": [],
#     "framesCount": 10
# }
validate_figures_bounds()[source]

Checks that the figures from all frames in the collection lie within the image bounds.

Raises:

OutOfImageBoundsException – if a figure is out of image bounds

Returns:

None

Return type:

None

Usage Example:
import supervisely as sly

height, width = 50, 700
frames_count = 1
obj_class_car = sly.ObjClass('car', sly.Rectangle)
video_obj_car = sly.VideoObject(obj_class_car)
objects = sly.VideoObjectCollection([video_obj_car])
fr_index = 7
geometry = sly.Rectangle(0, 0, 100, 100)
video_figure_car = sly.VideoFigure(video_obj_car, geometry, fr_index)
frame = sly.Frame(fr_index, figures=[video_figure_car])
frames = sly.FrameCollection([frame])

video_ann = sly.VideoAnnotation((height, width), frames_count, objects, frames)
video_ann.validate_figures_bounds()
# raise OutOfImageBoundsException("Figure is out of image bounds")
property description : str

Video description.

Returns:

Video description

Return type:

str

Usage Example:
import supervisely as sly

height, width = 500, 700
frames_count = 1
descr = 'example'
video_ann = sly.VideoAnnotation((height, width), frames_count, description=descr)
print(video_ann.description) # example
property figures : list[supervisely.video_annotation.video_figure.VideoFigure]

VideoAnnotation figures.

Returns:

List of VideoFigures from all frames in VideoAnnotation.

Return type:

List[VideoFigure]

Usage Example:
import supervisely as sly

height, width = 500, 700
frames_count = 1
obj_class_car = sly.ObjClass('car', sly.Rectangle)
video_obj_car = sly.VideoObject(obj_class_car)
objects = sly.VideoObjectCollection([video_obj_car])
fr_index = 7
geometry = sly.Rectangle(0, 0, 100, 100)
video_figure_car = sly.VideoFigure(video_obj_car, geometry, fr_index)
frame = sly.Frame(fr_index, figures=[video_figure_car])
frames = sly.FrameCollection([frame])

video_ann = sly.VideoAnnotation((height, width), frames_count, objects, frames)
print(len(video_ann.figures)) # 1
property frames : supervisely.video_annotation.frame_collection.FrameCollection

VideoAnnotation frames.

Returns:

FrameCollection object.

Return type:

FrameCollection

Usage Example:
import supervisely as sly

height, width = 500, 700
frames_count = 1
obj_class_car = sly.ObjClass('car', sly.Rectangle)
video_obj_car = sly.VideoObject(obj_class_car)
objects = sly.VideoObjectCollection([video_obj_car])
fr_index = 7
geometry = sly.Rectangle(0, 0, 100, 100)
video_figure_car = sly.VideoFigure(video_obj_car, geometry, fr_index)
frame = sly.Frame(fr_index, figures=[video_figure_car])
frames = sly.FrameCollection([frame])

video_ann = sly.VideoAnnotation((height, width), frames_count, objects, frames)
print(video_ann.frames.to_json())
# Output: [
#     {
#         "index": 7,
#         "figures": [
#             {
#                 "key": "2842f561b1924f6abd6ab6f696ed9b65",
#                 "objectKey": "7f30fa9b78444ad69e02b37edbf9a902",
#                 "geometryType": "rectangle",
#                 "geometry": {
#                     "points": {
#                         "exterior": [
#                             [
#                                 0,
#                                 0
#                             ],
#                             [
#                                 100,
#                                 100
#                             ]
#                         ],
#                         "interior": []
#                     }
#                 }
#             }
#         ]
#     }
# ]
property frames_count : int

Number of frames.

Returns:

Frames count

Return type:

int

Usage Example:
import supervisely as sly

height, width = 500, 700
frames_count = 15
video_ann = sly.VideoAnnotation((height, width), frames_count)
print(video_ann.frames_count)
# Output: 15
property img_size : tuple[int, int]

Size of the image (height, width).

Returns:

Image size

Return type:

Tuple[int, int]

Usage Example:
import supervisely as sly

height, width = 500, 700
frames_count = 1
video_ann = sly.VideoAnnotation((height, width), frames_count)
print(video_ann.img_size)
# Output: (500, 700)
property objects : supervisely.video_annotation.video_object_collection.VideoObjectCollection

VideoAnnotation objects.

Returns:

VideoObjectCollection object.

Return type:

VideoObjectCollection

Usage Example:
import supervisely as sly

height, width = 500, 700
frames_count = 1
# VideoObjectCollection
obj_class_car = sly.ObjClass('car', sly.Rectangle)
video_obj_car = sly.VideoObject(obj_class_car)
objects = sly.VideoObjectCollection([video_obj_car])
video_ann = sly.VideoAnnotation((height, width), frames_count, objects)
print(video_ann.objects.to_json())
# Output: [
#     {
#         "key": "79fc07a4a6ca4b2796279bc033b9ec9a",
#         "classTitle": "car",
#         "tags": []
#     }
# ]
property tags : supervisely.video_annotation.video_tag_collection.VideoTagCollection

VideoAnnotation tags.

Returns:

VideoTagCollection object.

Return type:

VideoTagCollection

Usage Example:
import supervisely as sly

height, width = 500, 700
frames_count = 1
obj_class_car = sly.ObjClass('car', sly.Rectangle)
video_obj_car = sly.VideoObject(obj_class_car)
objects = sly.VideoObjectCollection([video_obj_car])
fr_index = 7
geometry = sly.Rectangle(0, 0, 100, 100)
video_figure_car = sly.VideoFigure(video_obj_car, geometry, fr_index)
frame = sly.Frame(fr_index, figures=[video_figure_car])
frames = sly.FrameCollection([frame])
meta_car = sly.TagMeta('car_tag', sly.TagValueType.ANY_STRING)
from supervisely.video_annotation.video_tag import VideoTag
vid_tag = VideoTag(meta_car, value='acura')
from supervisely.video_annotation.video_tag_collection import VideoTagCollection
tags = VideoTagCollection([vid_tag])

video_ann = sly.VideoAnnotation((height, width), frames_count, objects, frames, tags)
print(video_ann.tags.to_json())
# Output: [
#     {
#         "name": "car_tag",
#         "value": "acura",
#         "key": "c63e8259589a4fa5b4fb15a48c1f6a63"
#     }
# ]