# coding: utf-8
"""utilities used for labeling jobs"""
from __future__ import annotations
import urllib.parse
from typing import List, NamedTuple, Tuple
from supervisely.api.labeling_job_api import LabelingJobApi
Status = LabelingJobApi.Status
from pandas import DataFrame # for typehints
from supervisely.api.module_api import ApiField
from supervisely.labeling_jobs import constants as constants
def total_desc() -> Tuple[str, str]:
    """
    Describe the "TOTAL" row of the Labeling Jobs summary table.

    :return: Row title and its human-readable description
    :rtype: :class:`Tuple[str, str]`
    """
    title = "TOTAL"
    description = "the total number of jobs in current team"
    return title, description
def is_completed_desc() -> Tuple[str, str]:
    """
    Describe the "COMPLETED" row of the Labeling Jobs summary table.

    :return: Row title and its human-readable description
    :rtype: :class:`Tuple[str, str]`
    """
    title = "COMPLETED"
    description = "the number of completed jobs"
    return title, description
def is_completed(job_info: NamedTuple) -> bool:
    """
    Check whether a Labeling Job has the COMPLETED status.

    :param job_info: Information about Labeling Job; only its ``status`` field is read.
    :type job_info: NamedTuple
    :return: True if Labeling Job is completed, False otherwise
    :rtype: :class:`bool`
    """
    # Return an explicit bool instead of falling through to an implicit None,
    # so the result matches the ``-> bool`` annotation.
    return job_info.status == str(Status.COMPLETED)
def is_stopped_desc() -> Tuple[str, str]:
    """
    Describe the "STOPPED" row of the Labeling Jobs summary table.

    :return: Row title and its human-readable description
    :rtype: :class:`Tuple[str, str]`
    """
    title = "STOPPED"
    description = "the number of stopped jobs"
    return title, description
def is_stopped(job_info: NamedTuple) -> bool:
    """
    Check whether a Labeling Job has the STOPPED status.

    :param job_info: Information about Labeling Job; only its ``status`` field is read.
    :type job_info: NamedTuple
    :return: True if Labeling Job is stopped, False otherwise
    :rtype: :class:`bool`
    """
    # Return an explicit bool instead of falling through to an implicit None,
    # so the result matches the ``-> bool`` annotation.
    return job_info.status == str(Status.STOPPED)
def is_not_started_desc() -> Tuple[str, str]:
    """
    Describe the "PENDING" row of the Labeling Jobs summary table.

    :return: Row title and its human-readable description
    :rtype: :class:`Tuple[str, str]`
    """
    title = "PENDING"
    description = "the number of jobs labeler haven't even opened yet (created but not started)"
    return title, description
def is_not_started(job_info: NamedTuple) -> bool:
    """
    Check whether a Labeling Job has the PENDING status (created but not started).

    :param job_info: Information about Labeling Job; only its ``status`` field is read.
    :type job_info: NamedTuple
    :return: True if Labeling Job is not started, False otherwise
    :rtype: :class:`bool`
    """
    # Return an explicit bool instead of falling through to an implicit None,
    # so the result matches the ``-> bool`` annotation.
    return job_info.status == str(Status.PENDING)
def total_items_count_desc() -> Tuple[str, str]:
    """
    Describe the "TOTAL" row of the Labeling Jobs items summary table.

    :return: Row title and its human-readable description
    :rtype: :class:`Tuple[str, str]`
    """
    title = "TOTAL"
    description = "the total number of items in all labeling jobs"
    return title, description
def total_items_count(job_info: NamedTuple) -> int:
    """
    Get the total number of items in a Labeling Job.

    :param job_info: Information about Labeling Job; its ``images_count`` field is read.
    :type job_info: NamedTuple
    :return: Value of the ``images_count`` field
    :rtype: :class:`int`
    """
    items_total = job_info.images_count
    return items_total
def labeled_items_count_desc() -> Tuple[str, str]:
    """
    Describe the "LABELED" row of the Labeling Jobs items summary table.

    :return: Row title and its human-readable description
    :rtype: :class:`Tuple[str, str]`
    """
    title = "LABELED"
    description = (
        'the total number of labeled items (labelers marked as "finished") in all labeling jobs'
    )
    return title, description
# Count of images that the labeler marked as done
def labeled_items_count(job_info: NamedTuple) -> int:
    """
    Get the number of items of a Labeling Job that count as labeled.

    :param job_info: Information about Labeling Job.
    :type job_info: NamedTuple
    :return: ``finished_images_count`` while the job is in progress, otherwise the total
        number of items in the job
    :rtype: :class:`int`
    """
    if not is_on_labeling(job_info):
        # Any job that is not in progress reports all of its items as labeled.
        return total_items_count(job_info)
    return job_info.finished_images_count
def reviewed_items_count_desc() -> Tuple[str, str]:
    """
    Describe the "REVIEWED" row of the Labeling Jobs items summary table.

    :return: Row title and its human-readable description
    :rtype: :class:`Tuple[str, str]`
    """
    title = "REVIEWED"
    description = 'the total number of reviewed items (reviewers marked as "accepted" or "rejected") in all labeling jobs'
    return title, description
# Count of images that the reviewer accepted or rejected
def reviewed_items_count(job_info: NamedTuple) -> int:
    """
    Get the number of reviewed items in a Labeling Job.

    :param job_info: Information about Labeling Job; its ``accepted_images_count`` and
        ``rejected_images_count`` fields are read.
    :type job_info: NamedTuple
    :return: Sum of accepted and rejected items
    :rtype: :class:`int`
    """
    accepted = job_info.accepted_images_count
    rejected = job_info.rejected_images_count
    return accepted + rejected
def accepted_items_count_desc() -> Tuple[str, str]:
    """
    Describe the "ACCEPTED" row of the Labeling Jobs items summary table.

    :return: Row title and its human-readable description
    :rtype: :class:`Tuple[str, str]`
    """
    title = "ACCEPTED"
    description = (
        'the total number of accepted items (reviewers marked as "accepted") in all labeling jobs'
    )
    return title, description
def accepted_items_count(job_info: NamedTuple) -> int:
    """
    Get the number of accepted items in a Labeling Job.

    :param job_info: Information about Labeling Job; its ``accepted_images_count`` field is read.
    :type job_info: NamedTuple
    :return: Value of the ``accepted_images_count`` field
    :rtype: :class:`int`
    """
    accepted = job_info.accepted_images_count
    return accepted
def rejected_items_count_desc() -> Tuple[str, str]:
    """
    Describe the "REJECTED" row of the Labeling Jobs items summary table.

    :return: Row title and its human-readable description
    :rtype: :class:`Tuple[str, str]`
    """
    title = "REJECTED"
    description = (
        'the total number of rejected items (reviewers marked as "rejected") in all labeling jobs'
    )
    return title, description
def rejected_items_count(job_info: NamedTuple) -> int:
    """
    Get the number of rejected items in a Labeling Job.

    :param job_info: Information about Labeling Job; its ``rejected_images_count`` field is read.
    :type job_info: NamedTuple
    :return: Value of the ``rejected_images_count`` field
    :rtype: :class:`int`
    """
    rejected = job_info.rejected_images_count
    return rejected
def is_on_labeling_desc() -> Tuple[str, str]:
    """
    Describe the "LABELING IN PROGRESS" row of the Labeling Jobs summary table.

    :return: Row title and its human-readable description
    :rtype: :class:`Tuple[str, str]`
    """
    # The return annotation used to be ``str`` although a 2-tuple is returned.
    return "LABELING IN PROGRESS", "the number of jobs with status IN_PROGRESS"
# labeling is in progress
def is_on_labeling(job_info: NamedTuple) -> bool:
    """
    Check whether a Labeling Job has the IN_PROGRESS status.

    :param job_info: Information about Labeling Job; only its ``status`` field is read.
    :type job_info: NamedTuple
    :return: True if Labeling Job is in progress, False otherwise
    :rtype: :class:`bool`
    """
    in_progress = str(Status.IN_PROGRESS)
    return job_info.status == in_progress
def is_labeling_started_desc() -> Tuple[str, str]:
    """
    Describe the "LABELING STARTED" row of the Labeling Jobs summary table.

    :return: Row title and its human-readable description
    :rtype: :class:`Tuple[str, str]`
    """
    title = "LABELING STARTED"
    description = 'the number of jobs that are started by labeler and with at least one labeled item (marked "done" by labeler)'
    return title, description
# Count of jobs that are started by the labeler and have at least one image marked "done" by the labeler
def is_labeling_started(job_info: NamedTuple) -> bool:
    """
    Check whether a Labeling Job is in progress and has at least one labeled item.

    :param job_info: Information about Labeling Job.
    :type job_info: NamedTuple
    :return: True if Labeling Job is in progress with a non-zero labeled items count,
        False otherwise
    :rtype: :class:`bool`
    """
    return is_on_labeling(job_info) and labeled_items_count(job_info) != 0
def is_on_review_desc() -> Tuple[str, str]:
    """
    Describe the "ON REVIEW" row of the Labeling Jobs summary table.

    :return: Row title and its human-readable description
    :rtype: :class:`Tuple[str, str]`
    """
    title = "ON REVIEW"
    description = "the number of jobs with status ON_REVIEW"
    return title, description
# Count of jobs with "on review" status
def is_on_review(job_info: NamedTuple) -> bool:
    """
    Check whether a Labeling Job has the ON_REVIEW status.

    :param job_info: Information about Labeling Job; only its ``status`` field is read.
    :type job_info: NamedTuple
    :return: True if Labeling Job is in 'ON REVIEW' status, False otherwise
    :rtype: :class:`bool`
    """
    on_review = str(Status.ON_REVIEW)
    return job_info.status == on_review
def is_review_started_desc() -> Tuple[str, str]:
    """
    Describe the "REVIEW STARTED" row of the Labeling Jobs summary table.

    :return: Row title and its human-readable description
    :rtype: :class:`Tuple[str, str]`
    """
    title = "REVIEW STARTED"
    description = 'the number of jobs that are started by reviewer - with at least one reviewed item (marked "accepted" or "rejected")'
    return title, description
# Count of jobs with at least one reviewed (accepted or rejected) item
def is_review_started(job_info: NamedTuple) -> bool:
    """
    Check whether a Labeling Job is on review and has at least one reviewed item.

    :param job_info: Information about Labeling Job.
    :type job_info: NamedTuple
    :return: True if Labeling Job is in 'ON REVIEW' status with a non-zero number of
        accepted or rejected items, False otherwise
    :rtype: :class:`bool`
    """
    return is_on_review(job_info) and reviewed_items_count(job_info) != 0
def is_zero_labeling_desc() -> Tuple[str, str]:
    """
    Describe the "ZERO LABELED" row of the Labeling Jobs summary table.

    :return: Row title and its human-readable description
    :rtype: :class:`Tuple[str, str]`
    """
    title = "ZERO LABELED"
    description = 'the number of jobs with status "IN PROGRESS" with zero labeled items'
    return title, description
def is_zero_reviewed_desc() -> Tuple[str, str]:
    """
    Describe the "ZERO REVIEWED" row of the Labeling Jobs summary table.

    :return: Row title and its human-readable description
    :rtype: :class:`Tuple[str, str]`
    """
    title = "ZERO REVIEWED"
    description = 'the number of jobs with status "ON REVIEW" with zero reviewed items'
    return title, description
def get_job_url(server_address: str, job: NamedTuple) -> str:
    """
    Build the URL of a labeling job.

    :param server_address: Server address used as the base of the URL.
    :type server_address: str
    :param job: Information about labeling job; its ``team_id``, ``workspace_id``,
        ``project_id``, ``dataset_id`` and ``id`` fields are read.
    :type job: NamedTuple
    :return: Labeling job url
    :rtype: :class:`str`
    :Usage example:

     .. code-block:: python

        address = 'https://app.supervise.ly'
        os.environ['SERVER_ADDRESS'] = address
        os.environ['API_TOKEN'] = 'Your Supervisely API Token'
        api = sly.Api.from_env()

        job_info = api.labeling_job.get_info_by_id(2)

        from supervisely.labeling_jobs.utils import get_job_url
        job_url = get_job_url(address, job_info)
        print(job_url)
        # Output: https://app.supervise.ly/app/images/4/8/58/54?jobId=2
    """
    relative_url = (
        f"app/images/{job.team_id}/{job.workspace_id}/{job.project_id}/{job.dataset_id}"
        f"?jobId={job.id}"
    )
    return urllib.parse.urljoin(server_address, relative_url)
def jobs_stats(server_address: str, jobs: List[NamedTuple], stats: List) -> DataFrame:
    """
    Get statistics about Labeling Jobs items.

    :param server_address: Supervisely Server Address.
    :type server_address: str
    :param jobs: List of information about Labeling Jobs.
    :type jobs: List[NamedTuple]
    :param stats: List iterated in lockstep with ``jobs``. Its values are not read, but only
        as many jobs are reported as there are elements in ``stats``.
    :type stats: list
    :return: Statistics about Labeling Jobs items as `pd.DataFrame <https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html>`_
    :rtype: :class:`pd.DataFrame`
    :Usage example:

     .. code-block:: python

        os.environ['SERVER_ADDRESS'] = 'https://app.supervisely.com'
        os.environ['API_TOKEN'] = 'Your Supervisely API Token'
        api = sly.Api.from_env()

        job_info1 = api.labeling_job.get_info_by_id(2)
        job_info2 = api.labeling_job.get_info_by_id(3)
        address = 'https://app.supervise.ly'

        from supervisely.labeling_jobs.utils import jobs_stats
        result = jobs_stats(address, [job_info1, job_info2], [1, 2])
        print(result)
        # Output:
        #    ID                                               NAME     STATUS  TOTAL  LABELED  REVIEWED  ACCEPTED  REJECTED        CREATED_AT
        # 0   2  <a href="https://app.supervise.ly/app/images/...  completed      3        3         3         2         1  08/04/2020 15:10
        # 1   3  <a href="https://app.supervise.ly/app/images/...  completed      2        2         2         2         0  08/04/2020 15:10
    """
    import html

    import pandas as pd

    link_template = '<a href="{0}" rel="noopener noreferrer" target="_blank">{1}</a>'
    columns = [
        "ID",
        "NAME",
        "STATUS",
        "TOTAL",
        "LABELED",
        "REVIEWED",
        "ACCEPTED",
        "REJECTED",
        "CREATED_AT",
    ]

    rows = []
    # ``stats`` is zipped only to keep the historical row limit (shorter list wins).
    for job, _ in zip(jobs, stats):
        # Job names are free-form text: escape them (and the URL) so that characters
        # such as <, > or " cannot break out of the generated anchor markup.
        job_link = link_template.format(
            html.escape(get_job_url(server_address, job)),
            html.escape(str(job.name)),
        )
        rows.append(
            (
                job.id,
                job_link,
                job.status,
                total_items_count(job),
                labeled_items_count(job),
                reviewed_items_count(job),
                accepted_items_count(job),
                rejected_items_count(job),
                job.created_at,
            )
        )

    df = pd.DataFrame(rows, columns=columns)
    # Render creation time as day/month/year hours:minutes.
    df["CREATED_AT"] = pd.to_datetime(df["CREATED_AT"]).dt.strftime("%d/%m/%Y %H:%M")
    return df
def jobs_summary(jobs: List[NamedTuple]) -> DataFrame:
    """
    Get summary statistics about given Labeling Jobs.

    Every job is counted in exactly one status group (completed, stopped, pending,
    labeling in progress or on review). Percentages are relative to the total number of
    jobs; for an empty ``jobs`` list every percentage is reported as "0.00 %".

    :param jobs: List of information about Labeling Jobs.
    :type jobs: List[NamedTuple]
    :return: Statistics about Labeling Jobs as `pd.DataFrame <https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html>`_
    :rtype: :class:`pd.DataFrame`
    :raises RuntimeError: if a job matches none of the handled statuses
    :Usage example:

     .. code-block:: python

        os.environ['SERVER_ADDRESS'] = 'https://app.supervisely.com'
        os.environ['API_TOKEN'] = 'Your Supervisely API Token'
        api = sly.Api.from_env()

        job_info1 = api.labeling_job.get_info_by_id(2)
        job_info2 = api.labeling_job.get_info_by_id(3)

        from supervisely.labeling_jobs.utils import jobs_summary
        result = jobs_summary([job_info1, job_info2])
        print(result)
        # Output:
        #    #            JOB STATUS  QUANTITY PERCENTAGE                                        DESCRIPTION
        # 0  0                 TOTAL         2   100.00 %            the total number of jobs in current team
        # 1  1             COMPLETED         2   100.00 %                        the number of completed jobs
        # 2  2               STOPPED         0     0.00 %                          the number of stopped jobs
        # 3  3               PENDING         0     0.00 %  the number of jobs labeler haven't even opened...
        # 4  4  LABELING IN PROGRESS         0     0.00 %          the number of jobs with status IN_PROGRESS
        # 5  5      LABELING STARTED         0     0.00 %  the number of jobs that are started by labeler...
        # 6  6          ZERO LABELED         0     0.00 %  the number of jobs with status "IN PROGRESS" w...
        # 7  7             ON REVIEW         0     0.00 %            the number of jobs with status ON_REVIEW
        # 8  8        REVIEW STARTED         0     0.00 %  the number of jobs that are started by reviewe...
        # 9  9         ZERO REVIEWED         0     0.00 %  the number of jobs with status "ON REVIEW" wit...
    """
    import pandas as pd

    count_total = len(jobs)
    count_completed = 0
    count_stopped = 0
    count_labeling_not_started = 0  # pending
    count_on_labeling = 0
    count_labeling_started = 0
    count_on_review = 0
    count_review_started = 0

    for job in jobs:
        if is_completed(job):
            count_completed += 1
        elif is_stopped(job):
            count_stopped += 1
        elif is_not_started(job):
            count_labeling_not_started += 1
        elif is_on_labeling(job):
            count_on_labeling += 1
            if is_labeling_started(job):
                count_labeling_started += 1
        elif is_on_review(job):
            count_on_review += 1
            if is_review_started(job):
                count_review_started += 1
        else:
            raise RuntimeError("Unhandled job status: {}".format(str(job)))

    # Jobs in a given state that have no processed items yet.
    count_labeling_zero_done = count_on_labeling - count_labeling_started
    count_review_zero_done = count_on_review - count_review_started

    # Each table row: (function returning (name, description), counted value).
    table_rows = [
        (total_desc, count_total),
        (is_completed_desc, count_completed),
        (is_stopped_desc, count_stopped),
        (is_not_started_desc, count_labeling_not_started),
        (is_on_labeling_desc, count_on_labeling),
        (is_labeling_started_desc, count_labeling_started),
        (is_zero_labeling_desc, count_labeling_zero_done),
        (is_on_review_desc, count_on_review),
        (is_review_started_desc, count_review_started),
        (is_zero_reviewed_desc, count_review_zero_done),
    ]

    names = []
    values = []
    percentages = []
    descriptions = []
    for func, value in table_rows:
        name, desc = func()
        # Guard against ZeroDivisionError when no jobs were passed in.
        share = value * 100 / count_total if count_total else 0.0
        names.append(name)
        values.append(value)
        percentages.append("{:.2f} %".format(share))
        descriptions.append(desc)

    df = pd.DataFrame(
        list(zip(range(len(names)), names, values, percentages, descriptions)),
        columns=["#", "JOB STATUS", "QUANTITY", "PERCENTAGE", "DESCRIPTION"],
    )
    return df
def images_summary(jobs: List[NamedTuple]) -> DataFrame:
    """
    Get summary statistics about given Labeling Jobs images.

    Item counts are summed over all given jobs. Percentages are relative to the total
    number of items; when that total is zero (for example, for an empty ``jobs`` list)
    every percentage is reported as "0.00 %".

    :param jobs: List of information about Labeling Jobs.
    :type jobs: List[NamedTuple]
    :return: Statistics about Labeling Jobs images
    :rtype: :class:`pd.DataFrame`
    :Usage example:

     .. code-block:: python

        os.environ['SERVER_ADDRESS'] = 'https://app.supervisely.com'
        os.environ['API_TOKEN'] = 'Your Supervisely API Token'
        api = sly.Api.from_env()

        job_info1 = api.labeling_job.get_info_by_id(2)
        job_info2 = api.labeling_job.get_info_by_id(3)

        from supervisely.labeling_jobs.utils import images_summary
        result = images_summary([job_info1, job_info2])
        print(result)
        # Output:
        #    # ITEM STATUS  QUANTITY PERCENTAGE                                        DESCRIPTION
        # 0  0       TOTAL         5   100.00 %     the total number of items in all labeling jobs
        # 1  1     LABELED         5   100.00 %  the total number of labeled items (labelers ma...
        # 2  2    REVIEWED         5   100.00 %  the total number of reviewed items (reviewers ...
        # 3  3    ACCEPTED         4    80.00 %  the total number of accepted items (reviewers ...
        # 4  4    REJECTED         1    20.00 %  the total number of rejected items (reviewers ...
    """
    import pandas as pd

    count_total_items = 0
    count_labeled_items = 0
    count_reviewed_items = 0
    count_accepted_items = 0
    count_rejected_items = 0
    for job in jobs:
        count_total_items += total_items_count(job)
        count_labeled_items += labeled_items_count(job)
        count_reviewed_items += reviewed_items_count(job)
        count_accepted_items += accepted_items_count(job)
        count_rejected_items += rejected_items_count(job)

    # Each table row: (function returning (name, description), summed value).
    table_rows = [
        (total_items_count_desc, count_total_items),
        (labeled_items_count_desc, count_labeled_items),
        (reviewed_items_count_desc, count_reviewed_items),
        (accepted_items_count_desc, count_accepted_items),
        (rejected_items_count_desc, count_rejected_items),
    ]

    names_items = []
    values_items = []
    percentages_items = []
    descriptions_items = []
    for func, value in table_rows:
        name, desc = func()
        # Guard against ZeroDivisionError when there are no items at all.
        share = value * 100 / count_total_items if count_total_items else 0.0
        names_items.append(name)
        values_items.append(value)
        percentages_items.append("{:.2f} %".format(share))
        descriptions_items.append(desc)

    df = pd.DataFrame(
        list(
            zip(
                range(len(names_items)),
                names_items,
                values_items,
                percentages_items,
                descriptions_items,
            )
        ),
        columns=["#", "ITEM STATUS", "QUANTITY", "PERCENTAGE", "DESCRIPTION"],
    )
    return df
def classes_summary(stats: List) -> DataFrame:
    """
    Aggregate per-class statistics of several Labeling Jobs into one table.

    Entries sharing a class id are merged: their image and label counts are summed, while
    name, shape and color are taken from the first entry seen for that id.

    :param stats: List of Labeling Job statistics; each element must provide an iterable of
        per-class entries under the ``constants.CLASSES_STATS`` key.
    :type stats: list
    :return: Table with "#", "CLASS", "SHAPE", "IMAGES COUNT" and "OBJECTS COUNT" columns,
        one row per class id; the "CLASS" cell is an HTML snippet (colored dot and name)
    :rtype: :class:`pd.DataFrame`
    """
    import html

    import pandas as pd

    name_template = '<b style="display: inline-block; border-radius: 50%; background: {}; width: 8px; height: 8px"></b> {}'

    class_id_stats = {}
    for stat in stats:
        for class_stat in stat[constants.CLASSES_STATS]:
            class_id = class_stat[ApiField.ID]
            class_name = class_stat[ApiField.NAME]
            class_shape = class_stat[ApiField.SHAPE]
            image_count = class_stat[ApiField.IMAGES_COUNT]
            object_count = class_stat[ApiField.LABELS_COUNT]
            class_color = class_stat[ApiField.COLOR]

            if class_id not in class_id_stats:
                class_id_stats[class_id] = {
                    ApiField.NAME: class_name,
                    ApiField.SHAPE: class_shape,
                    ApiField.COLOR: class_color,
                    ApiField.IMAGES_COUNT: 0,
                    ApiField.LABELS_COUNT: 0,
                }
            class_id_stats[class_id][ApiField.IMAGES_COUNT] += image_count
            class_id_stats[class_id][ApiField.LABELS_COUNT] += object_count

    rows = []
    for index, value in enumerate(class_id_stats.values()):
        # Class names and colors are inserted into markup: escape them so characters
        # such as <, > or " cannot break the generated HTML.
        class_cell = name_template.format(
            html.escape(str(value[ApiField.COLOR])),
            html.escape(str(value[ApiField.NAME])),
        )
        rows.append(
            (
                index,
                class_cell,
                value[ApiField.SHAPE],
                value[ApiField.IMAGES_COUNT],
                value[ApiField.LABELS_COUNT],
            )
        )

    df = pd.DataFrame(
        rows,
        columns=["#", "CLASS", "SHAPE", "IMAGES COUNT", "OBJECTS COUNT"],
    )
    return df
def tags_summary(stats: List) -> DataFrame:
    """
    Aggregate per-tag statistics of several Labeling Jobs into one table.

    Entries sharing a tag id are merged: their image and figure counts are summed, while
    name and color are taken from the first entry seen for that id. Entries that contain
    a "parentId" key are skipped.

    :param stats: List of Labeling Job statistics; each element must provide an iterable of
        per-tag entries under ``["job"][constants.TAGS_STATS]``.
    :type stats: list
    :return: Table with "#", "TAG", "IMAGES COUNT" and "OBJECTS COUNT" columns, one row per
        tag id; the "TAG" cell is an HTML snippet (colored dot and name)
    :rtype: :class:`pd.DataFrame`
    """
    import html

    import pandas as pd

    name_template = '<b style="display: inline-block; border-radius: 50%; background: {}; width: 8px; height: 8px"></b> {}'

    tag_id_stats = {}
    for stat in stats:
        for tag_stat in stat["job"][constants.TAGS_STATS]:
            if "parentId" in tag_stat:
                # @TODO: consider parentId (for oneOf cases)
                continue

            tag_id = tag_stat[ApiField.ID]
            tag_name = tag_stat[ApiField.NAME]
            image_count = tag_stat[ApiField.IMAGES]
            object_count = tag_stat[ApiField.FIGURES]
            class_color = tag_stat[ApiField.COLOR]

            if tag_id not in tag_id_stats:
                tag_id_stats[tag_id] = {
                    ApiField.NAME: tag_name,
                    ApiField.COLOR: class_color,
                    ApiField.IMAGES_COUNT: 0,
                    ApiField.LABELS_COUNT: 0,
                }
            tag_id_stats[tag_id][ApiField.IMAGES_COUNT] += image_count
            tag_id_stats[tag_id][ApiField.LABELS_COUNT] += object_count

    rows = []
    for index, value in enumerate(tag_id_stats.values()):
        # Tag names and colors are inserted into markup: escape them so characters
        # such as <, > or " cannot break the generated HTML.
        tag_cell = name_template.format(
            html.escape(str(value[ApiField.COLOR])),
            html.escape(str(value[ApiField.NAME])),
        )
        rows.append(
            (
                index,
                tag_cell,
                value[ApiField.IMAGES_COUNT],
                value[ApiField.LABELS_COUNT],
            )
        )

    df = pd.DataFrame(
        rows,
        columns=["#", "TAG", "IMAGES COUNT", "OBJECTS COUNT"],
    )
    return df